diff options
Diffstat (limited to 'storage/xtradb')
25 files changed, 1144 insertions, 424 deletions
diff --git a/storage/xtradb/btr/btr0cur.c b/storage/xtradb/btr/btr0cur.c index 61c07ac792e..687853a422e 100644 --- a/storage/xtradb/btr/btr0cur.c +++ b/storage/xtradb/btr/btr0cur.c @@ -239,6 +239,7 @@ btr_cur_latch_leaves( mtr_t* mtr) /*!< in: mtr */ { ulint mode; + ulint sibling_mode; ulint left_page_no; ulint right_page_no; buf_block_t* get_block; @@ -261,14 +262,21 @@ btr_cur_latch_leaves( #endif /* UNIV_BTR_DEBUG */ get_block->check_index_page_at_flush = TRUE; return; + case BTR_SEARCH_TREE: case BTR_MODIFY_TREE: - /* x-latch also brothers from left to right */ + if (UNIV_UNLIKELY(latch_mode == BTR_SEARCH_TREE)) { + mode = RW_S_LATCH; + sibling_mode = RW_NO_LATCH; + } else { + mode = sibling_mode = RW_X_LATCH; + } + /* Fetch and possibly latch also brothers from left to right */ left_page_no = btr_page_get_prev(page, mtr); if (left_page_no != FIL_NULL) { get_block = btr_block_get( space, zip_size, left_page_no, - RW_X_LATCH, cursor->index, mtr); + sibling_mode, cursor->index, mtr); if (srv_pass_corrupt_table && !get_block) { return; @@ -280,12 +288,21 @@ btr_cur_latch_leaves( ut_a(btr_page_get_next(get_block->frame, mtr) == page_get_page_no(page)); #endif /* UNIV_BTR_DEBUG */ - get_block->check_index_page_at_flush = TRUE; + if (sibling_mode == RW_NO_LATCH) { + /* btr_block_get() called with RW_NO_LATCH will + fix the read block in the buffer. 
This serves + no purpose for the fake changes prefetching, + thus we unfix the sibling blocks immediately.*/ + mtr_memo_release(mtr, get_block, + MTR_MEMO_BUF_FIX); + } else { + get_block->check_index_page_at_flush = TRUE; + } } get_block = btr_block_get( space, zip_size, page_no, - RW_X_LATCH, cursor->index, mtr); + mode, cursor->index, mtr); if (srv_pass_corrupt_table && !get_block) { return; @@ -301,7 +318,7 @@ btr_cur_latch_leaves( if (right_page_no != FIL_NULL) { get_block = btr_block_get( space, zip_size, right_page_no, - RW_X_LATCH, cursor->index, mtr); + sibling_mode, cursor->index, mtr); if (srv_pass_corrupt_table && !get_block) { return; @@ -313,7 +330,12 @@ btr_cur_latch_leaves( ut_a(btr_page_get_prev(get_block->frame, mtr) == page_get_page_no(page)); #endif /* UNIV_BTR_DEBUG */ - get_block->check_index_page_at_flush = TRUE; + if (sibling_mode == RW_NO_LATCH) { + mtr_memo_release(mtr, get_block, + MTR_MEMO_BUF_FIX); + } else { + get_block->check_index_page_at_flush = TRUE; + } } return; @@ -1566,6 +1588,9 @@ btr_cur_pessimistic_insert( } if (!(flags & BTR_NO_UNDO_LOG_FLAG)) { + + ut_a(cursor->tree_height != ULINT_UNDEFINED); + /* First reserve enough free space for the file segments of the index tree, so that the insert will not fail because of lack of space */ @@ -1860,7 +1885,8 @@ btr_cur_update_alloc_zip( ulint length, /*!< in: size needed */ ibool create, /*!< in: TRUE=delete-and-insert, FALSE=update-in-place */ - mtr_t* mtr) /*!< in: mini-transaction */ + mtr_t* mtr, /*!< in: mini-transaction */ + trx_t* trx) /*!< in: NULL or transaction */ { ut_a(page_zip == buf_block_get_page_zip(block)); ut_ad(page_zip); @@ -1877,6 +1903,14 @@ btr_cur_update_alloc_zip( return(FALSE); } + if (trx && trx->fake_changes) { + /* Don't call page_zip_compress_write_log_no_data as that has + assert which would fail. Assume there won't be a compression + failure. 
*/ + + return TRUE; + } + if (!page_zip_compress(page_zip, buf_block_get_frame(block), index, mtr)) { /* Unable to compress the page */ @@ -1960,7 +1994,8 @@ btr_cur_update_in_place( /* Check that enough space is available on the compressed page. */ if (page_zip && !btr_cur_update_alloc_zip(page_zip, block, index, - rec_offs_size(offsets), FALSE, mtr)) { + rec_offs_size(offsets), FALSE, mtr, + trx)) { return(DB_ZIP_OVERFLOW); } @@ -2159,7 +2194,8 @@ any_extern: if (page_zip && !btr_cur_update_alloc_zip(page_zip, block, index, - new_rec_size, TRUE, mtr)) { + new_rec_size, TRUE, mtr, + thr_get_trx(thr))) { err = DB_ZIP_OVERFLOW; goto err_exit; } @@ -2402,7 +2438,15 @@ btr_cur_pessimistic_update( of the index tree, so that the update will not fail because of lack of space */ - n_extents = cursor->tree_height / 16 + 3; + if (UNIV_UNLIKELY(cursor->tree_height == ULINT_UNDEFINED)) { + /* When the tree height is uninitialized due to fake + changes, reserve some hardcoded number of extents. */ + ut_a(thr && thr_get_trx(thr)->fake_changes); + n_extents = 3; + } + else { + n_extents = cursor->tree_height / 16 + 3; + } if (flags & BTR_NO_UNDO_LOG_FLAG) { reserve_flag = FSP_CLEANING; @@ -2439,7 +2483,7 @@ btr_cur_pessimistic_update( itself. Thus the following call is safe. 
*/ row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update, FALSE, *heap); - if (!(flags & BTR_KEEP_SYS_FLAG)) { + if (!(flags & BTR_KEEP_SYS_FLAG) && !trx->fake_changes) { row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR, roll_ptr); row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID, @@ -3210,6 +3254,8 @@ btr_cur_pessimistic_delete( of the index tree, so that the node pointer updates will not fail because of lack of space */ + ut_a(cursor->tree_height != ULINT_UNDEFINED); + n_extents = cursor->tree_height / 32 + 1; success = fsp_reserve_free_extents(&n_reserved, diff --git a/storage/xtradb/btr/btr0pcur.c b/storage/xtradb/btr/btr0pcur.c index b335e2c8aee..a1b7affdeb7 100644 --- a/storage/xtradb/btr/btr0pcur.c +++ b/storage/xtradb/btr/btr0pcur.c @@ -47,6 +47,7 @@ btr_pcur_create_for_mysql(void) pcur->btr_cur.index = NULL; btr_pcur_init(pcur); + pcur->btr_cur.tree_height = ULINT_UNDEFINED; return(pcur); } diff --git a/storage/xtradb/buf/buf0lru.c b/storage/xtradb/buf/buf0lru.c index a6a1f8dcf9c..14b5c65132c 100644 --- a/storage/xtradb/buf/buf0lru.c +++ b/storage/xtradb/buf/buf0lru.c @@ -239,9 +239,11 @@ buf_LRU_drop_page_hash_batch( When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page hash index entries belonging to that table. This function tries to do that in batch. Note that this is a 'best effort' attempt and does -not guarantee that ALL hash entries will be removed. */ +not guarantee that ALL hash entries will be removed. + +@return number of hashed pages found*/ static -void +ulint buf_LRU_drop_page_hash_for_tablespace( /*==================================*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ @@ -251,13 +253,14 @@ buf_LRU_drop_page_hash_for_tablespace( ulint* page_arr; ulint num_entries; ulint zip_size; + ulint num_found = 0; zip_size = fil_space_get_zip_size(id); if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) { /* Somehow, the tablespace does not exist. Nothing to drop. 
*/ ut_ad(0); - return; + return num_found; } page_arr = ut_malloc( @@ -315,6 +318,7 @@ next_page: ut_a(num_entries < BUF_LRU_DROP_SEARCH_SIZE); ++num_entries; + ++num_found; if (num_entries < BUF_LRU_DROP_SEARCH_SIZE) { goto next_page; @@ -370,6 +374,8 @@ next_page: /* Drop any remaining batch of search hashed pages. */ buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries); ut_free(page_arr); + + return num_found; } /******************************************************************//** @@ -814,8 +820,6 @@ buf_LRU_mark_space_was_deleted( for (i = 0; i < srv_buf_pool_instances; i++) { buf_pool_t* buf_pool; buf_page_t* bpage; - buf_chunk_t* chunk; - ulint j, k; buf_pool = buf_pool_from_array(i); @@ -832,28 +836,10 @@ buf_LRU_mark_space_was_deleted( mutex_exit(&buf_pool->LRU_list_mutex); - btr_search_s_lock_all(); - chunk = buf_pool->chunks; - for (j = buf_pool->n_chunks; j--; chunk++) { - buf_block_t* block = chunk->blocks; - for (k = chunk->size; k--; block++) { - if (buf_block_get_state(block) - != BUF_BLOCK_FILE_PAGE - || !block->index - || buf_page_get_space(&block->page) != id) { - continue; - } - - btr_search_s_unlock_all(); - - rw_lock_x_lock(&block->lock); - btr_search_drop_page_hash_index(block); - rw_lock_x_unlock(&block->lock); - - btr_search_s_lock_all(); - } - } - btr_search_s_unlock_all(); + /* The AHI entries for the tablespace being deleted should be + removed by now. 
*/ + ut_ad(buf_LRU_drop_page_hash_for_tablespace(buf_pool, id) + == 0); } } diff --git a/storage/xtradb/buf/buf0rea.c b/storage/xtradb/buf/buf0rea.c index 67379d614a0..6d76a488af7 100644 --- a/storage/xtradb/buf/buf0rea.c +++ b/storage/xtradb/buf/buf0rea.c @@ -235,6 +235,9 @@ not_to_recover: sync, space, 0, offset, 0, UNIV_PAGE_SIZE, ((buf_block_t*) bpage)->frame, bpage, trx); } + if(sync) { + thd_wait_end(NULL); + } if (*err == DB_TABLESPACE_DELETED) { buf_read_page_handle_error(bpage); @@ -250,7 +253,6 @@ not_to_recover: } if (sync) { - thd_wait_end(NULL); /* The i/o is already completed when we arrive from fil_read */ if (!buf_page_io_complete(bpage)) { diff --git a/storage/xtradb/fsp/fsp0fsp.c b/storage/xtradb/fsp/fsp0fsp.c index d4a2745b90b..5cbc74b0862 100644 --- a/storage/xtradb/fsp/fsp0fsp.c +++ b/storage/xtradb/fsp/fsp0fsp.c @@ -3031,7 +3031,11 @@ try_again: some of them will contain extent descriptor pages, and therefore will not be free extents */ - n_free_up = (size - free_limit) / FSP_EXTENT_SIZE; + if (size <= free_limit) { + n_free_up = 0; + } else { + n_free_up = (size - free_limit) / FSP_EXTENT_SIZE; + } if (n_free_up > 0) { n_free_up--; diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index ec976bfeb45..12f6f5134d2 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -381,6 +381,7 @@ static PSI_file_info all_innodb_files[] = { static INNOBASE_SHARE *get_share(const char *table_name); static void free_share(INNOBASE_SHARE *share); static int innobase_close_connection(handlerton *hton, THD* thd); +static void innobase_kill_query(handlerton *hton, THD* thd, enum thd_kill_levels level); static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all); static int innobase_commit(handlerton *hton, THD* thd, bool all); static int innobase_rollback(handlerton *hton, THD* thd, bool all); @@ -1053,6 +1054,13 @@ thd_to_trx( return(*(trx_t**) thd_ha_data(thd, 
innodb_hton_ptr)); } +my_bool +ha_innobase::is_fake_change_enabled(THD* thd) +{ + trx_t* trx = thd_to_trx(thd); + return(trx && trx->fake_changes); +} + /********************************************************************//** Call this function when mysqld passes control to the client. That is to avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more @@ -1117,8 +1125,7 @@ convert_error_code_to_mysql( return(0); case DB_INTERRUPTED: - my_error(ER_QUERY_INTERRUPTED, MYF(0)); - /* fall through */ + return(HA_ERR_ABORTED_BY_USER); case DB_FOREIGN_EXCEED_MAX_CASCADE: push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, @@ -2382,7 +2389,7 @@ trx_is_interrupted( /*===============*/ trx_t* trx) /*!< in: transaction */ { - return(trx && trx->mysql_thd && thd_killed((THD*) trx->mysql_thd)); + return(trx && trx->mysql_thd && thd_kill_level((THD*) trx->mysql_thd)); } /**********************************************************************//** @@ -2639,6 +2646,7 @@ innobase_init( innobase_hton->flags=HTON_NO_FLAGS; innobase_hton->release_temporary_latches=innobase_release_temporary_latches; innobase_hton->alter_table_flags = innobase_alter_table_flags; + innobase_hton->kill_query = innobase_kill_query; ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR); @@ -3052,6 +3060,14 @@ innobase_change_buffering_inited_ok: srv_use_checksums = (ibool) innobase_use_checksums; srv_fast_checksum = (ibool) innobase_fast_checksum; + if (innobase_fast_checksum) { + fprintf(stderr, + "InnoDB: Warning: innodb_fast_checksum is DEPRECATED " + "and *WILL* be removed in Percona Server 5.6. 
Please " + "consult the Percona Server 5.6 documentation for " + "help in upgrading.\n"); + } + srv_blocking_lru_restore = (ibool) innobase_blocking_lru_restore; #ifdef HAVE_LARGE_PAGES @@ -3938,6 +3954,33 @@ innobase_close_connection( DBUG_RETURN(0); } +/*****************************************************************//** +Cancel any pending lock request associated with the current THD. */ +static +void +innobase_kill_query( +/*======================*/ + handlerton* hton, /*!< in: innobase handlerton */ + THD* thd, /*!< in: MySQL thread being killed */ + enum thd_kill_levels level) /*!< in: kill level */ +{ + trx_t* trx; + DBUG_ENTER("innobase_kill_query"); + DBUG_ASSERT(hton == innodb_hton_ptr); + + mutex_enter(&kernel_mutex); + + trx = thd_to_trx(thd); + + /* Cancel a pending lock request. */ + if (trx && trx->wait_lock) { + lock_cancel_waiting_and_release(trx->wait_lock); + } + + mutex_exit(&kernel_mutex); + + DBUG_VOID_RETURN; +} /*************************************************************************//** ** InnoDB database tables @@ -6278,7 +6321,9 @@ no_commit: error = row_insert_for_mysql((byte*) record, prebuilt); #ifdef EXTENDED_FOR_USERSTAT - if (error == DB_SUCCESS) rows_changed++; + if (UNIV_LIKELY(error == DB_SUCCESS && !trx->fake_changes)) { + rows_changed++; + } #endif /* Handle duplicate key errors */ @@ -6641,7 +6686,9 @@ ha_innobase::update_row( } #ifdef EXTENDED_FOR_USERSTAT - if (error == DB_SUCCESS) rows_changed++; + if (UNIV_LIKELY(error == DB_SUCCESS && !trx->fake_changes)) { + rows_changed++; + } #endif innodb_srv_conc_exit_innodb(trx); @@ -6704,7 +6751,9 @@ ha_innobase::delete_row( error = row_update_for_mysql((byte*) record, prebuilt); #ifdef EXTENDED_FOR_USERSTAT - if (error == DB_SUCCESS) rows_changed++; + if (UNIV_LIKELY(error == DB_SUCCESS && !trx->fake_changes)) { + rows_changed++; + } #endif innodb_srv_conc_exit_innodb(trx); @@ -9825,7 +9874,7 @@ ha_innobase::check( row_mysql_unlock_data_dictionary(prebuilt->trx); } - if 
(thd_killed(user_thd)) { + if (thd_kill_level(user_thd)) { break; } @@ -9882,7 +9931,7 @@ ha_innobase::check( mutex_exit(&kernel_mutex); prebuilt->trx->op_info = ""; - if (thd_killed(user_thd)) { + if (thd_kill_level(user_thd)) { my_error(ER_QUERY_INTERRUPTED, MYF(0)); } @@ -12674,6 +12723,8 @@ static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums, static MYSQL_SYSVAR_BOOL(fast_checksum, innobase_fast_checksum, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "DEPRECATED. #### WARNING #### : This feature is DEPRECATED and WILL " + "be removed in Percona Server 5.6. " "Change the algorithm of checksum for the whole of datapage to 4-bytes word based. " "The original checksum is checked after the new one. It may be slow for reading page" " which has orginal checksum. Overwrite the page or recreate the InnoDB database, " @@ -13107,6 +13158,11 @@ static MYSQL_SYSVAR_BOOL(track_changed_pages, srv_track_changed_pages, "Track the redo log for changed pages and output a changed page bitmap", NULL, NULL, FALSE); +static MYSQL_SYSVAR_ULONGLONG(max_bitmap_file_size, srv_max_bitmap_file_size, + PLUGIN_VAR_RQCMDARG, + "The maximum size of changed page bitmap files", + NULL, NULL, 100*1024*1024ULL, 4096ULL, ULONGLONG_MAX, 0); + static MYSQL_SYSVAR_ULONGLONG(changed_pages_limit, srv_changed_pages_limit, PLUGIN_VAR_RQCMDARG, "The maximum number of rows for " @@ -13309,6 +13365,13 @@ static MYSQL_SYSVAR_ULINT(lazy_drop_table, srv_lazy_drop_table, "e.g. for http://bugs.mysql.com/51325", NULL, NULL, 0, 0, 1, 0); +static MYSQL_SYSVAR_BOOL(locking_fake_changes, srv_fake_changes_locks, + PLUGIN_VAR_NOCMDARG, + "###EXPERIMENTAL### if enabled, transactions will get S row locks instead " + "of X locks for fake changes. 
If disabled, fake change transactions will " + "not take any locks at all.", + NULL, NULL, TRUE); + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(page_size), MYSQL_SYSVAR(log_block_size), @@ -13400,6 +13463,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(use_native_aio), MYSQL_SYSVAR(change_buffering), MYSQL_SYSVAR(track_changed_pages), + MYSQL_SYSVAR(max_bitmap_file_size), MYSQL_SYSVAR(changed_pages_limit), #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG MYSQL_SYSVAR(change_buffering_debug), @@ -13418,6 +13482,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(corrupt_table_action), MYSQL_SYSVAR(lazy_drop_table), MYSQL_SYSVAR(fake_changes), + MYSQL_SYSVAR(locking_fake_changes), MYSQL_SYSVAR(merge_sort_block_size), NULL }; @@ -13666,7 +13731,7 @@ int ha_innobase::multi_range_read_explain_info(uint mrr_mode, char *str, size_t bool ha_innobase::is_thd_killed() { - return thd_killed(user_thd); + return thd_kill_level(user_thd); } /** diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index 4d9c0a1ab35..359d0b95367 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -137,6 +137,7 @@ class ha_innobase: public handler int close(void); double scan_time(); double read_time(uint index, uint ranges, ha_rows rows); + my_bool is_fake_change_enabled(THD *thd); bool is_corrupt() const; int write_row(uchar * buf); diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc index 29a80594344..4b33d6a780c 100644 --- a/storage/xtradb/handler/i_s.cc +++ b/storage/xtradb/handler/i_s.cc @@ -7147,29 +7147,38 @@ static ST_FIELD_INFO i_s_innodb_changed_pages_info[] = }; /*********************************************************************** - This function parses condition and gets upper bounds for start and end LSN's - if condition corresponds to certain pattern. 
+ This function implements ICP for I_S.INNODB_CHANGED_PAGES by parsing a + condition and getting lower and upper bounds for start and end LSNs if the + condition corresponds to a certain pattern. - We can't know right position to avoid scanning bitmap files from the beginning - to the lower bound. But we can stop scanning bitmap files if we reach upper bound. + In the most general form, we understand queries like - It's expected the most used queries will be like the following: - - SELECT * FROM INNODB_CHANGED_PAGES WHERE START_LSN > num1 AND start_lsn < num2; + SELECT * FROM INNODB_CHANGED_PAGES + WHERE START_LSN > num1 AND START_LSN < num2 + AND END_LSN > num3 AND END_LSN < num4; - That's why the pattern is: + That's why the pattern syntax is: pattern: comp | and_comp; comp: lsn < int_num | lsn <= int_num | int_num > lsn | int_num >= lsn; lsn: start_lsn | end_lsn; - and_comp: some_expression AND some_expression | some_expression AND and_comp; - some_expression: comp | any_other_expression; + and_comp: expression AND expression | expression AND and_comp; + expression: comp | any_other_expression; + + The two bounds are handled differently: the lower bound is used to find the + correct starting _file_, the upper bound the last _block_ that needs reading. + + Lower bound conditions are handled in the following way: start_lsn >= X + specifies that the reading must start from the file that has the highest + starting LSN less than or equal to X. start_lsn > X is equivalent to + start_lsn >= X + 1. For end_lsn, end_lsn >= X is treated as + start_lsn >= X - 1 and end_lsn > X as start_lsn >= X. - Suppose the condition is start_lsn < 100, this means we have to read all - blocks with start_lsn < 100. Which is equivalent to reading all the blocks - with end_lsn <= 99, or just end_lsn < 100. That's why it's enough to find - maximum lsn value, doesn't matter if this is start or end lsn and compare - it with "start_lsn" field. 
+ For the upper bound, suppose the condition is start_lsn < 100, this means we + have to read all blocks with start_lsn < 100. Which is equivalent to reading + all the blocks with end_lsn <= 99, or just end_lsn < 100. That's why it's + enough to find maximum lsn value, doesn't matter if this is start or end lsn + and compare it with "start_lsn" field. LSN <= 100 is treated as LSN < 101. Example: @@ -7180,92 +7189,130 @@ static ST_FIELD_INFO i_s_innodb_changed_pages_info[] = 555 > end_lsn AND page_id = 100; - max_lsn will be set to 555. + end_lsn will be set to 555, start_lsn will be set 11. + + Support for other functions (equal, NULL-safe equal, BETWEEN, IN, etc.) will + be added on demand. + */ static void limit_lsn_range_from_condition( /*===========================*/ - TABLE* table, /*!<in: table */ - COND* cond, /*!<in: condition */ - ib_uint64_t* max_lsn) /*!<in/out: maximum LSN - (must be initialized with maximum - available value) */ + TABLE* table, /*!<in: table */ + COND* cond, /*!<in: condition */ + ib_uint64_t* start_lsn, /*!<in/out: minumum LSN */ + ib_uint64_t* end_lsn) /*!<in/out: maximum LSN */ { + enum Item_func::Functype func_type; + if (cond->type() != Item::COND_ITEM && cond->type() != Item::FUNC_ITEM) return; - switch (((Item_func*) cond)->functype()) + func_type = ((Item_func*) cond)->functype(); + + switch (func_type) { - case Item_func::COND_AND_FUNC: - { - List_iterator<Item> li(*((Item_cond*) cond)-> - argument_list()); - Item *item; - while ((item= li++)) - limit_lsn_range_from_condition(table, - item, - max_lsn); - break; + case Item_func::COND_AND_FUNC: + { + List_iterator<Item> li(*((Item_cond*) cond) + ->argument_list()); + Item *item; + + while ((item= li++)) { + limit_lsn_range_from_condition(table, item, start_lsn, + end_lsn); + } + break; + } + case Item_func::LT_FUNC: + case Item_func::LE_FUNC: + case Item_func::GT_FUNC: + case Item_func::GE_FUNC: + { + Item *left; + Item *right; + Item_field *item_field; + ib_uint64_t 
tmp_result; + ibool is_end_lsn; + + /* a <= b equals to b >= a that's why we just exchange "left" + and "right" in the case of ">" or ">=" function. We don't + touch the operation itself. */ + if (((Item_func*) cond)->functype() == Item_func::LT_FUNC + || ((Item_func*) cond)->functype() == Item_func::LE_FUNC) { + left = ((Item_func*) cond)->arguments()[0]; + right = ((Item_func*) cond)->arguments()[1]; + } else { + left = ((Item_func*) cond)->arguments()[1]; + right = ((Item_func*) cond)->arguments()[0]; } - case Item_func::LT_FUNC: - case Item_func::LE_FUNC: - case Item_func::GT_FUNC: - case Item_func::GE_FUNC: - { - Item *left; - Item *right; - Item_field *item_field; - ib_uint64_t tmp_result; - - /* - a <= b equals to b >= a that's why we just exchange - "left" and "right" in the case of ">" or ">=" - function - */ - if (((Item_func*) cond)->functype() == - Item_func::LT_FUNC || - ((Item_func*) cond)->functype() == - Item_func::LE_FUNC) - { - left = ((Item_func*) cond)->arguments()[0]; - right = ((Item_func*) cond)->arguments()[1]; - } else { - left = ((Item_func*) cond)->arguments()[1]; - right = ((Item_func*) cond)->arguments()[0]; - } - if (!left || !right) - return; - if (left->type() != Item::FIELD_ITEM) - return; - if (right->type() != Item::INT_ITEM) - return; + if (left->type() == Item::FIELD_ITEM) { + item_field = (Item_field *)left; + } else if (right->type() == Item::FIELD_ITEM) { + item_field = (Item_field *)right; + } else { + return; + } - item_field = (Item_field*)left; + /* Check if the current field belongs to our table */ + if (table != item_field->field->table) { + return; + } - if (/* START_LSN */ - table->field[2] != item_field->field && - /* END_LSN */ - table->field[3] != item_field->field) - { - return; - } + /* Check if the field is START_LSN or END_LSN */ + /* END_LSN */ + is_end_lsn = table->field[3]->eq(item_field->field); + + if (/* START_LSN */ !table->field[2]->eq(item_field->field) + && !is_end_lsn) { + return; + } + + if 
(left->type() == Item::FIELD_ITEM + && right->type() == Item::INT_ITEM) { - /* Check if the current field belongs to our table */ - if (table != item_field->field->table) - return; + /* The case of start_lsn|end_lsn <|<= const, i.e. the + upper bound. */ tmp_result = right->val_int(); - if (tmp_result < *max_lsn) - *max_lsn = tmp_result; + if (((func_type == Item_func::LE_FUNC) + || (func_type == Item_func::GE_FUNC)) + && (tmp_result != IB_ULONGLONG_MAX)) { - break; + tmp_result++; + } + if (tmp_result < *end_lsn) { + *end_lsn = tmp_result; + } + + } else if (left->type() == Item::INT_ITEM + && right->type() == Item::FIELD_ITEM) { + + /* The case of const <|<= start_lsn|end_lsn, i.e. the + lower bound */ + + tmp_result = left->val_int(); + if (is_end_lsn && tmp_result != 0) { + tmp_result--; + } + if (((func_type == Item_func::LT_FUNC) + || (func_type == Item_func::GT_FUNC)) + && (tmp_result != IB_ULONGLONG_MAX)) { + + tmp_result++; + } + if (tmp_result > *start_lsn) { + *start_lsn = tmp_result; + } } - default:; - } + break; + } + default:; + } } /*********************************************************************** @@ -7282,40 +7329,55 @@ i_s_innodb_changed_pages_fill( TABLE* table = (TABLE *) tables->table; log_bitmap_iterator_t i; ib_uint64_t output_rows_num = 0UL; - ib_uint64_t max_lsn = ~0ULL; + ib_uint64_t max_lsn = IB_ULONGLONG_MAX; + ib_uint64_t min_lsn = 0ULL; + + DBUG_ENTER("i_s_innodb_changed_pages_fill"); - if (!srv_track_changed_pages) - return 0; + /* deny access to non-superusers */ + if (check_global_access(thd, PROCESS_ACL)) { + + DBUG_RETURN(0); + } - if (!log_online_bitmap_iterator_init(&i)) - return 1; + RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name); - if (cond) - limit_lsn_range_from_condition(table, cond, &max_lsn); + if (!srv_track_changed_pages) { + DBUG_RETURN(0); + } + + if (cond) { + limit_lsn_range_from_condition(table, cond, &min_lsn, + &max_lsn); + } + + if (!log_online_bitmap_iterator_init(&i, min_lsn, max_lsn)) { + 
DBUG_RETURN(1); + } while(log_online_bitmap_iterator_next(&i) && (!srv_changed_pages_limit || output_rows_num < srv_changed_pages_limit) && /* - There is no need to compare both start LSN and end LSN fields - with maximum value. It's enough to compare only start LSN. - Example: - - max_lsn = 100 - \\\\\\\\\\\\\\\\\\\\\\\\\|\\\\\\\\ - Query 1 - I------I I-------I I-------------I I----I - ////////////////// | - Query 2 - 1 2 3 4 - - Query 1: - SELECT * FROM INNODB_CHANGED_PAGES WHERE start_lsn < 100 - will select 1,2,3 bitmaps - Query 2: - SELECT * FROM INNODB_CHANGED_PAGES WHERE end_lsn < 100 - will select 1,2 bitmaps - - The condition start_lsn <= 100 will be false after reading - 1,2,3 bitmaps which suits for both cases. + There is no need to compare both start LSN and end LSN fields + with maximum value. It's enough to compare only start LSN. + Example: + + max_lsn = 100 + \\\\\\\\\\\\\\\\\\\\\\\\\|\\\\\\\\ - Query 1 + I------I I-------I I-------------I I----I + ////////////////// | - Query 2 + 1 2 3 4 + + Query 1: + SELECT * FROM INNODB_CHANGED_PAGES WHERE start_lsn < 100 + will select 1,2,3 bitmaps + Query 2: + SELECT * FROM INNODB_CHANGED_PAGES WHERE end_lsn < 100 + will select 1,2 bitmaps + + The condition start_lsn <= 100 will be false after reading + 1,2,3 bitmaps which suits for both cases. */ LOG_BITMAP_ITERATOR_START_LSN(i) <= max_lsn) { @@ -7330,10 +7392,10 @@ i_s_innodb_changed_pages_fill( LOG_BITMAP_ITERATOR_PAGE_NUM(i)); /* START_LSN */ table->field[2]->store( - LOG_BITMAP_ITERATOR_START_LSN(i)); + LOG_BITMAP_ITERATOR_START_LSN(i), true); /* END_LSN */ table->field[3]->store( - LOG_BITMAP_ITERATOR_END_LSN(i)); + LOG_BITMAP_ITERATOR_END_LSN(i), true); /* I_S tables are in-memory tables. 
If bitmap file is big enough @@ -7353,14 +7415,14 @@ i_s_innodb_changed_pages_fill( if (schema_table_store_record(thd, table)) { log_online_bitmap_iterator_release(&i); - return 1; + DBUG_RETURN(1); } ++output_rows_num; } log_online_bitmap_iterator_release(&i); - return 0; + DBUG_RETURN(0); } static diff --git a/storage/xtradb/ibuf/ibuf0ibuf.c b/storage/xtradb/ibuf/ibuf0ibuf.c index 78cb6e20176..77305e42fb1 100644 --- a/storage/xtradb/ibuf/ibuf0ibuf.c +++ b/storage/xtradb/ibuf/ibuf0ibuf.c @@ -4044,7 +4044,7 @@ updated_in_place: update) && (!page_zip || btr_cur_update_alloc_zip( page_zip, block, index, - rec_offs_size(offsets), FALSE, mtr))) { + rec_offs_size(offsets), FALSE, mtr, NULL))) { /* This is the easy case. Do something similar to btr_cur_update_in_place(). */ row_upd_rec_in_place(rec, index, offsets, diff --git a/storage/xtradb/include/btr0btr.h b/storage/xtradb/include/btr0btr.h index 03e89ae3f7d..fb06a774b82 100644 --- a/storage/xtradb/include/btr0btr.h +++ b/storage/xtradb/include/btr0btr.h @@ -65,7 +65,10 @@ enum btr_latch_mode { /** Search the previous record. */ BTR_SEARCH_PREV = 35, /** Modify the previous record. */ - BTR_MODIFY_PREV = 36 + BTR_MODIFY_PREV = 36, + /** Weaker BTR_MODIFY_TREE that does not lock the leaf page siblings, + used for fake changes. */ + BTR_SEARCH_TREE = 37 /* BTR_MODIFY_TREE | 4 */ }; /* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually exclusive. 
*/ diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h index cbc6103c2ee..cb44129aeb5 100644 --- a/storage/xtradb/include/btr0cur.h +++ b/storage/xtradb/include/btr0cur.h @@ -259,8 +259,9 @@ btr_cur_update_alloc_zip( ulint length, /*!< in: size needed */ ibool create, /*!< in: TRUE=delete-and-insert, FALSE=update-in-place */ - mtr_t* mtr) /*!< in: mini-transaction */ - __attribute__((nonnull, warn_unused_result)); + mtr_t* mtr, /*!< in: mini-transaction */ + trx_t* trx) /*!< in: NULL or transaction */ + __attribute__((nonnull (1, 2, 3, 6), warn_unused_result)); /*************************************************************//** Updates a record when the update causes no size changes in its fields. @return DB_SUCCESS or error number */ diff --git a/storage/xtradb/include/log0online.h b/storage/xtradb/include/log0online.h index 0e0ca169f6f..e7c3f301e45 100644 --- a/storage/xtradb/include/log0online.h +++ b/storage/xtradb/include/log0online.h @@ -27,6 +27,16 @@ Online database log parsing for changed page tracking #include "univ.i" #include "os0file.h" +/** Single bitmap file information */ +typedef struct log_online_bitmap_file_struct log_online_bitmap_file_t; + +/** A set of bitmap files containing some LSN range */ +typedef struct log_online_bitmap_file_range_struct +log_online_bitmap_file_range_t; + +/** An iterator over changed page info */ +typedef struct log_bitmap_iterator_struct log_bitmap_iterator_t; + /*********************************************************************//** Initializes the online log following subsytem. 
*/ UNIV_INTERN @@ -49,45 +59,32 @@ void log_online_follow_redo_log(); /*=========================*/ -/** The iterator through all bits of changed pages bitmap blocks */ -struct log_bitmap_iterator_struct -{ - char in_name[FN_REFLEN]; /*!< the file name for bitmap - input */ - os_file_t in; /*!< the bitmap input file */ - ib_uint64_t in_offset; /*!< the next write position in the - bitmap output file */ - ib_uint32_t bit_offset; /*!< bit offset inside of bitmap - block*/ - ib_uint64_t start_lsn; /*!< Start lsn of the block */ - ib_uint64_t end_lsn; /*!< End lsn of the block */ - ib_uint32_t space_id; /*!< Block space id */ - ib_uint32_t first_page_id; /*!< First block page id */ - ibool changed; /*!< true if current page was changed */ - byte* page; /*!< Bitmap block */ -}; - -typedef struct log_bitmap_iterator_struct log_bitmap_iterator_t; - #define LOG_BITMAP_ITERATOR_START_LSN(i) \ - ((i).start_lsn) + ((i).start_lsn) #define LOG_BITMAP_ITERATOR_END_LSN(i) \ - ((i).end_lsn) + ((i).end_lsn) #define LOG_BITMAP_ITERATOR_SPACE_ID(i) \ - ((i).space_id) + ((i).space_id) #define LOG_BITMAP_ITERATOR_PAGE_NUM(i) \ - ((i).first_page_id + (i).bit_offset) + ((i).first_page_id + (i).bit_offset) #define LOG_BITMAP_ITERATOR_PAGE_CHANGED(i) \ - ((i).changed) + ((i).changed) /*********************************************************************//** -Initializes log bitmap iterator. +Initializes log bitmap iterator. The minimum LSN is used for finding the +correct starting file with records and it there may be records returned by +the iterator that have LSN less than start_lsn. + @return TRUE if the iterator is initialized OK, FALSE otherwise. 
*/ UNIV_INTERN ibool log_online_bitmap_iterator_init( /*============================*/ - log_bitmap_iterator_t *i); /*!<in/out: iterator */ + log_bitmap_iterator_t *i, /*!<in/out: iterator */ + ib_uint64_t min_lsn, /*!<in: start LSN for the + iterator */ + ib_uint64_t max_lsn); /*!<in: end LSN for the + iterator */ /*********************************************************************//** Releases log bitmap iterator. */ @@ -108,4 +105,57 @@ log_online_bitmap_iterator_next( /*============================*/ log_bitmap_iterator_t *i); /*!<in/out: iterator */ +/** Struct for single bitmap file information */ +struct log_online_bitmap_file_struct { + char name[FN_REFLEN]; /*!< Name with full path */ + os_file_t file; /*!< Handle to opened file */ + ib_uint64_t size; /*!< Size of the file */ + ib_uint64_t offset; /*!< Offset of the next read, + or count of already-read bytes + */ +}; + +/** Struct for a set of bitmap files containing some LSN range */ +struct log_online_bitmap_file_range_struct { + size_t count; /*!< Number of files */ + /*!< Dynamically-allocated array of info about individual files */ + struct { + char name[FN_REFLEN]; /*!< Name of a file */ + ib_uint64_t start_lsn; /*!< Starting LSN of + data in this file */ + ulong seq_num; /*!< Sequence number of + this file */ + } *files; +}; + +/** Struct for an iterator through all bits of changed pages bitmap blocks */ +struct log_bitmap_iterator_struct +{ + log_online_bitmap_file_range_t in_files; /*!< The bitmap files + for this iterator */ + size_t in_i; /*!< Currently read + file index in in_files + */ + log_online_bitmap_file_t in; /*!< Currently read + file */ + ib_uint32_t bit_offset; /*!< bit offset inside + the current bitmap + block */ + ib_uint64_t start_lsn; /*!< Start LSN of the + current bitmap block */ + ib_uint64_t end_lsn; /*!< End LSN of the + current bitmap block */ + ib_uint32_t space_id; /*!< Current block + space id */ + ib_uint32_t first_page_id; /*!< Id of the first + page in the current 
+ block */ + ibool last_page_in_run;/*!< "Last page in + run" flag value for the + current block */ + ibool changed; /*!< true if current + page was changed */ + byte* page; /*!< Bitmap block */ +}; + #endif diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index a95eb8a1d58..6c5b61487f2 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -144,7 +144,8 @@ extern char* srv_doublewrite_file; extern ibool srv_recovery_stats; -extern my_bool srv_track_changed_pages; +extern my_bool srv_track_changed_pages; +extern ib_uint64_t srv_max_bitmap_file_size; extern ulonglong srv_changed_pages_limit; @@ -395,6 +396,10 @@ extern uint srv_auto_lru_dump; /** Whether startup should be blocked until buffer pool is fully restored */ extern ibool srv_blocking_lru_restore; +/** When TRUE, fake change transcations take S rather than X row locks. +When FALSE, row locks are not taken at all. */ +extern my_bool srv_fake_changes_locks; + /** Status variables to be passed to MySQL */ typedef struct export_var_struct export_struc; diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 5d2cd2d0313..77acf54d8dc 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -54,7 +54,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_BUGFIX 8 #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 29.1 +#define PERCONA_INNODB_VERSION 29.3 #endif /* The following is the InnoDB version as shown in diff --git a/storage/xtradb/include/ut0ut.h b/storage/xtradb/include/ut0ut.h index 47ab6eb9b74..48f30b33e65 100644 --- a/storage/xtradb/include/ut0ut.h +++ b/storage/xtradb/include/ut0ut.h @@ -122,6 +122,15 @@ ut_max( /*===*/ ulint n1, /*!< in: first number */ ulint n2); /*!< in: second number */ +/******************************************************//** +Calculates the maximum of two ib_uint64_t values. 
+@return the maximum */ +UNIV_INLINE +ib_uint64_t +ut_max_uint64( +/*==========*/ + ib_uint64_t n1, /*!< in: first number */ + ib_uint64_t n2); /*!< in: second number */ /****************************************************************//** Calculates minimum of two ulint-pairs. */ UNIV_INLINE diff --git a/storage/xtradb/include/ut0ut.ic b/storage/xtradb/include/ut0ut.ic index 6f55c7e410e..d56deb6266f 100644 --- a/storage/xtradb/include/ut0ut.ic +++ b/storage/xtradb/include/ut0ut.ic @@ -49,6 +49,19 @@ ut_max( return((n1 <= n2) ? n2 : n1); } +/******************************************************//** +Calculates the maximum of two ib_uint64_t values. +@return the maximum */ +UNIV_INLINE +ib_uint64_t +ut_max_uint64( +/*==========*/ + ib_uint64_t n1, /*!< in: first number */ + ib_uint64_t n2) /*!< in: second number */ +{ + return((n1 <= n2) ? n2 : n1); +} + /****************************************************************//** Calculates minimum of two ulint-pairs. */ UNIV_INLINE diff --git a/storage/xtradb/lock/lock0lock.c b/storage/xtradb/lock/lock0lock.c index 414d3ae2c49..f172ad6695b 100644 --- a/storage/xtradb/lock/lock0lock.c +++ b/storage/xtradb/lock/lock0lock.c @@ -5481,8 +5481,13 @@ lock_sec_rec_read_check_and_lock( return(DB_SUCCESS); } - if (thr && thr_get_trx(thr)->fake_changes && mode == LOCK_X) { - mode = LOCK_S; + if (UNIV_UNLIKELY((thr && thr_get_trx(thr)->fake_changes))) { + if (!srv_fake_changes_locks) { + return(DB_SUCCESS); + } + if (mode == LOCK_X) { + mode = LOCK_S; + } } heap_no = page_rec_get_heap_no(rec); @@ -5561,8 +5566,13 @@ lock_clust_rec_read_check_and_lock( return(DB_SUCCESS); } - if (thr && thr_get_trx(thr)->fake_changes && mode == LOCK_X) { - mode = LOCK_S; + if (UNIV_UNLIKELY((thr && thr_get_trx(thr)->fake_changes))) { + if (!srv_fake_changes_locks) { + return(DB_SUCCESS); + } + if (mode == LOCK_X) { + mode = LOCK_S; + } } heap_no = page_rec_get_heap_no(rec); diff --git a/storage/xtradb/log/log0log.c b/storage/xtradb/log/log0log.c 
index f2066b49662..e7c7a165b9c 100644 --- a/storage/xtradb/log/log0log.c +++ b/storage/xtradb/log/log0log.c @@ -248,7 +248,7 @@ log_check_tracking_margin( checked for the already-written log. */ { ib_uint64_t tracked_lsn; - ulint tracked_lsn_age; + ib_uint64_t tracked_lsn_age; if (!srv_track_changed_pages) { return FALSE; @@ -460,7 +460,7 @@ log_close(void) ib_uint64_t oldest_lsn; ib_uint64_t lsn; ib_uint64_t tracked_lsn; - ulint tracked_lsn_age; + ib_uint64_t tracked_lsn_age; log_t* log = log_sys; ib_uint64_t checkpoint_age; diff --git a/storage/xtradb/log/log0online.c b/storage/xtradb/log/log0online.c index 1d478c467e6..55eb9d17c46 100644 --- a/storage/xtradb/log/log0online.c +++ b/storage/xtradb/log/log0online.c @@ -48,10 +48,8 @@ struct log_bitmap_struct { parsed, it points to the start, otherwise points immediatelly past the end of the incomplete log record. */ - char* out_name; /*!< the file name for bitmap output */ - os_file_t out; /*!< the bitmap output file */ - ib_uint64_t out_offset; /*!< the next write position in the - bitmap output file */ + log_online_bitmap_file_t out; /*!< The current bitmap file */ + ulint out_seq_num; /*!< the bitmap file sequence number */ ib_uint64_t start_lsn; /*!< the LSN of the next unparsed record and the start of the next LSN interval to be parsed. */ @@ -76,8 +74,13 @@ struct log_bitmap_struct { /* The log parsing and bitmap output struct instance */ static struct log_bitmap_struct* log_bmp_sys; -/* File name stem for modified page bitmaps */ -static const char* modified_page_stem = "ib_modified_log."; +/** File name stem for bitmap files. */ +static const char* bmp_file_name_stem = "ib_modified_log_"; + +/** File name template for bitmap files. The 1st format tag is a directory +name, the 2nd tag is the stem, the 3rd tag is a file sequence number, the 4th +tag is the start LSN for the file. 
*/ +static const char* bmp_file_name_template = "%s%s%lu_%llu.xdb"; /* On server startup with empty database srv_start_lsn == 0, in which case the first LSN of actual log records will be this. */ @@ -85,7 +88,7 @@ which case the first LSN of actual log records will be this. */ /* Tests if num bit of bitmap is set */ #define IS_BIT_SET(bitmap, num) \ - (*((bitmap) + ((num) >> 3)) & (1UL << ((num) & 7UL))) + (*((bitmap) + ((num) >> 3)) & (1UL << ((num) & 7UL))) /** The bitmap file block size in bytes. All writes will be multiples of this. */ @@ -243,10 +246,69 @@ log_online_calc_checksum( } /****************************************************************//** +Read one bitmap data page and check it for corruption. + +@return TRUE if page read OK, FALSE if I/O error */ +static +ibool +log_online_read_bitmap_page( +/*========================*/ + log_online_bitmap_file_t *bitmap_file, /*!<in/out: bitmap + file */ + byte *page, /*!<out: read page. + Must be at least + MODIFIED_PAGE_BLOCK_SIZE + bytes long */ + ibool *checksum_ok) /*!<out: TRUE if page + checksum OK */ +{ + ulint offset_low = (ulint)(bitmap_file->offset & 0xFFFFFFFF); + ulint offset_high = (ulint)(bitmap_file->offset >> 32); + ulint checksum; + ulint actual_checksum; + ibool success; + + ut_a(bitmap_file->size >= MODIFIED_PAGE_BLOCK_SIZE); + ut_a(bitmap_file->offset + <= bitmap_file->size - MODIFIED_PAGE_BLOCK_SIZE); + ut_a(bitmap_file->offset % MODIFIED_PAGE_BLOCK_SIZE == 0); + + success = os_file_read(bitmap_file->file, page, offset_low, + offset_high, MODIFIED_PAGE_BLOCK_SIZE); + + if (UNIV_UNLIKELY(!success)) { + + /* The following call prints an error message */ + os_file_get_last_error(TRUE); + fprintf(stderr, + "InnoDB: Warning: failed reading changed page bitmap " + "file \'%s\'\n", bitmap_file->name); + return FALSE; + } + + bitmap_file->offset += MODIFIED_PAGE_BLOCK_SIZE; + ut_ad(bitmap_file->offset <= bitmap_file->size); + + checksum = mach_read_from_4(page + MODIFIED_PAGE_BLOCK_CHECKSUM); + 
actual_checksum = log_online_calc_checksum(page); + *checksum_ok = (checksum == actual_checksum); + + return TRUE; +} + +/****************************************************************//** Get the last tracked fully LSN from the bitmap file by reading backwards untile a correct end page is found. Detects incomplete writes and corrupted data. Sets the start output position for the written bitmap data. + +Multiple bitmap files are handled using the following assumptions: +1) Only the last file might be corrupted. In case where no good data was found +in the last file, assume that the next to last file is OK. This assumption +does not limit crash recovery capability in any way. +2) If the whole of the last file was corrupted, assume that the start LSN in +its name is correct and use it for (re-)tracking start. + @return the last fully tracked LSN */ static ib_uint64_t @@ -254,73 +316,46 @@ log_online_read_last_tracked_lsn() /*==============================*/ { byte page[MODIFIED_PAGE_BLOCK_SIZE]; - ib_uint64_t read_offset = log_bmp_sys->out_offset; - /* Initialize these to nonequal values so that file size == 0 case with - zero loop repetitions is handled correctly */ - ulint checksum = 0; - ulint actual_checksum = !checksum; ibool is_last_page = FALSE; + ibool checksum_ok = FALSE; ib_uint64_t result; + ib_uint64_t read_offset = log_bmp_sys->out.offset; - ut_ad(log_bmp_sys->out_offset % MODIFIED_PAGE_BLOCK_SIZE == 0); - - while (checksum != actual_checksum && read_offset > 0 && !is_last_page) + while (!checksum_ok && read_offset > 0 && !is_last_page) { - - ulint offset_low, offset_high; - ibool success; - read_offset -= MODIFIED_PAGE_BLOCK_SIZE; - offset_high = (ulint)(read_offset >> 32); - offset_low = (ulint)(read_offset & 0xFFFFFFFF); - - success = os_file_read(log_bmp_sys->out, page, offset_low, - offset_high, MODIFIED_PAGE_BLOCK_SIZE); - if (!success) { + log_bmp_sys->out.offset = read_offset; - /* The following call prints an error message */ - 
os_file_get_last_error(TRUE); - /* Here and below assume that bitmap file names do not - contain apostrophes, thus no need for - ut_print_filename(). */ - fprintf(stderr, "InnoDB: Warning: failed reading " - "changed page bitmap file \'%s\'\n", - log_bmp_sys->out_name); - return MIN_TRACKED_LSN; + if (!log_online_read_bitmap_page(&log_bmp_sys->out, page, + &checksum_ok)) { + checksum_ok = FALSE; + result = 0; + break; } - is_last_page - = mach_read_from_4(page + MODIFIED_PAGE_IS_LAST_BLOCK); - checksum = mach_read_from_4(page - + MODIFIED_PAGE_BLOCK_CHECKSUM); - actual_checksum = log_online_calc_checksum(page); - if (checksum != actual_checksum) { + if (checksum_ok) { + is_last_page + = mach_read_from_4 + (page + MODIFIED_PAGE_IS_LAST_BLOCK); + } else { - fprintf(stderr, "InnoDB: Warning: corruption " - "detected in \'%s\' at offset %llu\n", - log_bmp_sys->out_name, read_offset); + fprintf(stderr, + "InnoDB: Warning: corruption detected in " + "\'%s\' at offset %llu\n", + log_bmp_sys->out.name, read_offset); } - }; - if (UNIV_LIKELY(checksum == actual_checksum && is_last_page)) { - - log_bmp_sys->out_offset = read_offset - + MODIFIED_PAGE_BLOCK_SIZE; - result = mach_read_from_8(page + MODIFIED_PAGE_END_LSN); - } - else { - log_bmp_sys->out_offset = read_offset; - result = 0; - } + result = (checksum_ok && is_last_page) + ? 
mach_read_from_8(page + MODIFIED_PAGE_END_LSN) : 0; /* Truncate the output file to discard the corrupted bitmap data, if any */ - if (!os_file_set_eof_at(log_bmp_sys->out, - log_bmp_sys->out_offset)) { + if (!os_file_set_eof_at(log_bmp_sys->out.file, + log_bmp_sys->out.offset)) { fprintf(stderr, "InnoDB: Warning: failed truncating " "changed page bitmap file \'%s\' to %llu bytes\n", - log_bmp_sys->out_name, log_bmp_sys->out_offset); + log_bmp_sys->out.name, log_bmp_sys->out.offset); result = 0; } return result; @@ -350,6 +385,37 @@ log_set_tracked_lsn( #endif } +/*********************************************************************//** +Check if missing, if any, LSN interval can be read and tracked using the +current LSN value, the LSN value where the tracking stopped, and the log group +capacity. + +@return TRUE if the missing interval can be tracked or if there's no missing +data. */ +static +ibool +log_online_can_track_missing( +/*=========================*/ + ib_uint64_t last_tracked_lsn, /*!<in: last tracked LSN */ + ib_uint64_t tracking_start_lsn) /*!<in: current LSN */ +{ + /* last_tracked_lsn might be < MIN_TRACKED_LSN in the case of empty + bitmap file, handle this too. */ + last_tracked_lsn = ut_max_uint64(last_tracked_lsn, MIN_TRACKED_LSN); + + if (last_tracked_lsn > tracking_start_lsn) { + fprintf(stderr, + "InnoDB: Error: last tracked LSN is in future. 
This " + "can be caused by mismatched bitmap files.\n"); + exit(1); + } + + return (last_tracked_lsn == tracking_start_lsn) + || (log_sys->lsn - last_tracked_lsn + <= log_sys->log_group_capacity); +} + + /****************************************************************//** Diagnose a gap in tracked LSN range on server startup due to crash or very fast shutdown and try to close it by tracking the data @@ -365,22 +431,20 @@ log_online_track_missing_on_startup( { ut_ad(last_tracked_lsn != tracking_start_lsn); - fprintf(stderr, "InnoDB: last tracked LSN in \'%s\' is %llu, but " - "last checkpoint LSN is %llu. This might be due to a server " - "crash or a very fast shutdown. ", log_bmp_sys->out_name, - last_tracked_lsn, tracking_start_lsn); - - /* last_tracked_lsn might be < MIN_TRACKED_LSN in the case of empty - bitmap file, handle this too. */ - last_tracked_lsn = ut_max(last_tracked_lsn, MIN_TRACKED_LSN); + fprintf(stderr, "InnoDB: last tracked LSN is %llu, but the last " + "checkpoint LSN is %llu. This might be due to a server " + "crash or a very fast shutdown. ", last_tracked_lsn, + tracking_start_lsn); /* See if we can fully recover the missing interval */ - if (log_sys->lsn - last_tracked_lsn < log_sys->log_group_capacity) { + if (log_online_can_track_missing(last_tracked_lsn, + tracking_start_lsn)) { fprintf(stderr, "Reading the log to advance the last tracked LSN.\n"); - log_bmp_sys->start_lsn = last_tracked_lsn; + log_bmp_sys->start_lsn = ut_max_uint64(last_tracked_lsn, + MIN_TRACKED_LSN); log_set_tracked_lsn(log_bmp_sys->start_lsn); log_online_follow_redo_log(); ut_ad(log_bmp_sys->end_lsn >= tracking_start_lsn); @@ -406,16 +470,101 @@ log_online_track_missing_on_startup( } /*********************************************************************//** +Format a bitmap output file name to log_bmp_sys->out.name. 
*/ +static +void +log_online_make_bitmap_name( +/*=========================*/ + ib_uint64_t start_lsn) /*!< in: the start LSN name part */ +{ + ut_snprintf(log_bmp_sys->out.name, FN_REFLEN, bmp_file_name_template, + srv_data_home, bmp_file_name_stem, + log_bmp_sys->out_seq_num, start_lsn); + +} + +/*********************************************************************//** +Create a new empty bitmap output file. */ +static +void +log_online_start_bitmap_file() +/*==========================*/ +{ + ibool success; + + log_bmp_sys->out.file + = os_file_create(innodb_file_bmp_key, log_bmp_sys->out.name, + OS_FILE_OVERWRITE, OS_FILE_NORMAL, + OS_DATA_FILE, &success); + if (UNIV_UNLIKELY(!success)) { + + /* The following call prints an error message */ + os_file_get_last_error(TRUE); + fprintf(stderr, + "InnoDB: Error: Cannot create \'%s\'\n", + log_bmp_sys->out.name); + exit(1); + } + + log_bmp_sys->out.offset = 0; +} + +/*********************************************************************//** +Close the current bitmap output file and create the next one. */ +static +void +log_online_rotate_bitmap_file( +/*===========================*/ + ib_uint64_t next_file_start_lsn) /*!<in: the start LSN name + part */ +{ + os_file_close(log_bmp_sys->out.file); + log_bmp_sys->out_seq_num++; + log_online_make_bitmap_name(next_file_start_lsn); + log_online_start_bitmap_file(); +} + +/*********************************************************************//** +Check the name of a given file if it's a changed page bitmap file and +return file sequence and start LSN name components if it is. If is not, +the values of output parameters are undefined. + +@return TRUE if a given file is a changed page bitmap file. 
*/ +static +ibool +log_online_is_bitmap_file( +/*======================*/ + const os_file_stat_t* file_info, /*!<in: file to + check */ + ulong* bitmap_file_seq_num, /*!<out: bitmap file + sequence number */ + ib_uint64_t* bitmap_file_start_lsn) /*!<out: bitmap file + start LSN */ +{ + char stem[FN_REFLEN]; + + ut_ad (strlen(file_info->name) < OS_FILE_MAX_PATH); + + return ((file_info->type == OS_FILE_TYPE_FILE + || file_info->type == OS_FILE_TYPE_LINK) + && (sscanf(file_info->name, "%[a-z_]%lu_%llu.xdb", stem, + bitmap_file_seq_num, bitmap_file_start_lsn) == 3) + && (!strcmp(stem, bmp_file_name_stem))); +} + +/*********************************************************************//** Initialize the online log following subsytem. */ UNIV_INTERN void log_online_read_init() /*==================*/ { - char buf[FN_REFLEN]; ibool success; ib_uint64_t tracking_start_lsn - = ut_max(log_sys->last_checkpoint_lsn, MIN_TRACKED_LSN); + = ut_max_uint64(log_sys->last_checkpoint_lsn, MIN_TRACKED_LSN); + os_file_dir_t bitmap_dir; + os_file_stat_t bitmap_dir_file_info; + ib_uint64_t last_file_start_lsn = MIN_TRACKED_LSN; /* Assert (could be compile-time assert) that bitmap data start and end in a bitmap block is 8-byte aligned */ @@ -424,82 +573,120 @@ log_online_read_init() log_bmp_sys = ut_malloc(sizeof(*log_bmp_sys)); - ut_snprintf(buf, FN_REFLEN, "%s%s%d", srv_data_home, - modified_page_stem, 1); - log_bmp_sys->out_name = ut_malloc(strlen(buf) + 1); - ut_strcpy(log_bmp_sys->out_name, buf); + /* Enumerate existing bitmap files to either open the last one to get + the last tracked LSN either to find that there are none and start + tracking from scratch. 
*/ + log_bmp_sys->out.name[0] = '\0'; + log_bmp_sys->out_seq_num = 0; + + bitmap_dir = os_file_opendir(srv_data_home, TRUE); + ut_a(bitmap_dir); + while (!os_file_readdir_next_file(srv_data_home, bitmap_dir, + &bitmap_dir_file_info)) { + + ulong file_seq_num; + ib_uint64_t file_start_lsn; + + if (!log_online_is_bitmap_file(&bitmap_dir_file_info, + &file_seq_num, + &file_start_lsn)) { + continue; + } + + if (file_seq_num > log_bmp_sys->out_seq_num + && bitmap_dir_file_info.size > 0) { + log_bmp_sys->out_seq_num = file_seq_num; + last_file_start_lsn = file_start_lsn; + /* No dir component (srv_data_home) here, because + that's the cwd */ + strncpy(log_bmp_sys->out.name, + bitmap_dir_file_info.name, FN_REFLEN - 1); + log_bmp_sys->out.name[FN_REFLEN - 1] = '\0'; + } + } + + if (os_file_closedir(bitmap_dir)) { + os_file_get_last_error(TRUE); + fprintf(stderr, "InnoDB: Error: cannot close \'%s\'\n", + srv_data_home); + exit(1); + } + + if (!log_bmp_sys->out_seq_num) { + log_bmp_sys->out_seq_num = 1; + log_online_make_bitmap_name(0); + } log_bmp_sys->modified_pages = rbt_create(MODIFIED_PAGE_BLOCK_SIZE, log_online_compare_bmp_keys); log_bmp_sys->page_free_list = NULL; - log_bmp_sys->out + log_bmp_sys->out.file = os_file_create_simple_no_error_handling - (innodb_file_bmp_key, log_bmp_sys->out_name, OS_FILE_OPEN, + (innodb_file_bmp_key, log_bmp_sys->out.name, OS_FILE_OPEN, OS_FILE_READ_WRITE, &success); if (!success) { /* New file, tracking from scratch */ - log_bmp_sys->out - = os_file_create_simple_no_error_handling - (innodb_file_bmp_key, log_bmp_sys->out_name, - OS_FILE_CREATE, OS_FILE_READ_WRITE, &success); - if (!success) { - - /* The following call prints an error message */ - os_file_get_last_error(TRUE); - fprintf(stderr, - "InnoDB: Error: Cannot create \'%s\'\n", - log_bmp_sys->out_name); - exit(1); - } - - log_bmp_sys->out_offset = 0; + log_online_start_bitmap_file(); } else { - /* Old file, read last tracked LSN and continue from there */ + /* Read the last 
tracked LSN from the last file */ ulint size_low; ulint size_high; ib_uint64_t last_tracked_lsn; - success = os_file_get_size(log_bmp_sys->out, &size_low, + success = os_file_get_size(log_bmp_sys->out.file, &size_low, &size_high); ut_a(success); - log_bmp_sys->out_offset + log_bmp_sys->out.size = ((ib_uint64_t)size_high << 32) | size_low; + log_bmp_sys->out.offset = log_bmp_sys->out.size; - if (log_bmp_sys->out_offset % MODIFIED_PAGE_BLOCK_SIZE != 0) { + if (log_bmp_sys->out.offset % MODIFIED_PAGE_BLOCK_SIZE != 0) { fprintf(stderr, "InnoDB: Warning: truncated block detected " "in \'%s\' at offset %llu\n", - log_bmp_sys->out_name, - log_bmp_sys->out_offset); - log_bmp_sys->out_offset -= - log_bmp_sys->out_offset + log_bmp_sys->out.name, + log_bmp_sys->out.offset); + log_bmp_sys->out.offset -= + log_bmp_sys->out.offset % MODIFIED_PAGE_BLOCK_SIZE; } last_tracked_lsn = log_online_read_last_tracked_lsn(); + if (!last_tracked_lsn) { + last_tracked_lsn = last_file_start_lsn; + } + + /* Start a new file. Choose the LSN value in its name based on + if we can retrack any missing data. */ + if (log_online_can_track_missing(last_tracked_lsn, + tracking_start_lsn)) { + log_online_rotate_bitmap_file(last_tracked_lsn); + } + else { + log_online_rotate_bitmap_file(tracking_start_lsn); + } if (last_tracked_lsn < tracking_start_lsn) { - log_online_track_missing_on_startup(last_tracked_lsn, - tracking_start_lsn); + log_online_track_missing_on_startup + (last_tracked_lsn, tracking_start_lsn); return; } if (last_tracked_lsn > tracking_start_lsn) { - fprintf(stderr, "InnoDB: last tracked LSN in \'%s\' " - "is %llu, but last checkpoint LSN is %llu. " + fprintf(stderr, "InnoDB: last tracked LSN is %llu, " + "but last the checkpoint LSN is %llu. 
" "The tracking-based incremental backups will " "work only from the latter LSN!\n", - log_bmp_sys->out_name, last_tracked_lsn, - tracking_start_lsn); + last_tracked_lsn, tracking_start_lsn); } } @@ -519,7 +706,7 @@ log_online_read_shutdown() { ib_rbt_node_t *free_list_node = log_bmp_sys->page_free_list; - os_file_close(log_bmp_sys->out); + os_file_close(log_bmp_sys->out.file); rbt_free(log_bmp_sys->modified_pages); @@ -529,7 +716,6 @@ log_online_read_shutdown() free_list_node = next; } - ut_free(log_bmp_sys->out_name); ut_free(log_bmp_sys); } @@ -746,8 +932,8 @@ log_online_follow_log_seg( /* The next parse LSN is inside the current block, skip data preceding it. */ skip_already_parsed_len - = log_bmp_sys->next_parse_lsn - - block_start_lsn; + = (ulint)(log_bmp_sys->next_parse_lsn + - block_start_lsn); } else { @@ -819,32 +1005,32 @@ log_online_write_bitmap_page( { ibool success; - success = os_file_write(log_bmp_sys->out_name,log_bmp_sys->out, + success = os_file_write(log_bmp_sys->out.name, log_bmp_sys->out.file, block, - (ulint)(log_bmp_sys->out_offset & 0xFFFFFFFF), - (ulint)(log_bmp_sys->out_offset << 32), + (ulint)(log_bmp_sys->out.offset & 0xFFFFFFFF), + (ulint)(log_bmp_sys->out.offset << 32), MODIFIED_PAGE_BLOCK_SIZE); if (UNIV_UNLIKELY(!success)) { /* The following call prints an error message */ os_file_get_last_error(TRUE); fprintf(stderr, "InnoDB: Error: failed writing changed page " - "bitmap file \'%s\'\n", log_bmp_sys->out_name); + "bitmap file \'%s\'\n", log_bmp_sys->out.name); return; } - success = os_file_flush(log_bmp_sys->out, FALSE); + success = os_file_flush(log_bmp_sys->out.file, FALSE); if (UNIV_UNLIKELY(!success)) { /* The following call prints an error message */ os_file_get_last_error(TRUE); fprintf(stderr, "InnoDB: Error: failed flushing " "changed page bitmap file \'%s\'\n", - log_bmp_sys->out_name); + log_bmp_sys->out.name); return; } - log_bmp_sys->out_offset += MODIFIED_PAGE_BLOCK_SIZE; + log_bmp_sys->out.offset += 
MODIFIED_PAGE_BLOCK_SIZE; } /*********************************************************************//** @@ -858,6 +1044,10 @@ log_online_write_bitmap() ib_rbt_node_t *bmp_tree_node; const ib_rbt_node_t *last_bmp_tree_node; + if (log_bmp_sys->out.offset >= srv_max_bitmap_file_size) { + log_online_rotate_bitmap_file(log_bmp_sys->start_lsn); + } + bmp_tree_node = (ib_rbt_node_t *) rbt_first(log_bmp_sys->modified_pages); last_bmp_tree_node = rbt_last(log_bmp_sys->modified_pages); @@ -930,47 +1120,306 @@ log_online_follow_redo_log() } /*********************************************************************//** -Initializes log bitmap iterator. +List the bitmap files in srv_data_home and setup their range that contains the +specified LSN interval. This range, if non-empty, will start with a file that +has the greatest LSN equal to or less than the start LSN and will include all +the files up to the one with the greatest LSN less than the end LSN. Caller +must free bitmap_files->files when done if bitmap_files set to non-NULL and +this function returned TRUE. Field bitmap_files->count might be set to a +larger value than the actual count of the files, and space for the unused array +slots will be allocated but cleared to zeroes. 
+ +@return TRUE if succeeded +*/ +static +ibool +log_online_setup_bitmap_file_range( +/*===============================*/ + log_online_bitmap_file_range_t *bitmap_files, /*!<in/out: bitmap file + range */ + ib_uint64_t range_start, /*!<in: start LSN */ + ib_uint64_t range_end) /*!<in: end LSN */ +{ + os_file_dir_t bitmap_dir; + os_file_stat_t bitmap_dir_file_info; + ulong first_file_seq_num = ULONG_MAX; + ib_uint64_t first_file_start_lsn = IB_ULONGLONG_MAX; + + bitmap_files->count = 0; + bitmap_files->files = NULL; + + /* 1st pass: size the info array */ + + bitmap_dir = os_file_opendir(srv_data_home, FALSE); + if (!bitmap_dir) { + fprintf(stderr, + "InnoDB: Error: " + "failed to open bitmap directory \'%s\'\n", + srv_data_home); + return FALSE; + } + + while (!os_file_readdir_next_file(srv_data_home, bitmap_dir, + &bitmap_dir_file_info)) { + + ulong file_seq_num; + ib_uint64_t file_start_lsn; + + if (!log_online_is_bitmap_file(&bitmap_dir_file_info, + &file_seq_num, + &file_start_lsn) + || file_start_lsn >= range_end) { + + continue; + } + + if (file_start_lsn >= range_start + || file_start_lsn == first_file_start_lsn + || first_file_start_lsn > range_start) { + + /* A file that falls into the range */ + bitmap_files->count++; + if (file_start_lsn < first_file_start_lsn) { + + first_file_start_lsn = file_start_lsn; + } + if (file_seq_num < first_file_seq_num) { + + first_file_seq_num = file_seq_num; + } + } else if (file_start_lsn > first_file_start_lsn) { + + /* A file that has LSN closer to the range start + but smaller than it, replacing another such file */ + first_file_start_lsn = file_start_lsn; + first_file_seq_num = file_seq_num; + } + } + + ut_a(first_file_seq_num != ULONG_MAX || bitmap_files->count == 0); + + if (os_file_closedir(bitmap_dir)) { + os_file_get_last_error(TRUE); + fprintf(stderr, "InnoDB: Error: cannot close \'%s\'\n", + srv_data_home); + return FALSE; + } + + if (!bitmap_files->count) { + return TRUE; + } + + /* 2nd pass: get the file 
names in the file_seq_num order */ + + bitmap_dir = os_file_opendir(srv_data_home, FALSE); + if (!bitmap_dir) { + fprintf(stderr, "InnoDB: Error: " + "failed to open bitmap directory \'%s\'\n", + srv_data_home); + return FALSE; + } + + bitmap_files->files = ut_malloc(bitmap_files->count + * sizeof(bitmap_files->files[0])); + memset(bitmap_files->files, 0, + bitmap_files->count * sizeof(bitmap_files->files[0])); + + while (!os_file_readdir_next_file(srv_data_home, bitmap_dir, + &bitmap_dir_file_info)) { + + ulong file_seq_num; + ib_uint64_t file_start_lsn; + size_t array_pos; + + if (!log_online_is_bitmap_file(&bitmap_dir_file_info, + &file_seq_num, + &file_start_lsn) + || file_start_lsn >= range_end + || file_start_lsn < first_file_start_lsn) { + continue; + } + + array_pos = file_seq_num - first_file_seq_num; + if (file_seq_num > bitmap_files->files[array_pos].seq_num) { + bitmap_files->files[array_pos].seq_num = file_seq_num; + strncpy(bitmap_files->files[array_pos].name, + bitmap_dir_file_info.name, FN_REFLEN); + bitmap_files->files[array_pos].name[FN_REFLEN - 1] + = '\0'; + bitmap_files->files[array_pos].start_lsn + = file_start_lsn; + } + } + + if (os_file_closedir(bitmap_dir)) { + os_file_get_last_error(TRUE); + fprintf(stderr, "InnoDB: Error: cannot close \'%s\'\n", + srv_data_home); + free(bitmap_files->files); + return FALSE; + } + +#ifdef UNIV_DEBUG + ut_ad(bitmap_files->files[0].seq_num == first_file_seq_num); + ut_ad(bitmap_files->files[0].start_lsn == first_file_start_lsn); + { + size_t i; + for (i = 1; i < bitmap_files->count; i++) { + if (!bitmap_files->files[i].seq_num) { + break; + } + ut_ad(bitmap_files->files[i].seq_num + > bitmap_files->files[i - 1].seq_num); + ut_ad(bitmap_files->files[i].start_lsn + >= bitmap_files->files[i - 1].start_lsn); + } + } +#endif + + return TRUE; +} + +/****************************************************************//** +Open a bitmap file for reading. 
+ +@return TRUE if opened successfully */ +static +ibool +log_online_open_bitmap_file_read_only( +/*==================================*/ + const char* name, /*!<in: bitmap file + name without directory, + which is assumed to be + srv_data_home */ + log_online_bitmap_file_t* bitmap_file) /*!<out: opened bitmap + file */ +{ + ibool success = FALSE; + ulint size_low; + ulint size_high; + + ut_snprintf(bitmap_file->name, FN_REFLEN, "%s%s", srv_data_home, name); + bitmap_file->file + = os_file_create_simple_no_error_handling(innodb_file_bmp_key, + bitmap_file->name, + OS_FILE_OPEN, + OS_FILE_READ_ONLY, + &success); + if (!success) { + /* Here and below assume that bitmap file names do not + contain apostrophes, thus no need for ut_print_filename(). */ + fprintf(stderr, + "InnoDB: Warning: error opening the changed page " + "bitmap \'%s\'\n", bitmap_file->name); + return FALSE; + } + + success = os_file_get_size(bitmap_file->file, &size_low, &size_high); + bitmap_file->size = (((ib_uint64_t)size_high) << 32) | size_low; + bitmap_file->offset = 0; + +#ifdef UNIV_LINUX + posix_fadvise(bitmap_file->file, 0, 0, POSIX_FADV_SEQUENTIAL); + posix_fadvise(bitmap_file->file, 0, 0, POSIX_FADV_NOREUSE); +#endif + + return TRUE; +} + +/****************************************************************//** +Diagnose one or both of the following situations if we read close to +the end of bitmap file: +1) Warn if the remainder of the file is less than one page. +2) Error if we cannot read any more full pages but the last read page +did not have the last-in-run flag set. 
+ +@return FALSE for the error */ +static +ibool +log_online_diagnose_bitmap_eof( +/*===========================*/ + const log_online_bitmap_file_t* bitmap_file, /*!< in: bitmap file */ + ibool last_page_in_run)/*!< in: "last page in + run" flag value in the + last read page */ +{ + /* Check if we are too close to EOF to read a full page */ + if ((bitmap_file->size < MODIFIED_PAGE_BLOCK_SIZE) + || (bitmap_file->offset + > bitmap_file->size - MODIFIED_PAGE_BLOCK_SIZE)) { + + if (bitmap_file->offset != bitmap_file->size) { + /* If we are not at EOF and we have less than one page + to read, it's junk. This error is not fatal in + itself. */ + + fprintf(stderr, + "InnoDB: Warning: junk at the end of changed " + "page bitmap file \'%s\'.\n", + bitmap_file->name); + } + + if (!last_page_in_run) { + /* We are at EOF but the last read page did not finish + a run */ + /* It's a "Warning" here because it's not a fatal error + for the whole server */ + fprintf(stderr, + "InnoDB: Warning: changed page bitmap " + "file \'%s\' does not contain a complete run " + "at the end.\n", bitmap_file->name); + return FALSE; + } + } + return TRUE; +} + +/*********************************************************************//** +Initialize the log bitmap iterator for a given range. The records are +processed at a bitmap block granularity, i.e. all the records in the same block +share the same start and end LSN values, the exact LSN of each record is +unavailable (nor is it defined for blocks that are touched more than once in +the LSN interval contained in the block). Thus min_lsn and max_lsn should be +set at block boundaries or bigger, otherwise the records at the 1st and the +last blocks will not be returned. Also note that there might be returned +records with LSN < min_lsn, as min_lsn is used to select the correct starting +file but not block. + @return TRUE if the iterator is initialized OK, FALSE otherwise. 
*/ UNIV_INTERN ibool log_online_bitmap_iterator_init( /*============================*/ - log_bitmap_iterator_t *i) /*!<in/out: iterator */ + log_bitmap_iterator_t *i, /*!<in/out: iterator */ + ib_uint64_t min_lsn,/*!< in: start LSN */ + ib_uint64_t max_lsn)/*!< in: end LSN */ { - ibool success; - ut_a(i); - ut_snprintf(i->in_name, FN_REFLEN, "%s%s%d", srv_data_home, - modified_page_stem, 1); - i->in_offset = 0; - /* - Set up bit offset out of the reasonable limit - to intiate reading block from file in - log_online_bitmap_iterator_next() - */ - i->bit_offset = MODIFIED_PAGE_BLOCK_BITMAP_LEN; - i->in = - os_file_create_simple_no_error_handling(innodb_file_bmp_key, - i->in_name, - OS_FILE_OPEN, - OS_FILE_READ_ONLY, - &success); - if (!success) { - /* The following call prints an error message */ - os_file_get_last_error(TRUE); - fprintf(stderr, - "InnoDB: Error: Cannot open \'%s\'\n", - i->in_name); + if (!log_online_setup_bitmap_file_range(&i->in_files, min_lsn, + max_lsn)) { + return FALSE; } - i->page = ut_malloc(MODIFIED_PAGE_BLOCK_SIZE); + ut_a(i->in_files.count > 0); + + /* Open the 1st bitmap file */ + i->in_i = 0; + if (!log_online_open_bitmap_file_read_only(i->in_files.files[i->in_i]. 
+ name, + &i->in)) { + i->in_i = i->in_files.count; + free(i->in_files.files); + return FALSE; + } + i->page = ut_malloc(MODIFIED_PAGE_BLOCK_SIZE); + i->bit_offset = MODIFIED_PAGE_BLOCK_BITMAP_LEN; i->start_lsn = i->end_lsn = 0; i->space_id = 0; i->first_page_id = 0; + i->last_page_in_run = TRUE; i->changed = FALSE; return TRUE; @@ -985,7 +1434,11 @@ log_online_bitmap_iterator_release( log_bitmap_iterator_t *i) /*!<in/out: iterator */ { ut_a(i); - os_file_close(i->in); + + if (i->in_i < i->in_files.count) { + os_file_close(i->in.file); + } + ut_free(i->in_files.files); ut_free(i->page); } @@ -1000,14 +1453,7 @@ log_online_bitmap_iterator_next( /*============================*/ log_bitmap_iterator_t *i) /*!<in/out: iterator */ { - ulint offset_low; - ulint offset_high; - ulint size_low; - ulint size_high; - ulint checksum = 0; - ulint actual_checksum = !checksum; - - ibool success; + ibool checksum_ok = FALSE; ut_a(i); @@ -1020,66 +1466,51 @@ log_online_bitmap_iterator_next( return TRUE; } - while (checksum != actual_checksum) + while (!checksum_ok) { - success = os_file_get_size(i->in, - &size_low, - &size_high); - if (!success) { - os_file_get_last_error(TRUE); - fprintf(stderr, - "InnoDB: Warning: can't get size of " - "page bitmap file \'%s\'\n", - i->in_name); - return FALSE; - } - - if (i->in_offset >= - (ib_uint64_t)(size_low) + - ((ib_uint64_t)(size_high) << 32)) - return FALSE; - - offset_high = (ulint)(i->in_offset >> 32); - offset_low = (ulint)(i->in_offset & 0xFFFFFFFF); + while (i->in.size < MODIFIED_PAGE_BLOCK_SIZE + || (i->in.offset + > i->in.size - MODIFIED_PAGE_BLOCK_SIZE)) { + + /* Advance file */ + i->in_i++; + os_file_close(i->in.file); + log_online_diagnose_bitmap_eof(&i->in, + i->last_page_in_run); + if (i->in_i == i->in_files.count + || i->in_files.files[i->in_i].seq_num == 0) { + + return FALSE; + } - success = os_file_read( - i->in, - i->page, - offset_low, - offset_high, - MODIFIED_PAGE_BLOCK_SIZE); + if 
(!log_online_open_bitmap_file_read_only( + i->in_files.files[i->in_i].name, + &i->in)) { + return FALSE; + } + } - if (!success) { + if (!log_online_read_bitmap_page(&i->in, i->page, + &checksum_ok)) { os_file_get_last_error(TRUE); fprintf(stderr, "InnoDB: Warning: failed reading " "changed page bitmap file \'%s\'\n", - i->in_name); + i->in_files.files[i->in_i].name); return FALSE; } - - checksum = mach_read_from_4( - i->page + MODIFIED_PAGE_BLOCK_CHECKSUM); - - actual_checksum = log_online_calc_checksum(i->page); - - i->in_offset += MODIFIED_PAGE_BLOCK_SIZE; } - i->start_lsn = - mach_read_from_8(i->page + MODIFIED_PAGE_START_LSN); - i->end_lsn = - mach_read_from_8(i->page + MODIFIED_PAGE_END_LSN); - i->space_id = - mach_read_from_4(i->page + MODIFIED_PAGE_SPACE_ID); - i->first_page_id = - mach_read_from_4(i->page + MODIFIED_PAGE_1ST_PAGE_ID); - i->bit_offset = - 0; - i->changed = - IS_BIT_SET(i->page + MODIFIED_PAGE_BLOCK_BITMAP, - i->bit_offset); + i->start_lsn = mach_read_from_8(i->page + MODIFIED_PAGE_START_LSN); + i->end_lsn = mach_read_from_8(i->page + MODIFIED_PAGE_END_LSN); + i->space_id = mach_read_from_4(i->page + MODIFIED_PAGE_SPACE_ID); + i->first_page_id = mach_read_from_4(i->page + + MODIFIED_PAGE_1ST_PAGE_ID); + i->last_page_in_run = mach_read_from_4(i->page + + MODIFIED_PAGE_IS_LAST_BLOCK); + i->bit_offset = 0; + i->changed = IS_BIT_SET(i->page + MODIFIED_PAGE_BLOCK_BITMAP, + i->bit_offset); return TRUE; } - diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c index 2555c010027..8e0516a84a9 100644 --- a/storage/xtradb/os/os0file.c +++ b/storage/xtradb/os/os0file.c @@ -2158,8 +2158,10 @@ os_file_set_eof_at( ib_uint64_t new_len)/*!< in: new file length */ { #ifdef __WIN__ - /* TODO: untested! */ - return(!_chsize_s(file, new_len)); + LARGE_INTEGER li, li2; + li.QuadPart = new_len; + return(SetFilePointerEx(file, li, &li2,FILE_BEGIN) + && SetEndOfFile(file)); #else /* TODO: works only with -D_FILE_OFFSET_BITS=64 ? 
*/ return(!ftruncate(file, new_len)); diff --git a/storage/xtradb/row/row0ins.c b/storage/xtradb/row/row0ins.c index 3ae4c227ddc..61c3720fa2e 100644 --- a/storage/xtradb/row/row0ins.c +++ b/storage/xtradb/row/row0ins.c @@ -2012,7 +2012,10 @@ row_ins_index_entry_low( the function will return in both low_match and up_match of the cursor sensible values */ - if (dict_index_is_clust(index)) { + if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) { + search_mode = (mode & BTR_MODIFY_TREE) + ? BTR_SEARCH_TREE : BTR_SEARCH_LEAF; + } else if (dict_index_is_clust(index)) { search_mode = mode; } else if (!(thr_get_trx(thr)->check_unique_secondary)) { search_mode = mode | BTR_INSERT | BTR_IGNORE_SEC_UNIQUE; @@ -2021,7 +2024,7 @@ row_ins_index_entry_low( } btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, - thr_get_trx(thr)->fake_changes ? BTR_SEARCH_LEAF : search_mode, + search_mode, &cursor, 0, __FILE__, __LINE__, &mtr); if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) { diff --git a/storage/xtradb/row/row0mysql.c b/storage/xtradb/row/row0mysql.c index 575160501c3..9ab85940760 100644 --- a/storage/xtradb/row/row0mysql.c +++ b/storage/xtradb/row/row0mysql.c @@ -1277,17 +1277,19 @@ run_again: que_thr_stop_for_mysql_no_error(thr, trx); - prebuilt->table->stat_n_rows++; + if (UNIV_LIKELY(!(trx->fake_changes))) { - srv_n_rows_inserted++; + prebuilt->table->stat_n_rows++; - if (prebuilt->table->stat_n_rows == 0) { - /* Avoid wrap-over */ - prebuilt->table->stat_n_rows--; + if (prebuilt->table->stat_n_rows == 0) { + /* Avoid wrap-over */ + prebuilt->table->stat_n_rows--; + } + + srv_n_rows_inserted++; + row_update_statistics_if_needed(prebuilt->table); } - if (!(trx->fake_changes)) - row_update_statistics_if_needed(prebuilt->table); trx->op_info = ""; return((int) err); @@ -1534,6 +1536,11 @@ run_again: que_thr_stop_for_mysql_no_error(thr, trx); + if (UNIV_UNLIKELY(trx->fake_changes)) { + trx->op_info = ""; + return((int) err); + } + if (node->is_delete) { if 
(prebuilt->table->stat_n_rows > 0) { prebuilt->table->stat_n_rows--; @@ -1548,7 +1555,6 @@ run_again: that changes indexed columns, UPDATEs that change only non-indexed columns would not affect statistics. */ if (node->is_delete || !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { - if (!(trx->fake_changes)) row_update_statistics_if_needed(prebuilt->table); } @@ -1756,6 +1762,11 @@ run_again: return(err); } + if (UNIV_UNLIKELY((trx->fake_changes))) { + + return(err); + } + if (node->is_delete) { if (table->stat_n_rows > 0) { table->stat_n_rows--; @@ -1766,7 +1777,6 @@ run_again: srv_n_rows_updated++; } - if (!(trx->fake_changes)) row_update_statistics_if_needed(table); return(err); diff --git a/storage/xtradb/row/row0upd.c b/storage/xtradb/row/row0upd.c index 859b3d415ab..b2a0eb57669 100644 --- a/storage/xtradb/row/row0upd.c +++ b/storage/xtradb/row/row0upd.c @@ -2018,7 +2018,8 @@ row_upd_clust_rec( the same transaction do not modify the record in the meantime. Therefore we can assert that the restoration of the cursor succeeds. */ - ut_a(btr_pcur_restore_position(thr_get_trx(thr)->fake_changes ? BTR_SEARCH_LEAF : BTR_MODIFY_TREE, + ut_a(btr_pcur_restore_position(thr_get_trx(thr)->fake_changes + ? BTR_SEARCH_TREE : BTR_MODIFY_TREE, pcur, mtr)); ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur), diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c index 9d479ac6c87..6e210071746 100644 --- a/storage/xtradb/srv/srv0srv.c +++ b/storage/xtradb/srv/srv0srv.c @@ -179,8 +179,14 @@ UNIV_INTERN ibool srv_recovery_stats = FALSE; UNIV_INTERN my_bool srv_track_changed_pages = TRUE; +UNIV_INTERN ib_uint64_t srv_max_bitmap_file_size = 100 * 1024 * 1024; + UNIV_INTERN ulonglong srv_changed_pages_limit = 0; +/** When TRUE, fake change transactions take S rather than X row locks. + When FALSE, row locks are not taken at all.
*/ +UNIV_INTERN my_bool srv_fake_changes_locks = TRUE; + /* if TRUE, then we auto-extend the last data file */ UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE; /* if != 0, this tells the max size auto-extending may increase the diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c index 65a775b56da..2faa68cb87c 100644 --- a/storage/xtradb/srv/srv0start.c +++ b/storage/xtradb/srv/srv0start.c @@ -1148,6 +1148,24 @@ skip_size_check: return(DB_SUCCESS); } +/*********************************************************************//** +Initializes the log tracking subsystem and starts its thread. */ +static +void +init_log_online(void) +/*=================*/ +{ + if (srv_track_changed_pages) { + + log_online_read_init(); + + /* Create the thread that follows the redo log to output the + changed page bitmap */ + os_thread_create(&srv_redo_log_follow_thread, NULL, + thread_ids + 5 + SRV_MAX_N_IO_THREADS); + } +} + /******************************************************************** Starts InnoDB and creates a new database if database files are not found and the user wants. @@ -1794,6 +1812,8 @@ innobase_start_or_create_for_mysql(void) trx_sys_file_format_init(); if (create_new_db) { + init_log_online(); + mtr_start(&mtr); fsp_header_init(0, sum_of_new_sizes, &mtr); @@ -1893,6 +1913,8 @@ innobase_start_or_create_for_mysql(void) return(DB_ERROR); } + init_log_online(); + /* Since the insert buffer init is in dict_boot, and the insert buffer is needed in any disk i/o, first we call dict_boot(). Note that trx_sys_init_at_db_start() only needs @@ -2040,19 +2062,6 @@ innobase_start_or_create_for_mysql(void) if (srv_auto_lru_dump && srv_blocking_lru_restore) buf_LRU_file_restore(); - if (srv_track_changed_pages) { - - /* Initialize the log tracking subsystem here to block - server startup until it's completed due to the potential - need to re-read previous server run's log. 
*/ - log_online_read_init(); - - /* Create the thread that follows the redo log to output the - changed page bitmap */ - os_thread_create(&srv_redo_log_follow_thread, NULL, - thread_ids + 6 + SRV_MAX_N_IO_THREADS); - } - srv_is_being_started = FALSE; err = dict_create_or_check_foreign_constraint_tables(); |