summaryrefslogtreecommitdiff
path: root/storage/xtradb
diff options
context:
space:
mode:
Diffstat (limited to 'storage/xtradb')
-rw-r--r--storage/xtradb/btr/btr0cur.c68
-rw-r--r--storage/xtradb/btr/btr0pcur.c1
-rw-r--r--storage/xtradb/buf/buf0lru.c40
-rw-r--r--storage/xtradb/buf/buf0rea.c4
-rw-r--r--storage/xtradb/fsp/fsp0fsp.c6
-rw-r--r--storage/xtradb/handler/ha_innodb.cc83
-rw-r--r--storage/xtradb/handler/ha_innodb.h1
-rw-r--r--storage/xtradb/handler/i_s.cc282
-rw-r--r--storage/xtradb/ibuf/ibuf0ibuf.c2
-rw-r--r--storage/xtradb/include/btr0btr.h5
-rw-r--r--storage/xtradb/include/btr0cur.h5
-rw-r--r--storage/xtradb/include/log0online.h104
-rw-r--r--storage/xtradb/include/srv0srv.h7
-rw-r--r--storage/xtradb/include/univ.i2
-rw-r--r--storage/xtradb/include/ut0ut.h9
-rw-r--r--storage/xtradb/include/ut0ut.ic13
-rw-r--r--storage/xtradb/lock/lock0lock.c18
-rw-r--r--storage/xtradb/log/log0log.c4
-rw-r--r--storage/xtradb/log/log0online.c829
-rw-r--r--storage/xtradb/os/os0file.c6
-rw-r--r--storage/xtradb/row/row0ins.c7
-rw-r--r--storage/xtradb/row/row0mysql.c28
-rw-r--r--storage/xtradb/row/row0upd.c3
-rw-r--r--storage/xtradb/srv/srv0srv.c6
-rw-r--r--storage/xtradb/srv/srv0start.c35
25 files changed, 1144 insertions, 424 deletions
diff --git a/storage/xtradb/btr/btr0cur.c b/storage/xtradb/btr/btr0cur.c
index 61c07ac792e..687853a422e 100644
--- a/storage/xtradb/btr/btr0cur.c
+++ b/storage/xtradb/btr/btr0cur.c
@@ -239,6 +239,7 @@ btr_cur_latch_leaves(
mtr_t* mtr) /*!< in: mtr */
{
ulint mode;
+ ulint sibling_mode;
ulint left_page_no;
ulint right_page_no;
buf_block_t* get_block;
@@ -261,14 +262,21 @@ btr_cur_latch_leaves(
#endif /* UNIV_BTR_DEBUG */
get_block->check_index_page_at_flush = TRUE;
return;
+ case BTR_SEARCH_TREE:
case BTR_MODIFY_TREE:
- /* x-latch also brothers from left to right */
+ if (UNIV_UNLIKELY(latch_mode == BTR_SEARCH_TREE)) {
+ mode = RW_S_LATCH;
+ sibling_mode = RW_NO_LATCH;
+ } else {
+ mode = sibling_mode = RW_X_LATCH;
+ }
+ /* Fetch and possibly latch also brothers from left to right */
left_page_no = btr_page_get_prev(page, mtr);
if (left_page_no != FIL_NULL) {
get_block = btr_block_get(
space, zip_size, left_page_no,
- RW_X_LATCH, cursor->index, mtr);
+ sibling_mode, cursor->index, mtr);
if (srv_pass_corrupt_table && !get_block) {
return;
@@ -280,12 +288,21 @@ btr_cur_latch_leaves(
ut_a(btr_page_get_next(get_block->frame, mtr)
== page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
+ if (sibling_mode == RW_NO_LATCH) {
+ /* btr_block_get() called with RW_NO_LATCH will
+ fix the read block in the buffer. This serves
+ no purpose for the fake changes prefetching,
+ thus we unfix the sibling blocks immediately.*/
+ mtr_memo_release(mtr, get_block,
+ MTR_MEMO_BUF_FIX);
+ } else {
+ get_block->check_index_page_at_flush = TRUE;
+ }
}
get_block = btr_block_get(
space, zip_size, page_no,
- RW_X_LATCH, cursor->index, mtr);
+ mode, cursor->index, mtr);
if (srv_pass_corrupt_table && !get_block) {
return;
@@ -301,7 +318,7 @@ btr_cur_latch_leaves(
if (right_page_no != FIL_NULL) {
get_block = btr_block_get(
space, zip_size, right_page_no,
- RW_X_LATCH, cursor->index, mtr);
+ sibling_mode, cursor->index, mtr);
if (srv_pass_corrupt_table && !get_block) {
return;
@@ -313,7 +330,12 @@ btr_cur_latch_leaves(
ut_a(btr_page_get_prev(get_block->frame, mtr)
== page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
+ if (sibling_mode == RW_NO_LATCH) {
+ mtr_memo_release(mtr, get_block,
+ MTR_MEMO_BUF_FIX);
+ } else {
+ get_block->check_index_page_at_flush = TRUE;
+ }
}
return;
@@ -1566,6 +1588,9 @@ btr_cur_pessimistic_insert(
}
if (!(flags & BTR_NO_UNDO_LOG_FLAG)) {
+
+ ut_a(cursor->tree_height != ULINT_UNDEFINED);
+
/* First reserve enough free space for the file segments
of the index tree, so that the insert will not fail because
of lack of space */
@@ -1860,7 +1885,8 @@ btr_cur_update_alloc_zip(
ulint length, /*!< in: size needed */
ibool create, /*!< in: TRUE=delete-and-insert,
FALSE=update-in-place */
- mtr_t* mtr) /*!< in: mini-transaction */
+ mtr_t* mtr, /*!< in: mini-transaction */
+ trx_t* trx) /*!< in: NULL or transaction */
{
ut_a(page_zip == buf_block_get_page_zip(block));
ut_ad(page_zip);
@@ -1877,6 +1903,14 @@ btr_cur_update_alloc_zip(
return(FALSE);
}
+ if (trx && trx->fake_changes) {
+ /* Don't call page_zip_compress_write_log_no_data as that has
+ an assert which would fail. Assume there won't be a compression
+ failure. */
+
+ return TRUE;
+ }
+
if (!page_zip_compress(page_zip, buf_block_get_frame(block),
index, mtr)) {
/* Unable to compress the page */
@@ -1960,7 +1994,8 @@ btr_cur_update_in_place(
/* Check that enough space is available on the compressed page. */
if (page_zip
&& !btr_cur_update_alloc_zip(page_zip, block, index,
- rec_offs_size(offsets), FALSE, mtr)) {
+ rec_offs_size(offsets), FALSE, mtr,
+ trx)) {
return(DB_ZIP_OVERFLOW);
}
@@ -2159,7 +2194,8 @@ any_extern:
if (page_zip
&& !btr_cur_update_alloc_zip(page_zip, block, index,
- new_rec_size, TRUE, mtr)) {
+ new_rec_size, TRUE, mtr,
+ thr_get_trx(thr))) {
err = DB_ZIP_OVERFLOW;
goto err_exit;
}
@@ -2402,7 +2438,15 @@ btr_cur_pessimistic_update(
of the index tree, so that the update will not fail because
of lack of space */
- n_extents = cursor->tree_height / 16 + 3;
+ if (UNIV_UNLIKELY(cursor->tree_height == ULINT_UNDEFINED)) {
+ /* When the tree height is uninitialized due to fake
+ changes, reserve some hardcoded number of extents. */
+ ut_a(thr && thr_get_trx(thr)->fake_changes);
+ n_extents = 3;
+ }
+ else {
+ n_extents = cursor->tree_height / 16 + 3;
+ }
if (flags & BTR_NO_UNDO_LOG_FLAG) {
reserve_flag = FSP_CLEANING;
@@ -2439,7 +2483,7 @@ btr_cur_pessimistic_update(
itself. Thus the following call is safe. */
row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
FALSE, *heap);
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
+ if (!(flags & BTR_KEEP_SYS_FLAG) && !trx->fake_changes) {
row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
roll_ptr);
row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
@@ -3210,6 +3254,8 @@ btr_cur_pessimistic_delete(
of the index tree, so that the node pointer updates will
not fail because of lack of space */
+ ut_a(cursor->tree_height != ULINT_UNDEFINED);
+
n_extents = cursor->tree_height / 32 + 1;
success = fsp_reserve_free_extents(&n_reserved,
diff --git a/storage/xtradb/btr/btr0pcur.c b/storage/xtradb/btr/btr0pcur.c
index b335e2c8aee..a1b7affdeb7 100644
--- a/storage/xtradb/btr/btr0pcur.c
+++ b/storage/xtradb/btr/btr0pcur.c
@@ -47,6 +47,7 @@ btr_pcur_create_for_mysql(void)
pcur->btr_cur.index = NULL;
btr_pcur_init(pcur);
+ pcur->btr_cur.tree_height = ULINT_UNDEFINED;
return(pcur);
}
diff --git a/storage/xtradb/buf/buf0lru.c b/storage/xtradb/buf/buf0lru.c
index a6a1f8dcf9c..14b5c65132c 100644
--- a/storage/xtradb/buf/buf0lru.c
+++ b/storage/xtradb/buf/buf0lru.c
@@ -239,9 +239,11 @@ buf_LRU_drop_page_hash_batch(
When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page
hash index entries belonging to that table. This function tries to
do that in batch. Note that this is a 'best effort' attempt and does
-not guarantee that ALL hash entries will be removed. */
+not guarantee that ALL hash entries will be removed.
+
+@return number of hashed pages found*/
static
-void
+ulint
buf_LRU_drop_page_hash_for_tablespace(
/*==================================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
@@ -251,13 +253,14 @@ buf_LRU_drop_page_hash_for_tablespace(
ulint* page_arr;
ulint num_entries;
ulint zip_size;
+ ulint num_found = 0;
zip_size = fil_space_get_zip_size(id);
if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
/* Somehow, the tablespace does not exist. Nothing to drop. */
ut_ad(0);
- return;
+ return num_found;
}
page_arr = ut_malloc(
@@ -315,6 +318,7 @@ next_page:
ut_a(num_entries < BUF_LRU_DROP_SEARCH_SIZE);
++num_entries;
+ ++num_found;
if (num_entries < BUF_LRU_DROP_SEARCH_SIZE) {
goto next_page;
@@ -370,6 +374,8 @@ next_page:
/* Drop any remaining batch of search hashed pages. */
buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
ut_free(page_arr);
+
+ return num_found;
}
/******************************************************************//**
@@ -814,8 +820,6 @@ buf_LRU_mark_space_was_deleted(
for (i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;
buf_page_t* bpage;
- buf_chunk_t* chunk;
- ulint j, k;
buf_pool = buf_pool_from_array(i);
@@ -832,28 +836,10 @@ buf_LRU_mark_space_was_deleted(
mutex_exit(&buf_pool->LRU_list_mutex);
- btr_search_s_lock_all();
- chunk = buf_pool->chunks;
- for (j = buf_pool->n_chunks; j--; chunk++) {
- buf_block_t* block = chunk->blocks;
- for (k = chunk->size; k--; block++) {
- if (buf_block_get_state(block)
- != BUF_BLOCK_FILE_PAGE
- || !block->index
- || buf_page_get_space(&block->page) != id) {
- continue;
- }
-
- btr_search_s_unlock_all();
-
- rw_lock_x_lock(&block->lock);
- btr_search_drop_page_hash_index(block);
- rw_lock_x_unlock(&block->lock);
-
- btr_search_s_lock_all();
- }
- }
- btr_search_s_unlock_all();
+ /* The AHI entries for the tablespace being deleted should be
+ removed by now. */
+ ut_ad(buf_LRU_drop_page_hash_for_tablespace(buf_pool, id)
+ == 0);
}
}
diff --git a/storage/xtradb/buf/buf0rea.c b/storage/xtradb/buf/buf0rea.c
index 67379d614a0..6d76a488af7 100644
--- a/storage/xtradb/buf/buf0rea.c
+++ b/storage/xtradb/buf/buf0rea.c
@@ -235,6 +235,9 @@ not_to_recover:
sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
((buf_block_t*) bpage)->frame, bpage, trx);
}
+ if(sync) {
+ thd_wait_end(NULL);
+ }
if (*err == DB_TABLESPACE_DELETED) {
buf_read_page_handle_error(bpage);
@@ -250,7 +253,6 @@ not_to_recover:
}
if (sync) {
- thd_wait_end(NULL);
/* The i/o is already completed when we arrive from
fil_read */
if (!buf_page_io_complete(bpage)) {
diff --git a/storage/xtradb/fsp/fsp0fsp.c b/storage/xtradb/fsp/fsp0fsp.c
index d4a2745b90b..5cbc74b0862 100644
--- a/storage/xtradb/fsp/fsp0fsp.c
+++ b/storage/xtradb/fsp/fsp0fsp.c
@@ -3031,7 +3031,11 @@ try_again:
some of them will contain extent descriptor pages, and therefore
will not be free extents */
- n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
+ if (size <= free_limit) {
+ n_free_up = 0;
+ } else {
+ n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
+ }
if (n_free_up > 0) {
n_free_up--;
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index ec976bfeb45..12f6f5134d2 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -381,6 +381,7 @@ static PSI_file_info all_innodb_files[] = {
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static int innobase_close_connection(handlerton *hton, THD* thd);
+static void innobase_kill_query(handlerton *hton, THD* thd, enum thd_kill_levels level);
static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
static int innobase_commit(handlerton *hton, THD* thd, bool all);
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
@@ -1053,6 +1054,13 @@ thd_to_trx(
return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr));
}
+my_bool
+ha_innobase::is_fake_change_enabled(THD* thd)
+{
+ trx_t* trx = thd_to_trx(thd);
+ return(trx && trx->fake_changes);
+}
+
/********************************************************************//**
Call this function when mysqld passes control to the client. That is to
avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more
@@ -1117,8 +1125,7 @@ convert_error_code_to_mysql(
return(0);
case DB_INTERRUPTED:
- my_error(ER_QUERY_INTERRUPTED, MYF(0));
- /* fall through */
+ return(HA_ERR_ABORTED_BY_USER);
case DB_FOREIGN_EXCEED_MAX_CASCADE:
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
@@ -2382,7 +2389,7 @@ trx_is_interrupted(
/*===============*/
trx_t* trx) /*!< in: transaction */
{
- return(trx && trx->mysql_thd && thd_killed((THD*) trx->mysql_thd));
+ return(trx && trx->mysql_thd && thd_kill_level((THD*) trx->mysql_thd));
}
/**********************************************************************//**
@@ -2639,6 +2646,7 @@ innobase_init(
innobase_hton->flags=HTON_NO_FLAGS;
innobase_hton->release_temporary_latches=innobase_release_temporary_latches;
innobase_hton->alter_table_flags = innobase_alter_table_flags;
+ innobase_hton->kill_query = innobase_kill_query;
ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
@@ -3052,6 +3060,14 @@ innobase_change_buffering_inited_ok:
srv_use_checksums = (ibool) innobase_use_checksums;
srv_fast_checksum = (ibool) innobase_fast_checksum;
+ if (innobase_fast_checksum) {
+ fprintf(stderr,
+ "InnoDB: Warning: innodb_fast_checksum is DEPRECATED "
+ "and *WILL* be removed in Percona Server 5.6. Please "
+ "consult the Percona Server 5.6 documentation for "
+ "help in upgrading.\n");
+ }
+
srv_blocking_lru_restore = (ibool) innobase_blocking_lru_restore;
#ifdef HAVE_LARGE_PAGES
@@ -3938,6 +3954,33 @@ innobase_close_connection(
DBUG_RETURN(0);
}
+/*****************************************************************//**
+Cancel any pending lock request associated with the current THD. */
+static
+void
+innobase_kill_query(
+/*======================*/
+ handlerton* hton, /*!< in: innobase handlerton */
+ THD* thd, /*!< in: MySQL thread being killed */
+ enum thd_kill_levels level) /*!< in: kill level */
+{
+ trx_t* trx;
+ DBUG_ENTER("innobase_kill_query");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ mutex_enter(&kernel_mutex);
+
+ trx = thd_to_trx(thd);
+
+ /* Cancel a pending lock request. */
+ if (trx && trx->wait_lock) {
+ lock_cancel_waiting_and_release(trx->wait_lock);
+ }
+
+ mutex_exit(&kernel_mutex);
+
+ DBUG_VOID_RETURN;
+}
/*************************************************************************//**
** InnoDB database tables
@@ -6278,7 +6321,9 @@ no_commit:
error = row_insert_for_mysql((byte*) record, prebuilt);
#ifdef EXTENDED_FOR_USERSTAT
- if (error == DB_SUCCESS) rows_changed++;
+ if (UNIV_LIKELY(error == DB_SUCCESS && !trx->fake_changes)) {
+ rows_changed++;
+ }
#endif
/* Handle duplicate key errors */
@@ -6641,7 +6686,9 @@ ha_innobase::update_row(
}
#ifdef EXTENDED_FOR_USERSTAT
- if (error == DB_SUCCESS) rows_changed++;
+ if (UNIV_LIKELY(error == DB_SUCCESS && !trx->fake_changes)) {
+ rows_changed++;
+ }
#endif
innodb_srv_conc_exit_innodb(trx);
@@ -6704,7 +6751,9 @@ ha_innobase::delete_row(
error = row_update_for_mysql((byte*) record, prebuilt);
#ifdef EXTENDED_FOR_USERSTAT
- if (error == DB_SUCCESS) rows_changed++;
+ if (UNIV_LIKELY(error == DB_SUCCESS && !trx->fake_changes)) {
+ rows_changed++;
+ }
#endif
innodb_srv_conc_exit_innodb(trx);
@@ -9825,7 +9874,7 @@ ha_innobase::check(
row_mysql_unlock_data_dictionary(prebuilt->trx);
}
- if (thd_killed(user_thd)) {
+ if (thd_kill_level(user_thd)) {
break;
}
@@ -9882,7 +9931,7 @@ ha_innobase::check(
mutex_exit(&kernel_mutex);
prebuilt->trx->op_info = "";
- if (thd_killed(user_thd)) {
+ if (thd_kill_level(user_thd)) {
my_error(ER_QUERY_INTERRUPTED, MYF(0));
}
@@ -12674,6 +12723,8 @@ static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums,
static MYSQL_SYSVAR_BOOL(fast_checksum, innobase_fast_checksum,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "DEPRECATED. #### WARNING #### : This feature is DEPRECATED and WILL "
+ "be removed in Percona Server 5.6. "
"Change the algorithm of checksum for the whole of datapage to 4-bytes word based. "
"The original checksum is checked after the new one. It may be slow for reading page"
" which has orginal checksum. Overwrite the page or recreate the InnoDB database, "
@@ -13107,6 +13158,11 @@ static MYSQL_SYSVAR_BOOL(track_changed_pages, srv_track_changed_pages,
"Track the redo log for changed pages and output a changed page bitmap",
NULL, NULL, FALSE);
+static MYSQL_SYSVAR_ULONGLONG(max_bitmap_file_size, srv_max_bitmap_file_size,
+ PLUGIN_VAR_RQCMDARG,
+ "The maximum size of changed page bitmap files",
+ NULL, NULL, 100*1024*1024ULL, 4096ULL, ULONGLONG_MAX, 0);
+
static MYSQL_SYSVAR_ULONGLONG(changed_pages_limit, srv_changed_pages_limit,
PLUGIN_VAR_RQCMDARG,
"The maximum number of rows for "
@@ -13309,6 +13365,13 @@ static MYSQL_SYSVAR_ULINT(lazy_drop_table, srv_lazy_drop_table,
"e.g. for http://bugs.mysql.com/51325",
NULL, NULL, 0, 0, 1, 0);
+static MYSQL_SYSVAR_BOOL(locking_fake_changes, srv_fake_changes_locks,
+ PLUGIN_VAR_NOCMDARG,
+ "###EXPERIMENTAL### if enabled, transactions will get S row locks instead "
+ "of X locks for fake changes. If disabled, fake change transactions will "
+ "not take any locks at all.",
+ NULL, NULL, TRUE);
+
static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(page_size),
MYSQL_SYSVAR(log_block_size),
@@ -13400,6 +13463,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(use_native_aio),
MYSQL_SYSVAR(change_buffering),
MYSQL_SYSVAR(track_changed_pages),
+ MYSQL_SYSVAR(max_bitmap_file_size),
MYSQL_SYSVAR(changed_pages_limit),
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
MYSQL_SYSVAR(change_buffering_debug),
@@ -13418,6 +13482,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(corrupt_table_action),
MYSQL_SYSVAR(lazy_drop_table),
MYSQL_SYSVAR(fake_changes),
+ MYSQL_SYSVAR(locking_fake_changes),
MYSQL_SYSVAR(merge_sort_block_size),
NULL
};
@@ -13666,7 +13731,7 @@ int ha_innobase::multi_range_read_explain_info(uint mrr_mode, char *str, size_t
bool ha_innobase::is_thd_killed()
{
- return thd_killed(user_thd);
+ return thd_kill_level(user_thd);
}
/**
diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h
index 4d9c0a1ab35..359d0b95367 100644
--- a/storage/xtradb/handler/ha_innodb.h
+++ b/storage/xtradb/handler/ha_innodb.h
@@ -137,6 +137,7 @@ class ha_innobase: public handler
int close(void);
double scan_time();
double read_time(uint index, uint ranges, ha_rows rows);
+ my_bool is_fake_change_enabled(THD *thd);
bool is_corrupt() const;
int write_row(uchar * buf);
diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc
index 29a80594344..4b33d6a780c 100644
--- a/storage/xtradb/handler/i_s.cc
+++ b/storage/xtradb/handler/i_s.cc
@@ -7147,29 +7147,38 @@ static ST_FIELD_INFO i_s_innodb_changed_pages_info[] =
};
/***********************************************************************
- This function parses condition and gets upper bounds for start and end LSN's
- if condition corresponds to certain pattern.
+ This function implements ICP for I_S.INNODB_CHANGED_PAGES by parsing a
+ condition and getting lower and upper bounds for start and end LSNs if the
+ condition corresponds to a certain pattern.
- We can't know right position to avoid scanning bitmap files from the beginning
- to the lower bound. But we can stop scanning bitmap files if we reach upper bound.
+ In the most general form, we understand queries like
- It's expected the most used queries will be like the following:
-
- SELECT * FROM INNODB_CHANGED_PAGES WHERE START_LSN > num1 AND start_lsn < num2;
+ SELECT * FROM INNODB_CHANGED_PAGES
+ WHERE START_LSN > num1 AND START_LSN < num2
+ AND END_LSN > num3 AND END_LSN < num4;
- That's why the pattern is:
+ That's why the pattern syntax is:
pattern: comp | and_comp;
comp: lsn < int_num | lsn <= int_num | int_num > lsn | int_num >= lsn;
lsn: start_lsn | end_lsn;
- and_comp: some_expression AND some_expression | some_expression AND and_comp;
- some_expression: comp | any_other_expression;
+ and_comp: expression AND expression | expression AND and_comp;
+ expression: comp | any_other_expression;
+
+ The two bounds are handled differently: the lower bound is used to find the
+ correct starting _file_, the upper bound the last _block_ that needs reading.
+
+ Lower bound conditions are handled in the following way: start_lsn >= X
+ specifies that the reading must start from the file that has the highest
+ starting LSN less than or equal to X. start_lsn > X is equivalent to
+ start_lsn >= X + 1. For end_lsn, end_lsn >= X is treated as
+ start_lsn >= X - 1 and end_lsn > X as start_lsn >= X.
- Suppose the condition is start_lsn < 100, this means we have to read all
- blocks with start_lsn < 100. Which is equivalent to reading all the blocks
- with end_lsn <= 99, or just end_lsn < 100. That's why it's enough to find
- maximum lsn value, doesn't matter if this is start or end lsn and compare
- it with "start_lsn" field.
+ For the upper bound, suppose the condition is start_lsn < 100, this means we
+ have to read all blocks with start_lsn < 100. Which is equivalent to reading
+ all the blocks with end_lsn <= 99, or just end_lsn < 100. That's why it's
+ enough to find maximum lsn value, doesn't matter if this is start or end lsn
+ and compare it with "start_lsn" field. LSN <= 100 is treated as LSN < 101.
Example:
@@ -7180,92 +7189,130 @@ static ST_FIELD_INFO i_s_innodb_changed_pages_info[] =
555 > end_lsn AND
page_id = 100;
- max_lsn will be set to 555.
+ end_lsn will be set to 555, start_lsn will be set to 11.
+
+ Support for other functions (equal, NULL-safe equal, BETWEEN, IN, etc.) will
+ be added on demand.
+
*/
static
void
limit_lsn_range_from_condition(
/*===========================*/
- TABLE* table, /*!<in: table */
- COND* cond, /*!<in: condition */
- ib_uint64_t* max_lsn) /*!<in/out: maximum LSN
- (must be initialized with maximum
- available value) */
+ TABLE* table, /*!<in: table */
+ COND* cond, /*!<in: condition */
+ ib_uint64_t* start_lsn, /*!<in/out: minimum LSN */
+ ib_uint64_t* end_lsn) /*!<in/out: maximum LSN */
{
+ enum Item_func::Functype func_type;
+
if (cond->type() != Item::COND_ITEM &&
cond->type() != Item::FUNC_ITEM)
return;
- switch (((Item_func*) cond)->functype())
+ func_type = ((Item_func*) cond)->functype();
+
+ switch (func_type)
{
- case Item_func::COND_AND_FUNC:
- {
- List_iterator<Item> li(*((Item_cond*) cond)->
- argument_list());
- Item *item;
- while ((item= li++))
- limit_lsn_range_from_condition(table,
- item,
- max_lsn);
- break;
+ case Item_func::COND_AND_FUNC:
+ {
+ List_iterator<Item> li(*((Item_cond*) cond)
+ ->argument_list());
+ Item *item;
+
+ while ((item= li++)) {
+ limit_lsn_range_from_condition(table, item, start_lsn,
+ end_lsn);
+ }
+ break;
+ }
+ case Item_func::LT_FUNC:
+ case Item_func::LE_FUNC:
+ case Item_func::GT_FUNC:
+ case Item_func::GE_FUNC:
+ {
+ Item *left;
+ Item *right;
+ Item_field *item_field;
+ ib_uint64_t tmp_result;
+ ibool is_end_lsn;
+
+ /* a <= b is equivalent to b >= a, which is why we just exchange
+ "left" and "right" in the case of the ">" or ">=" function. We
+ don't touch the operation itself. */
+ if (((Item_func*) cond)->functype() == Item_func::LT_FUNC
+ || ((Item_func*) cond)->functype() == Item_func::LE_FUNC) {
+ left = ((Item_func*) cond)->arguments()[0];
+ right = ((Item_func*) cond)->arguments()[1];
+ } else {
+ left = ((Item_func*) cond)->arguments()[1];
+ right = ((Item_func*) cond)->arguments()[0];
}
- case Item_func::LT_FUNC:
- case Item_func::LE_FUNC:
- case Item_func::GT_FUNC:
- case Item_func::GE_FUNC:
- {
- Item *left;
- Item *right;
- Item_field *item_field;
- ib_uint64_t tmp_result;
-
- /*
- a <= b equals to b >= a that's why we just exchange
- "left" and "right" in the case of ">" or ">="
- function
- */
- if (((Item_func*) cond)->functype() ==
- Item_func::LT_FUNC ||
- ((Item_func*) cond)->functype() ==
- Item_func::LE_FUNC)
- {
- left = ((Item_func*) cond)->arguments()[0];
- right = ((Item_func*) cond)->arguments()[1];
- } else {
- left = ((Item_func*) cond)->arguments()[1];
- right = ((Item_func*) cond)->arguments()[0];
- }
- if (!left || !right)
- return;
- if (left->type() != Item::FIELD_ITEM)
- return;
- if (right->type() != Item::INT_ITEM)
- return;
+ if (left->type() == Item::FIELD_ITEM) {
+ item_field = (Item_field *)left;
+ } else if (right->type() == Item::FIELD_ITEM) {
+ item_field = (Item_field *)right;
+ } else {
+ return;
+ }
- item_field = (Item_field*)left;
+ /* Check if the current field belongs to our table */
+ if (table != item_field->field->table) {
+ return;
+ }
- if (/* START_LSN */
- table->field[2] != item_field->field &&
- /* END_LSN */
- table->field[3] != item_field->field)
- {
- return;
- }
+ /* Check if the field is START_LSN or END_LSN */
+ /* END_LSN */
+ is_end_lsn = table->field[3]->eq(item_field->field);
+
+ if (/* START_LSN */ !table->field[2]->eq(item_field->field)
+ && !is_end_lsn) {
+ return;
+ }
+
+ if (left->type() == Item::FIELD_ITEM
+ && right->type() == Item::INT_ITEM) {
- /* Check if the current field belongs to our table */
- if (table != item_field->field->table)
- return;
+ /* The case of start_lsn|end_lsn <|<= const, i.e. the
+ upper bound. */
tmp_result = right->val_int();
- if (tmp_result < *max_lsn)
- *max_lsn = tmp_result;
+ if (((func_type == Item_func::LE_FUNC)
+ || (func_type == Item_func::GE_FUNC))
+ && (tmp_result != IB_ULONGLONG_MAX)) {
- break;
+ tmp_result++;
+ }
+ if (tmp_result < *end_lsn) {
+ *end_lsn = tmp_result;
+ }
+
+ } else if (left->type() == Item::INT_ITEM
+ && right->type() == Item::FIELD_ITEM) {
+
+ /* The case of const <|<= start_lsn|end_lsn, i.e. the
+ lower bound */
+
+ tmp_result = left->val_int();
+ if (is_end_lsn && tmp_result != 0) {
+ tmp_result--;
+ }
+ if (((func_type == Item_func::LT_FUNC)
+ || (func_type == Item_func::GT_FUNC))
+ && (tmp_result != IB_ULONGLONG_MAX)) {
+
+ tmp_result++;
+ }
+ if (tmp_result > *start_lsn) {
+ *start_lsn = tmp_result;
+ }
}
- default:;
- }
+ break;
+ }
+ default:;
+ }
}
/***********************************************************************
@@ -7282,40 +7329,55 @@ i_s_innodb_changed_pages_fill(
TABLE* table = (TABLE *) tables->table;
log_bitmap_iterator_t i;
ib_uint64_t output_rows_num = 0UL;
- ib_uint64_t max_lsn = ~0ULL;
+ ib_uint64_t max_lsn = IB_ULONGLONG_MAX;
+ ib_uint64_t min_lsn = 0ULL;
+
+ DBUG_ENTER("i_s_innodb_changed_pages_fill");
- if (!srv_track_changed_pages)
- return 0;
+ /* Deny access to users lacking the PROCESS privilege */
+ if (check_global_access(thd, PROCESS_ACL)) {
+
+ DBUG_RETURN(0);
+ }
- if (!log_online_bitmap_iterator_init(&i))
- return 1;
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
- if (cond)
- limit_lsn_range_from_condition(table, cond, &max_lsn);
+ if (!srv_track_changed_pages) {
+ DBUG_RETURN(0);
+ }
+
+ if (cond) {
+ limit_lsn_range_from_condition(table, cond, &min_lsn,
+ &max_lsn);
+ }
+
+ if (!log_online_bitmap_iterator_init(&i, min_lsn, max_lsn)) {
+ DBUG_RETURN(1);
+ }
while(log_online_bitmap_iterator_next(&i) &&
(!srv_changed_pages_limit ||
output_rows_num < srv_changed_pages_limit) &&
/*
- There is no need to compare both start LSN and end LSN fields
- with maximum value. It's enough to compare only start LSN.
- Example:
-
- max_lsn = 100
- \\\\\\\\\\\\\\\\\\\\\\\\\|\\\\\\\\ - Query 1
- I------I I-------I I-------------I I----I
- ////////////////// | - Query 2
- 1 2 3 4
-
- Query 1:
- SELECT * FROM INNODB_CHANGED_PAGES WHERE start_lsn < 100
- will select 1,2,3 bitmaps
- Query 2:
- SELECT * FROM INNODB_CHANGED_PAGES WHERE end_lsn < 100
- will select 1,2 bitmaps
-
- The condition start_lsn <= 100 will be false after reading
- 1,2,3 bitmaps which suits for both cases.
+ There is no need to compare both start LSN and end LSN fields
+ with maximum value. It's enough to compare only start LSN.
+ Example:
+
+ max_lsn = 100
+ \\\\\\\\\\\\\\\\\\\\\\\\\|\\\\\\\\ - Query 1
+ I------I I-------I I-------------I I----I
+ ////////////////// | - Query 2
+ 1 2 3 4
+
+ Query 1:
+ SELECT * FROM INNODB_CHANGED_PAGES WHERE start_lsn < 100
+ will select 1,2,3 bitmaps
+ Query 2:
+ SELECT * FROM INNODB_CHANGED_PAGES WHERE end_lsn < 100
+ will select 1,2 bitmaps
+
+ The condition start_lsn <= 100 will be false after reading
+ 1,2,3 bitmaps which suits for both cases.
*/
LOG_BITMAP_ITERATOR_START_LSN(i) <= max_lsn)
{
@@ -7330,10 +7392,10 @@ i_s_innodb_changed_pages_fill(
LOG_BITMAP_ITERATOR_PAGE_NUM(i));
/* START_LSN */
table->field[2]->store(
- LOG_BITMAP_ITERATOR_START_LSN(i));
+ LOG_BITMAP_ITERATOR_START_LSN(i), true);
/* END_LSN */
table->field[3]->store(
- LOG_BITMAP_ITERATOR_END_LSN(i));
+ LOG_BITMAP_ITERATOR_END_LSN(i), true);
/*
I_S tables are in-memory tables. If bitmap file is big enough
@@ -7353,14 +7415,14 @@ i_s_innodb_changed_pages_fill(
if (schema_table_store_record(thd, table))
{
log_online_bitmap_iterator_release(&i);
- return 1;
+ DBUG_RETURN(1);
}
++output_rows_num;
}
log_online_bitmap_iterator_release(&i);
- return 0;
+ DBUG_RETURN(0);
}
static
diff --git a/storage/xtradb/ibuf/ibuf0ibuf.c b/storage/xtradb/ibuf/ibuf0ibuf.c
index 78cb6e20176..77305e42fb1 100644
--- a/storage/xtradb/ibuf/ibuf0ibuf.c
+++ b/storage/xtradb/ibuf/ibuf0ibuf.c
@@ -4044,7 +4044,7 @@ updated_in_place:
update)
&& (!page_zip || btr_cur_update_alloc_zip(
page_zip, block, index,
- rec_offs_size(offsets), FALSE, mtr))) {
+ rec_offs_size(offsets), FALSE, mtr, NULL))) {
/* This is the easy case. Do something similar
to btr_cur_update_in_place(). */
row_upd_rec_in_place(rec, index, offsets,
diff --git a/storage/xtradb/include/btr0btr.h b/storage/xtradb/include/btr0btr.h
index 03e89ae3f7d..fb06a774b82 100644
--- a/storage/xtradb/include/btr0btr.h
+++ b/storage/xtradb/include/btr0btr.h
@@ -65,7 +65,10 @@ enum btr_latch_mode {
/** Search the previous record. */
BTR_SEARCH_PREV = 35,
/** Modify the previous record. */
- BTR_MODIFY_PREV = 36
+ BTR_MODIFY_PREV = 36,
+ /** Weaker BTR_MODIFY_TREE that does not lock the leaf page siblings,
+ used for fake changes. */
+ BTR_SEARCH_TREE = 37 /* BTR_MODIFY_TREE | 4 */
};
/* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually exclusive. */
diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h
index cbc6103c2ee..cb44129aeb5 100644
--- a/storage/xtradb/include/btr0cur.h
+++ b/storage/xtradb/include/btr0cur.h
@@ -259,8 +259,9 @@ btr_cur_update_alloc_zip(
ulint length, /*!< in: size needed */
ibool create, /*!< in: TRUE=delete-and-insert,
FALSE=update-in-place */
- mtr_t* mtr) /*!< in: mini-transaction */
- __attribute__((nonnull, warn_unused_result));
+ mtr_t* mtr, /*!< in: mini-transaction */
+ trx_t* trx) /*!< in: NULL or transaction */
+ __attribute__((nonnull (1, 2, 3, 6), warn_unused_result));
/*************************************************************//**
Updates a record when the update causes no size changes in its fields.
@return DB_SUCCESS or error number */
diff --git a/storage/xtradb/include/log0online.h b/storage/xtradb/include/log0online.h
index 0e0ca169f6f..e7c3f301e45 100644
--- a/storage/xtradb/include/log0online.h
+++ b/storage/xtradb/include/log0online.h
@@ -27,6 +27,16 @@ Online database log parsing for changed page tracking
#include "univ.i"
#include "os0file.h"
+/** Single bitmap file information */
+typedef struct log_online_bitmap_file_struct log_online_bitmap_file_t;
+
+/** A set of bitmap files containing some LSN range */
+typedef struct log_online_bitmap_file_range_struct
+log_online_bitmap_file_range_t;
+
+/** An iterator over changed page info */
+typedef struct log_bitmap_iterator_struct log_bitmap_iterator_t;
+
/*********************************************************************//**
Initializes the online log following subsytem. */
UNIV_INTERN
@@ -49,45 +59,32 @@ void
log_online_follow_redo_log();
/*=========================*/
-/** The iterator through all bits of changed pages bitmap blocks */
-struct log_bitmap_iterator_struct
-{
- char in_name[FN_REFLEN]; /*!< the file name for bitmap
- input */
- os_file_t in; /*!< the bitmap input file */
- ib_uint64_t in_offset; /*!< the next write position in the
- bitmap output file */
- ib_uint32_t bit_offset; /*!< bit offset inside of bitmap
- block*/
- ib_uint64_t start_lsn; /*!< Start lsn of the block */
- ib_uint64_t end_lsn; /*!< End lsn of the block */
- ib_uint32_t space_id; /*!< Block space id */
- ib_uint32_t first_page_id; /*!< First block page id */
- ibool changed; /*!< true if current page was changed */
- byte* page; /*!< Bitmap block */
-};
-
-typedef struct log_bitmap_iterator_struct log_bitmap_iterator_t;
-
#define LOG_BITMAP_ITERATOR_START_LSN(i) \
- ((i).start_lsn)
+ ((i).start_lsn)
#define LOG_BITMAP_ITERATOR_END_LSN(i) \
- ((i).end_lsn)
+ ((i).end_lsn)
#define LOG_BITMAP_ITERATOR_SPACE_ID(i) \
- ((i).space_id)
+ ((i).space_id)
#define LOG_BITMAP_ITERATOR_PAGE_NUM(i) \
- ((i).first_page_id + (i).bit_offset)
+ ((i).first_page_id + (i).bit_offset)
#define LOG_BITMAP_ITERATOR_PAGE_CHANGED(i) \
- ((i).changed)
+ ((i).changed)
/*********************************************************************//**
-Initializes log bitmap iterator.
+Initializes log bitmap iterator. The minimum LSN is used only for finding the
+correct starting file with records, so there may be records returned by
+the iterator that have LSN less than min_lsn.
+
@return TRUE if the iterator is initialized OK, FALSE otherwise. */
UNIV_INTERN
ibool
log_online_bitmap_iterator_init(
/*============================*/
- log_bitmap_iterator_t *i); /*!<in/out: iterator */
+ log_bitmap_iterator_t *i, /*!<in/out: iterator */
+ ib_uint64_t min_lsn, /*!<in: start LSN for the
+ iterator */
+ ib_uint64_t max_lsn); /*!<in: end LSN for the
+ iterator */
/*********************************************************************//**
Releases log bitmap iterator. */
@@ -108,4 +105,57 @@ log_online_bitmap_iterator_next(
/*============================*/
log_bitmap_iterator_t *i); /*!<in/out: iterator */
+/** Struct for single bitmap file information */
+struct log_online_bitmap_file_struct {
+ char name[FN_REFLEN]; /*!< Name with full path */
+ os_file_t file; /*!< Handle to opened file */
+ ib_uint64_t size; /*!< Size of the file */
+ ib_uint64_t offset; /*!< Offset of the next read,
+ or count of already-read bytes
+ */
+};
+
+/** Struct for a set of bitmap files containing some LSN range */
+struct log_online_bitmap_file_range_struct {
+ size_t count; /*!< Number of slots in the files array */
+ /** Dynamically-allocated array of info about individual files */
+ struct {
+ char name[FN_REFLEN]; /*!< Name of a file */
+ ib_uint64_t start_lsn; /*!< Starting LSN of
+ data in this file */
+ ulong seq_num; /*!< Sequence number of
+ this file */
+ } *files;
+};
+
+/** Struct for an iterator through all bits of changed pages bitmap blocks */
+struct log_bitmap_iterator_struct
+{
+ log_online_bitmap_file_range_t in_files; /*!< The bitmap files
+ for this iterator */
+ size_t in_i; /*!< Currently read
+ file index in in_files
+ */
+ log_online_bitmap_file_t in; /*!< Currently read
+ file */
+ ib_uint32_t bit_offset; /*!< bit offset inside
+ the current bitmap
+ block */
+ ib_uint64_t start_lsn; /*!< Start LSN of the
+ current bitmap block */
+ ib_uint64_t end_lsn; /*!< End LSN of the
+ current bitmap block */
+ ib_uint32_t space_id; /*!< Current block
+ space id */
+ ib_uint32_t first_page_id; /*!< Id of the first
+ page in the current
+ block */
+ ibool last_page_in_run;/*!< "Last page in
+ run" flag value for the
+ current block */
+ ibool changed; /*!< true if current
+ page was changed */
+ byte* page; /*!< Bitmap block */
+};
+
#endif
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index a95eb8a1d58..6c5b61487f2 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -144,7 +144,8 @@ extern char* srv_doublewrite_file;
extern ibool srv_recovery_stats;
-extern my_bool srv_track_changed_pages;
+extern my_bool srv_track_changed_pages;
+extern ib_uint64_t srv_max_bitmap_file_size;
extern
ulonglong srv_changed_pages_limit;
@@ -395,6 +396,10 @@ extern uint srv_auto_lru_dump;
/** Whether startup should be blocked until buffer pool is fully restored */
extern ibool srv_blocking_lru_restore;
+/** When TRUE, fake change transactions take S rather than X row locks.
+When FALSE, row locks are not taken at all. */
+extern my_bool srv_fake_changes_locks;
+
/** Status variables to be passed to MySQL */
typedef struct export_var_struct export_struc;
diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i
index 5d2cd2d0313..77acf54d8dc 100644
--- a/storage/xtradb/include/univ.i
+++ b/storage/xtradb/include/univ.i
@@ -54,7 +54,7 @@ Created 1/20/1994 Heikki Tuuri
#define INNODB_VERSION_BUGFIX 8
#ifndef PERCONA_INNODB_VERSION
-#define PERCONA_INNODB_VERSION 29.1
+#define PERCONA_INNODB_VERSION 29.3
#endif
/* The following is the InnoDB version as shown in
diff --git a/storage/xtradb/include/ut0ut.h b/storage/xtradb/include/ut0ut.h
index 47ab6eb9b74..48f30b33e65 100644
--- a/storage/xtradb/include/ut0ut.h
+++ b/storage/xtradb/include/ut0ut.h
@@ -122,6 +122,15 @@ ut_max(
/*===*/
ulint n1, /*!< in: first number */
ulint n2); /*!< in: second number */
+/******************************************************//**
+Calculates the maximum of two ib_uint64_t values.
+@return the maximum */
+UNIV_INLINE
+ib_uint64_t
+ut_max_uint64(
+/*==========*/
+ ib_uint64_t n1, /*!< in: first number */
+ ib_uint64_t n2); /*!< in: second number */
/****************************************************************//**
Calculates minimum of two ulint-pairs. */
UNIV_INLINE
diff --git a/storage/xtradb/include/ut0ut.ic b/storage/xtradb/include/ut0ut.ic
index 6f55c7e410e..d56deb6266f 100644
--- a/storage/xtradb/include/ut0ut.ic
+++ b/storage/xtradb/include/ut0ut.ic
@@ -49,6 +49,19 @@ ut_max(
return((n1 <= n2) ? n2 : n1);
}
+/******************************************************//**
+Returns the larger of two ib_uint64_t values. When both arguments are
+equal, that common value is returned.
+@return the maximum of n1 and n2 */
+UNIV_INLINE
+ib_uint64_t
+ut_max_uint64(
+/*==========*/
+	ib_uint64_t	n1,	/*!< in: first number */
+	ib_uint64_t	n2)	/*!< in: second number */
+{
+	if (n2 < n1) {
+		return(n1);
+	}
+
+	return(n2);
+}
+
/****************************************************************//**
Calculates minimum of two ulint-pairs. */
UNIV_INLINE
diff --git a/storage/xtradb/lock/lock0lock.c b/storage/xtradb/lock/lock0lock.c
index 414d3ae2c49..f172ad6695b 100644
--- a/storage/xtradb/lock/lock0lock.c
+++ b/storage/xtradb/lock/lock0lock.c
@@ -5481,8 +5481,13 @@ lock_sec_rec_read_check_and_lock(
return(DB_SUCCESS);
}
- if (thr && thr_get_trx(thr)->fake_changes && mode == LOCK_X) {
- mode = LOCK_S;
+ if (UNIV_UNLIKELY((thr && thr_get_trx(thr)->fake_changes))) {
+ if (!srv_fake_changes_locks) {
+ return(DB_SUCCESS);
+ }
+ if (mode == LOCK_X) {
+ mode = LOCK_S;
+ }
}
heap_no = page_rec_get_heap_no(rec);
@@ -5561,8 +5566,13 @@ lock_clust_rec_read_check_and_lock(
return(DB_SUCCESS);
}
- if (thr && thr_get_trx(thr)->fake_changes && mode == LOCK_X) {
- mode = LOCK_S;
+ if (UNIV_UNLIKELY((thr && thr_get_trx(thr)->fake_changes))) {
+ if (!srv_fake_changes_locks) {
+ return(DB_SUCCESS);
+ }
+ if (mode == LOCK_X) {
+ mode = LOCK_S;
+ }
}
heap_no = page_rec_get_heap_no(rec);
diff --git a/storage/xtradb/log/log0log.c b/storage/xtradb/log/log0log.c
index f2066b49662..e7c7a165b9c 100644
--- a/storage/xtradb/log/log0log.c
+++ b/storage/xtradb/log/log0log.c
@@ -248,7 +248,7 @@ log_check_tracking_margin(
checked for the already-written log. */
{
ib_uint64_t tracked_lsn;
- ulint tracked_lsn_age;
+ ib_uint64_t tracked_lsn_age;
if (!srv_track_changed_pages) {
return FALSE;
@@ -460,7 +460,7 @@ log_close(void)
ib_uint64_t oldest_lsn;
ib_uint64_t lsn;
ib_uint64_t tracked_lsn;
- ulint tracked_lsn_age;
+ ib_uint64_t tracked_lsn_age;
log_t* log = log_sys;
ib_uint64_t checkpoint_age;
diff --git a/storage/xtradb/log/log0online.c b/storage/xtradb/log/log0online.c
index 1d478c467e6..55eb9d17c46 100644
--- a/storage/xtradb/log/log0online.c
+++ b/storage/xtradb/log/log0online.c
@@ -48,10 +48,8 @@ struct log_bitmap_struct {
parsed, it points to the start,
otherwise points immediatelly past the
end of the incomplete log record. */
- char* out_name; /*!< the file name for bitmap output */
- os_file_t out; /*!< the bitmap output file */
- ib_uint64_t out_offset; /*!< the next write position in the
- bitmap output file */
+ log_online_bitmap_file_t out; /*!< The current bitmap file */
+ ulint out_seq_num; /*!< the bitmap file sequence number */
ib_uint64_t start_lsn; /*!< the LSN of the next unparsed
record and the start of the next LSN
interval to be parsed. */
@@ -76,8 +74,13 @@ struct log_bitmap_struct {
/* The log parsing and bitmap output struct instance */
static struct log_bitmap_struct* log_bmp_sys;
-/* File name stem for modified page bitmaps */
-static const char* modified_page_stem = "ib_modified_log.";
+/** File name stem for bitmap files. */
+static const char* bmp_file_name_stem = "ib_modified_log_";
+
+/** File name template for bitmap files. The 1st format tag is a directory
+name, the 2nd tag is the stem, the 3rd tag is a file sequence number, the 4th
+tag is the start LSN for the file. */
+static const char* bmp_file_name_template = "%s%s%lu_%llu.xdb";
/* On server startup with empty database srv_start_lsn == 0, in
which case the first LSN of actual log records will be this. */
@@ -85,7 +88,7 @@ which case the first LSN of actual log records will be this. */
/* Tests if num bit of bitmap is set */
#define IS_BIT_SET(bitmap, num) \
- (*((bitmap) + ((num) >> 3)) & (1UL << ((num) & 7UL)))
+ (*((bitmap) + ((num) >> 3)) & (1UL << ((num) & 7UL)))
/** The bitmap file block size in bytes. All writes will be multiples of this.
*/
@@ -243,10 +246,69 @@ log_online_calc_checksum(
}
/****************************************************************//**
+Read one bitmap data page at the current read offset of the file and
+check it for corruption. On success the read offset of bitmap_file is
+advanced by one page and *checksum_ok tells whether the stored checksum
+matches the computed one. On I/O failure the read offset is left unchanged
+and *checksum_ok is set to FALSE, so the output flag is defined on every
+return path.
+
+@return TRUE if page read OK, FALSE if I/O error */
+static
+ibool
+log_online_read_bitmap_page(
+/*========================*/
+	log_online_bitmap_file_t	*bitmap_file,	/*!<in/out: bitmap
+							file */
+	byte				*page,		/*!<out: read page.
+							Must be at least
+							MODIFIED_PAGE_BLOCK_SIZE
+							bytes long */
+	ibool				*checksum_ok)	/*!<out: TRUE if page
+							checksum OK */
+{
+	ulint	offset_low	= (ulint)(bitmap_file->offset & 0xFFFFFFFF);
+	ulint	offset_high	= (ulint)(bitmap_file->offset >> 32);
+	ulint	checksum;
+	ulint	actual_checksum;
+	ibool	success;
+
+	/* Never leave the output flag undefined, even if the read fails */
+	*checksum_ok = FALSE;
+
+	/* The caller must position the file on a page boundary with at
+	least one full page remaining */
+	ut_a(bitmap_file->size >= MODIFIED_PAGE_BLOCK_SIZE);
+	ut_a(bitmap_file->offset
+	     <= bitmap_file->size - MODIFIED_PAGE_BLOCK_SIZE);
+	ut_a(bitmap_file->offset % MODIFIED_PAGE_BLOCK_SIZE == 0);
+
+	success = os_file_read(bitmap_file->file, page, offset_low,
+			       offset_high, MODIFIED_PAGE_BLOCK_SIZE);
+
+	if (UNIV_UNLIKELY(!success)) {
+
+		/* The following call prints an error message */
+		os_file_get_last_error(TRUE);
+		fprintf(stderr,
+			"InnoDB: Warning: failed reading changed page bitmap "
+			"file \'%s\'\n", bitmap_file->name);
+		return FALSE;
+	}
+
+	bitmap_file->offset += MODIFIED_PAGE_BLOCK_SIZE;
+	ut_ad(bitmap_file->offset <= bitmap_file->size);
+
+	/* Compare the checksum stored in the page with the computed one */
+	checksum = mach_read_from_4(page + MODIFIED_PAGE_BLOCK_CHECKSUM);
+	actual_checksum = log_online_calc_checksum(page);
+	*checksum_ok = (checksum == actual_checksum);
+
+	return TRUE;
+}
+
+/****************************************************************//**
Get the last tracked fully LSN from the bitmap file by reading
backwards untile a correct end page is found. Detects incomplete
writes and corrupted data. Sets the start output position for the
written bitmap data.
+
+Multiple bitmap files are handled using the following assumptions:
+1) Only the last file might be corrupted. In case where no good data was found
+in the last file, assume that the next to last file is OK. This assumption
+does not limit crash recovery capability in any way.
+2) If the whole of the last file was corrupted, assume that the start LSN in
+its name is correct and use it for (re-)tracking start.
+
@return the last fully tracked LSN */
static
ib_uint64_t
@@ -254,73 +316,46 @@ log_online_read_last_tracked_lsn()
/*==============================*/
{
byte page[MODIFIED_PAGE_BLOCK_SIZE];
- ib_uint64_t read_offset = log_bmp_sys->out_offset;
- /* Initialize these to nonequal values so that file size == 0 case with
- zero loop repetitions is handled correctly */
- ulint checksum = 0;
- ulint actual_checksum = !checksum;
ibool is_last_page = FALSE;
+ ibool checksum_ok = FALSE;
ib_uint64_t result;
+ ib_uint64_t read_offset = log_bmp_sys->out.offset;
- ut_ad(log_bmp_sys->out_offset % MODIFIED_PAGE_BLOCK_SIZE == 0);
-
- while (checksum != actual_checksum && read_offset > 0 && !is_last_page)
+ while (!checksum_ok && read_offset > 0 && !is_last_page)
{
-
- ulint offset_low, offset_high;
- ibool success;
-
read_offset -= MODIFIED_PAGE_BLOCK_SIZE;
- offset_high = (ulint)(read_offset >> 32);
- offset_low = (ulint)(read_offset & 0xFFFFFFFF);
-
- success = os_file_read(log_bmp_sys->out, page, offset_low,
- offset_high, MODIFIED_PAGE_BLOCK_SIZE);
- if (!success) {
+ log_bmp_sys->out.offset = read_offset;
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
- /* Here and below assume that bitmap file names do not
- contain apostrophes, thus no need for
- ut_print_filename(). */
- fprintf(stderr, "InnoDB: Warning: failed reading "
- "changed page bitmap file \'%s\'\n",
- log_bmp_sys->out_name);
- return MIN_TRACKED_LSN;
+ if (!log_online_read_bitmap_page(&log_bmp_sys->out, page,
+ &checksum_ok)) {
+ checksum_ok = FALSE;
+ result = 0;
+ break;
}
- is_last_page
- = mach_read_from_4(page + MODIFIED_PAGE_IS_LAST_BLOCK);
- checksum = mach_read_from_4(page
- + MODIFIED_PAGE_BLOCK_CHECKSUM);
- actual_checksum = log_online_calc_checksum(page);
- if (checksum != actual_checksum) {
+ if (checksum_ok) {
+ is_last_page
+ = mach_read_from_4
+ (page + MODIFIED_PAGE_IS_LAST_BLOCK);
+ } else {
- fprintf(stderr, "InnoDB: Warning: corruption "
- "detected in \'%s\' at offset %llu\n",
- log_bmp_sys->out_name, read_offset);
+ fprintf(stderr,
+ "InnoDB: Warning: corruption detected in "
+ "\'%s\' at offset %llu\n",
+ log_bmp_sys->out.name, read_offset);
}
-
};
- if (UNIV_LIKELY(checksum == actual_checksum && is_last_page)) {
-
- log_bmp_sys->out_offset = read_offset
- + MODIFIED_PAGE_BLOCK_SIZE;
- result = mach_read_from_8(page + MODIFIED_PAGE_END_LSN);
- }
- else {
- log_bmp_sys->out_offset = read_offset;
- result = 0;
- }
+ result = (checksum_ok && is_last_page)
+ ? mach_read_from_8(page + MODIFIED_PAGE_END_LSN) : 0;
/* Truncate the output file to discard the corrupted bitmap data, if
any */
- if (!os_file_set_eof_at(log_bmp_sys->out,
- log_bmp_sys->out_offset)) {
+ if (!os_file_set_eof_at(log_bmp_sys->out.file,
+ log_bmp_sys->out.offset)) {
fprintf(stderr, "InnoDB: Warning: failed truncating "
"changed page bitmap file \'%s\' to %llu bytes\n",
- log_bmp_sys->out_name, log_bmp_sys->out_offset);
+ log_bmp_sys->out.name, log_bmp_sys->out.offset);
result = 0;
}
return result;
@@ -350,6 +385,37 @@ log_set_tracked_lsn(
#endif
}
+/*********************************************************************//**
+Decide whether the LSN interval between the point where tracking stopped
+and the tracking start LSN, if there is such an interval, can still be read
+from the redo log and tracked. The decision uses the current log LSN and
+the log group capacity. Terminates the server if the last tracked LSN is
+larger than the tracking start LSN.
+
+@return TRUE if the missing interval can be tracked or if there's no missing
+data. */
+static
+ibool
+log_online_can_track_missing(
+/*=========================*/
+	ib_uint64_t	last_tracked_lsn,	/*!<in: last tracked LSN */
+	ib_uint64_t	tracking_start_lsn)	/*!<in: LSN from which the
+						tracking is to start */
+{
+	/* An empty bitmap file may yield a value below MIN_TRACKED_LSN,
+	clamp it from below. */
+	ib_uint64_t	effective_lsn
+		= ut_max_uint64(last_tracked_lsn, MIN_TRACKED_LSN);
+
+	if (effective_lsn > tracking_start_lsn) {
+		fprintf(stderr,
+			"InnoDB: Error: last tracked LSN is in future. This "
+			"can be caused by mismatched bitmap files.\n");
+		exit(1);
+	}
+
+	if (effective_lsn == tracking_start_lsn) {
+		/* Nothing is missing */
+		return TRUE;
+	}
+
+	/* The gap is recoverable only while it still fits in the log */
+	return (log_sys->lsn - effective_lsn
+		<= log_sys->log_group_capacity);
+}
+
+
/****************************************************************//**
Diagnose a gap in tracked LSN range on server startup due to crash or
very fast shutdown and try to close it by tracking the data
@@ -365,22 +431,20 @@ log_online_track_missing_on_startup(
{
ut_ad(last_tracked_lsn != tracking_start_lsn);
- fprintf(stderr, "InnoDB: last tracked LSN in \'%s\' is %llu, but "
- "last checkpoint LSN is %llu. This might be due to a server "
- "crash or a very fast shutdown. ", log_bmp_sys->out_name,
- last_tracked_lsn, tracking_start_lsn);
-
- /* last_tracked_lsn might be < MIN_TRACKED_LSN in the case of empty
- bitmap file, handle this too. */
- last_tracked_lsn = ut_max(last_tracked_lsn, MIN_TRACKED_LSN);
+ fprintf(stderr, "InnoDB: last tracked LSN is %llu, but the last "
+ "checkpoint LSN is %llu. This might be due to a server "
+ "crash or a very fast shutdown. ", last_tracked_lsn,
+ tracking_start_lsn);
/* See if we can fully recover the missing interval */
- if (log_sys->lsn - last_tracked_lsn < log_sys->log_group_capacity) {
+ if (log_online_can_track_missing(last_tracked_lsn,
+ tracking_start_lsn)) {
fprintf(stderr,
"Reading the log to advance the last tracked LSN.\n");
- log_bmp_sys->start_lsn = last_tracked_lsn;
+ log_bmp_sys->start_lsn = ut_max_uint64(last_tracked_lsn,
+ MIN_TRACKED_LSN);
log_set_tracked_lsn(log_bmp_sys->start_lsn);
log_online_follow_redo_log();
ut_ad(log_bmp_sys->end_lsn >= tracking_start_lsn);
@@ -406,16 +470,101 @@ log_online_track_missing_on_startup(
}
/*********************************************************************//**
+Format a bitmap output file name to log_bmp_sys->out.name. The name is
+built from srv_data_home, the bitmap file name stem, the current output
+file sequence number (log_bmp_sys->out_seq_num) and the given start LSN. */
+static
+void
+log_online_make_bitmap_name(
+/*=========================*/
+	ib_uint64_t	start_lsn)	/*!< in: the start LSN name part */
+{
+	/* The arguments are passed through varargs, so cast them to exactly
+	the types that the %lu and %llu tags of bmp_file_name_template
+	expect: ulint and ib_uint64_t are not guaranteed to be unsigned long
+	and unsigned long long on every platform. */
+	ut_snprintf(log_bmp_sys->out.name, FN_REFLEN, bmp_file_name_template,
+		    srv_data_home, bmp_file_name_stem,
+		    (ulong) log_bmp_sys->out_seq_num,
+		    (unsigned long long) start_lsn);
+
+}
+
+/*********************************************************************//**
+Create the bitmap output file named by log_bmp_sys->out.name, overwriting
+any file that already exists under that name, and reset the output write
+offset to the file start. Terminates the server if the file cannot be
+created. */
+static
+void
+log_online_start_bitmap_file()
+/*==========================*/
+{
+	ibool	created;
+
+	log_bmp_sys->out.file = os_file_create(innodb_file_bmp_key,
+					       log_bmp_sys->out.name,
+					       OS_FILE_OVERWRITE,
+					       OS_FILE_NORMAL,
+					       OS_DATA_FILE,
+					       &created);
+
+	if (UNIV_UNLIKELY(!created)) {
+
+		/* The following call prints an error message */
+		os_file_get_last_error(TRUE);
+		fprintf(stderr,
+			"InnoDB: Error: Cannot create \'%s\'\n",
+			log_bmp_sys->out.name);
+		exit(1);
+	}
+
+	/* A fresh file: the next page is written at its very beginning */
+	log_bmp_sys->out.offset = 0;
+}
+
+/*********************************************************************//**
+Switch the bitmap output to a new file: close the current output file,
+advance the file sequence number, format the new file name from it and the
+given start LSN, and create the new file. */
+static
+void
+log_online_rotate_bitmap_file(
+/*===========================*/
+	ib_uint64_t	next_file_start_lsn)	/*!<in: the start LSN name
+						part */
+{
+	os_file_close(log_bmp_sys->out.file);
+
+	/* The new name must be formatted with the already-bumped sequence
+	number, and the file created only after the name is in place */
+	log_bmp_sys->out_seq_num += 1;
+	log_online_make_bitmap_name(next_file_start_lsn);
+
+	log_online_start_bitmap_file();
+}
+
+/*********************************************************************//**
+Check the name of a given file if it's a changed page bitmap file and
+return file sequence and start LSN name components if it is. If is not,
+the values of output parameters are undefined. The whole name must match
+the pattern <stem><sequence number>_<start LSN>.xdb: a name that merely
+starts like a bitmap file name but has a different or longer suffix is
+rejected.
+
+@return TRUE if a given file is a changed page bitmap file. */
+static
+ibool
+log_online_is_bitmap_file(
+/*======================*/
+	const os_file_stat_t*	file_info,		/*!<in: file to
+							check */
+	ulong*			bitmap_file_seq_num,	/*!<out: bitmap file
+							sequence number */
+	ib_uint64_t*		bitmap_file_start_lsn)	/*!<out: bitmap file
+							start LSN */
+{
+	/* The field width in the scan format below must be
+	sizeof(stem) - 1 */
+	char	stem[32];
+	int	parsed_len = 0;
+
+	ut_ad(strlen(file_info->name) < OS_FILE_MAX_PATH);
+	ut_ad(strlen(bmp_file_name_stem) < sizeof(stem));
+
+	if (file_info->type != OS_FILE_TYPE_FILE
+	    && file_info->type != OS_FILE_TYPE_LINK) {
+
+		return FALSE;
+	}
+
+	/* The stem conversion is width-limited so that an arbitrary file
+	name cannot overflow the buffer. %n is stored only if the literal
+	".xdb" suffix has been matched too; it does not count towards the
+	sscanf() return value. */
+	if (sscanf(file_info->name, "%31[a-z_]%lu_%llu.xdb%n", stem,
+		   bitmap_file_seq_num, bitmap_file_start_lsn,
+		   &parsed_len) != 3) {
+
+		return FALSE;
+	}
+
+	/* Require the suffix to be present and to end the name */
+	return (parsed_len > 0
+		&& file_info->name[parsed_len] == '\0'
+		&& !strcmp(stem, bmp_file_name_stem));
+}
+
+/*********************************************************************//**
Initialize the online log following subsytem. */
UNIV_INTERN
void
log_online_read_init()
/*==================*/
{
- char buf[FN_REFLEN];
ibool success;
ib_uint64_t tracking_start_lsn
- = ut_max(log_sys->last_checkpoint_lsn, MIN_TRACKED_LSN);
+ = ut_max_uint64(log_sys->last_checkpoint_lsn, MIN_TRACKED_LSN);
+ os_file_dir_t bitmap_dir;
+ os_file_stat_t bitmap_dir_file_info;
+ ib_uint64_t last_file_start_lsn = MIN_TRACKED_LSN;
/* Assert (could be compile-time assert) that bitmap data start and end
in a bitmap block is 8-byte aligned */
@@ -424,82 +573,120 @@ log_online_read_init()
log_bmp_sys = ut_malloc(sizeof(*log_bmp_sys));
- ut_snprintf(buf, FN_REFLEN, "%s%s%d", srv_data_home,
- modified_page_stem, 1);
- log_bmp_sys->out_name = ut_malloc(strlen(buf) + 1);
- ut_strcpy(log_bmp_sys->out_name, buf);
+ /* Enumerate existing bitmap files to either open the last one to get
+ the last tracked LSN either to find that there are none and start
+ tracking from scratch. */
+ log_bmp_sys->out.name[0] = '\0';
+ log_bmp_sys->out_seq_num = 0;
+
+ bitmap_dir = os_file_opendir(srv_data_home, TRUE);
+ ut_a(bitmap_dir);
+ while (!os_file_readdir_next_file(srv_data_home, bitmap_dir,
+ &bitmap_dir_file_info)) {
+
+ ulong file_seq_num;
+ ib_uint64_t file_start_lsn;
+
+ if (!log_online_is_bitmap_file(&bitmap_dir_file_info,
+ &file_seq_num,
+ &file_start_lsn)) {
+ continue;
+ }
+
+ if (file_seq_num > log_bmp_sys->out_seq_num
+ && bitmap_dir_file_info.size > 0) {
+ log_bmp_sys->out_seq_num = file_seq_num;
+ last_file_start_lsn = file_start_lsn;
+ /* No dir component (srv_data_home) here, because
+ that's the cwd */
+ strncpy(log_bmp_sys->out.name,
+ bitmap_dir_file_info.name, FN_REFLEN - 1);
+ log_bmp_sys->out.name[FN_REFLEN - 1] = '\0';
+ }
+ }
+
+ if (os_file_closedir(bitmap_dir)) {
+ os_file_get_last_error(TRUE);
+ fprintf(stderr, "InnoDB: Error: cannot close \'%s\'\n",
+ srv_data_home);
+ exit(1);
+ }
+
+ if (!log_bmp_sys->out_seq_num) {
+ log_bmp_sys->out_seq_num = 1;
+ log_online_make_bitmap_name(0);
+ }
log_bmp_sys->modified_pages = rbt_create(MODIFIED_PAGE_BLOCK_SIZE,
log_online_compare_bmp_keys);
log_bmp_sys->page_free_list = NULL;
- log_bmp_sys->out
+ log_bmp_sys->out.file
= os_file_create_simple_no_error_handling
- (innodb_file_bmp_key, log_bmp_sys->out_name, OS_FILE_OPEN,
+ (innodb_file_bmp_key, log_bmp_sys->out.name, OS_FILE_OPEN,
OS_FILE_READ_WRITE, &success);
if (!success) {
/* New file, tracking from scratch */
- log_bmp_sys->out
- = os_file_create_simple_no_error_handling
- (innodb_file_bmp_key, log_bmp_sys->out_name,
- OS_FILE_CREATE, OS_FILE_READ_WRITE, &success);
- if (!success) {
-
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
- fprintf(stderr,
- "InnoDB: Error: Cannot create \'%s\'\n",
- log_bmp_sys->out_name);
- exit(1);
- }
-
- log_bmp_sys->out_offset = 0;
+ log_online_start_bitmap_file();
}
else {
- /* Old file, read last tracked LSN and continue from there */
+ /* Read the last tracked LSN from the last file */
ulint size_low;
ulint size_high;
ib_uint64_t last_tracked_lsn;
- success = os_file_get_size(log_bmp_sys->out, &size_low,
+ success = os_file_get_size(log_bmp_sys->out.file, &size_low,
&size_high);
ut_a(success);
- log_bmp_sys->out_offset
+ log_bmp_sys->out.size
= ((ib_uint64_t)size_high << 32) | size_low;
+ log_bmp_sys->out.offset = log_bmp_sys->out.size;
- if (log_bmp_sys->out_offset % MODIFIED_PAGE_BLOCK_SIZE != 0) {
+ if (log_bmp_sys->out.offset % MODIFIED_PAGE_BLOCK_SIZE != 0) {
fprintf(stderr,
"InnoDB: Warning: truncated block detected "
"in \'%s\' at offset %llu\n",
- log_bmp_sys->out_name,
- log_bmp_sys->out_offset);
- log_bmp_sys->out_offset -=
- log_bmp_sys->out_offset
+ log_bmp_sys->out.name,
+ log_bmp_sys->out.offset);
+ log_bmp_sys->out.offset -=
+ log_bmp_sys->out.offset
% MODIFIED_PAGE_BLOCK_SIZE;
}
last_tracked_lsn = log_online_read_last_tracked_lsn();
+ if (!last_tracked_lsn) {
+ last_tracked_lsn = last_file_start_lsn;
+ }
+
+ /* Start a new file. Choose the LSN value in its name based on
+ if we can retrack any missing data. */
+ if (log_online_can_track_missing(last_tracked_lsn,
+ tracking_start_lsn)) {
+ log_online_rotate_bitmap_file(last_tracked_lsn);
+ }
+ else {
+ log_online_rotate_bitmap_file(tracking_start_lsn);
+ }
if (last_tracked_lsn < tracking_start_lsn) {
- log_online_track_missing_on_startup(last_tracked_lsn,
- tracking_start_lsn);
+ log_online_track_missing_on_startup
+ (last_tracked_lsn, tracking_start_lsn);
return;
}
if (last_tracked_lsn > tracking_start_lsn) {
- fprintf(stderr, "InnoDB: last tracked LSN in \'%s\' "
- "is %llu, but last checkpoint LSN is %llu. "
+ fprintf(stderr, "InnoDB: last tracked LSN is %llu, "
+ "but last the checkpoint LSN is %llu. "
"The tracking-based incremental backups will "
"work only from the latter LSN!\n",
- log_bmp_sys->out_name, last_tracked_lsn,
- tracking_start_lsn);
+ last_tracked_lsn, tracking_start_lsn);
}
}
@@ -519,7 +706,7 @@ log_online_read_shutdown()
{
ib_rbt_node_t *free_list_node = log_bmp_sys->page_free_list;
- os_file_close(log_bmp_sys->out);
+ os_file_close(log_bmp_sys->out.file);
rbt_free(log_bmp_sys->modified_pages);
@@ -529,7 +716,6 @@ log_online_read_shutdown()
free_list_node = next;
}
- ut_free(log_bmp_sys->out_name);
ut_free(log_bmp_sys);
}
@@ -746,8 +932,8 @@ log_online_follow_log_seg(
/* The next parse LSN is inside the current block, skip
data preceding it. */
skip_already_parsed_len
- = log_bmp_sys->next_parse_lsn
- - block_start_lsn;
+ = (ulint)(log_bmp_sys->next_parse_lsn
+ - block_start_lsn);
}
else {
@@ -819,32 +1005,32 @@ log_online_write_bitmap_page(
{
ibool success;
- success = os_file_write(log_bmp_sys->out_name,log_bmp_sys->out,
+ success = os_file_write(log_bmp_sys->out.name, log_bmp_sys->out.file,
block,
- (ulint)(log_bmp_sys->out_offset & 0xFFFFFFFF),
- (ulint)(log_bmp_sys->out_offset << 32),
+ (ulint)(log_bmp_sys->out.offset & 0xFFFFFFFF),
+ (ulint)(log_bmp_sys->out.offset << 32),
MODIFIED_PAGE_BLOCK_SIZE);
if (UNIV_UNLIKELY(!success)) {
/* The following call prints an error message */
os_file_get_last_error(TRUE);
fprintf(stderr, "InnoDB: Error: failed writing changed page "
- "bitmap file \'%s\'\n", log_bmp_sys->out_name);
+ "bitmap file \'%s\'\n", log_bmp_sys->out.name);
return;
}
- success = os_file_flush(log_bmp_sys->out, FALSE);
+ success = os_file_flush(log_bmp_sys->out.file, FALSE);
if (UNIV_UNLIKELY(!success)) {
/* The following call prints an error message */
os_file_get_last_error(TRUE);
fprintf(stderr, "InnoDB: Error: failed flushing "
"changed page bitmap file \'%s\'\n",
- log_bmp_sys->out_name);
+ log_bmp_sys->out.name);
return;
}
- log_bmp_sys->out_offset += MODIFIED_PAGE_BLOCK_SIZE;
+ log_bmp_sys->out.offset += MODIFIED_PAGE_BLOCK_SIZE;
}
/*********************************************************************//**
@@ -858,6 +1044,10 @@ log_online_write_bitmap()
ib_rbt_node_t *bmp_tree_node;
const ib_rbt_node_t *last_bmp_tree_node;
+ if (log_bmp_sys->out.offset >= srv_max_bitmap_file_size) {
+ log_online_rotate_bitmap_file(log_bmp_sys->start_lsn);
+ }
+
bmp_tree_node = (ib_rbt_node_t *)
rbt_first(log_bmp_sys->modified_pages);
last_bmp_tree_node = rbt_last(log_bmp_sys->modified_pages);
@@ -930,47 +1120,306 @@ log_online_follow_redo_log()
}
/*********************************************************************//**
-Initializes log bitmap iterator.
+List the bitmap files in srv_data_home and setup their range that contains the
+specified LSN interval. This range, if non-empty, will start with a file that
+has the greatest LSN equal to or less than the start LSN and will include all
+the files up to the one with the greatest LSN less than the end LSN. If this
+function returned TRUE and bitmap_files->files is set to non-NULL, the caller
+must release bitmap_files->files with ut_free() when done. If this function
+returned FALSE, nothing is left allocated and bitmap_files->files is NULL.
+Field bitmap_files->count might be set to a larger value than the actual count
+of the files, and space for the unused array slots will be allocated but
+cleared to zeroes.
+
+@return TRUE if succeeded
+*/
+static
+ibool
+log_online_setup_bitmap_file_range(
+/*===============================*/
+	log_online_bitmap_file_range_t	*bitmap_files,	/*!<in/out: bitmap file
+							range */
+	ib_uint64_t			range_start,	/*!<in: start LSN */
+	ib_uint64_t			range_end)	/*!<in: end LSN */
+{
+	os_file_dir_t	bitmap_dir;
+	os_file_stat_t	bitmap_dir_file_info;
+	ulong		first_file_seq_num	= ULONG_MAX;
+	ib_uint64_t	first_file_start_lsn	= IB_ULONGLONG_MAX;
+
+	bitmap_files->count = 0;
+	bitmap_files->files = NULL;
+
+	/* 1st pass: size the info array */
+
+	bitmap_dir = os_file_opendir(srv_data_home, FALSE);
+	if (!bitmap_dir) {
+		fprintf(stderr,
+			"InnoDB: Error: "
+			"failed to open bitmap directory \'%s\'\n",
+			srv_data_home);
+		return FALSE;
+	}
+
+	while (!os_file_readdir_next_file(srv_data_home, bitmap_dir,
+					  &bitmap_dir_file_info)) {
+
+		ulong		file_seq_num;
+		ib_uint64_t	file_start_lsn;
+
+		if (!log_online_is_bitmap_file(&bitmap_dir_file_info,
+					       &file_seq_num,
+					       &file_start_lsn)
+		    || file_start_lsn >= range_end) {
+
+			continue;
+		}
+
+		if (file_start_lsn >= range_start
+		    || file_start_lsn == first_file_start_lsn
+		    || first_file_start_lsn > range_start) {
+
+			/* A file that falls into the range */
+			bitmap_files->count++;
+			if (file_start_lsn < first_file_start_lsn) {
+
+				first_file_start_lsn = file_start_lsn;
+			}
+			if (file_seq_num < first_file_seq_num) {
+
+				first_file_seq_num = file_seq_num;
+			}
+		} else if (file_start_lsn > first_file_start_lsn) {
+
+			/* A file that has LSN closer to the range start
+			but smaller than it, replacing another such file */
+			first_file_start_lsn = file_start_lsn;
+			first_file_seq_num = file_seq_num;
+		}
+	}
+
+	ut_a(first_file_seq_num != ULONG_MAX || bitmap_files->count == 0);
+
+	if (os_file_closedir(bitmap_dir)) {
+		os_file_get_last_error(TRUE);
+		fprintf(stderr, "InnoDB: Error: cannot close \'%s\'\n",
+			srv_data_home);
+		return FALSE;
+	}
+
+	if (!bitmap_files->count) {
+		return TRUE;
+	}
+
+	/* 2nd pass: get the file names in the file_seq_num order */
+
+	bitmap_dir = os_file_opendir(srv_data_home, FALSE);
+	if (!bitmap_dir) {
+		fprintf(stderr, "InnoDB: Error: "
+			"failed to open bitmap directory \'%s\'\n",
+			srv_data_home);
+		return FALSE;
+	}
+
+	bitmap_files->files = ut_malloc(bitmap_files->count
+					* sizeof(bitmap_files->files[0]));
+	memset(bitmap_files->files, 0,
+	       bitmap_files->count * sizeof(bitmap_files->files[0]));
+
+	while (!os_file_readdir_next_file(srv_data_home, bitmap_dir,
+					  &bitmap_dir_file_info)) {
+
+		ulong		file_seq_num;
+		ib_uint64_t	file_start_lsn;
+		size_t		array_pos;
+
+		if (!log_online_is_bitmap_file(&bitmap_dir_file_info,
+					       &file_seq_num,
+					       &file_start_lsn)
+		    || file_start_lsn >= range_end
+		    || file_start_lsn < first_file_start_lsn) {
+			continue;
+		}
+
+		/* The array was sized by the 1st pass under the assumption
+		that the sequence numbers in the range are contiguous. A gap
+		in the numbering, a mismatched set of files (the unsigned
+		subtraction then wraps around), or a file created between
+		the two passes would give a slot outside of the array. */
+		array_pos = file_seq_num - first_file_seq_num;
+		if (UNIV_UNLIKELY(array_pos >= bitmap_files->count)) {
+			fprintf(stderr,
+				"InnoDB: Error: inconsistent bitmap file "
+				"directory \'%s\'\n", srv_data_home);
+			/* Already failing, the close result is not used */
+			(void) os_file_closedir(bitmap_dir);
+			ut_free(bitmap_files->files);
+			bitmap_files->files = NULL;
+			return FALSE;
+		}
+
+		if (file_seq_num > bitmap_files->files[array_pos].seq_num) {
+			bitmap_files->files[array_pos].seq_num = file_seq_num;
+			strncpy(bitmap_files->files[array_pos].name,
+				bitmap_dir_file_info.name, FN_REFLEN);
+			bitmap_files->files[array_pos].name[FN_REFLEN - 1]
+				= '\0';
+			bitmap_files->files[array_pos].start_lsn
+				= file_start_lsn;
+		}
+	}
+
+	if (os_file_closedir(bitmap_dir)) {
+		os_file_get_last_error(TRUE);
+		fprintf(stderr, "InnoDB: Error: cannot close \'%s\'\n",
+			srv_data_home);
+		/* The array comes from ut_malloc(), thus it must be released
+		with ut_free() and not with free() */
+		ut_free(bitmap_files->files);
+		bitmap_files->files = NULL;
+		return FALSE;
+	}
+
+#ifdef UNIV_DEBUG
+	ut_ad(bitmap_files->files[0].seq_num == first_file_seq_num);
+	ut_ad(bitmap_files->files[0].start_lsn == first_file_start_lsn);
+	{
+		size_t	i;
+		for (i = 1; i < bitmap_files->count; i++) {
+			if (!bitmap_files->files[i].seq_num) {
+				break;
+			}
+			ut_ad(bitmap_files->files[i].seq_num
+			      > bitmap_files->files[i - 1].seq_num);
+			ut_ad(bitmap_files->files[i].start_lsn
+			      >= bitmap_files->files[i - 1].start_lsn);
+		}
+	}
+#endif
+
+	return TRUE;
+}
+
+/****************************************************************//**
+Open a bitmap file for reading. On success the file handle, the full file
+name, the file size and a zero read offset are stored in bitmap_file. On
+failure no file is left open.
+
+@return TRUE if opened successfully */
+static
+ibool
+log_online_open_bitmap_file_read_only(
+/*==================================*/
+	const char*			name,		/*!<in: bitmap file
+							name without directory,
+							which is assumed to be
+							srv_data_home */
+	log_online_bitmap_file_t*	bitmap_file)	/*!<out: opened bitmap
+							file */
+{
+	ibool	success	= FALSE;
+	ulint	size_low;
+	ulint	size_high;
+
+	ut_snprintf(bitmap_file->name, FN_REFLEN, "%s%s", srv_data_home, name);
+	bitmap_file->file
+		= os_file_create_simple_no_error_handling(innodb_file_bmp_key,
+							  bitmap_file->name,
+							  OS_FILE_OPEN,
+							  OS_FILE_READ_ONLY,
+							  &success);
+	if (!success) {
+		/* Here and below assume that bitmap file names do not
+		contain apostrophes, thus no need for ut_print_filename(). */
+		fprintf(stderr,
+			"InnoDB: Warning: error opening the changed page "
+			"bitmap \'%s\'\n", bitmap_file->name);
+		return FALSE;
+	}
+
+	success = os_file_get_size(bitmap_file->file, &size_low, &size_high);
+	if (UNIV_UNLIKELY(!success)) {
+		/* Without a valid size the page reads cannot be bounded, and
+		size_low and size_high must not be used. Report the error and
+		do not leak the handle. */
+
+		/* The following call prints an error message */
+		os_file_get_last_error(TRUE);
+		fprintf(stderr,
+			"InnoDB: Warning: cannot get the size of the changed "
+			"page bitmap \'%s\'\n", bitmap_file->name);
+		os_file_close(bitmap_file->file);
+		return FALSE;
+	}
+
+	bitmap_file->size = (((ib_uint64_t)size_high) << 32) | size_low;
+	bitmap_file->offset = 0;
+
+#ifdef UNIV_LINUX
+	/* Advisory hints only, their results are deliberately not checked */
+	posix_fadvise(bitmap_file->file, 0, 0, POSIX_FADV_SEQUENTIAL);
+	posix_fadvise(bitmap_file->file, 0, 0, POSIX_FADV_NOREUSE);
+#endif
+
+	return TRUE;
+}
+
+/****************************************************************//**
+Check the state of a bitmap file whose read position may be close to its
+end. If at least one more full page can be read, there is nothing to
+diagnose. Otherwise:
+1) Warn if there is a remainder of the file that is shorter than one page.
+2) Report an error if the last read page did not have the last-in-run flag
+set, i.e. the file ends in the middle of a run.
+
+@return FALSE for the error */
+static
+ibool
+log_online_diagnose_bitmap_eof(
+/*===========================*/
+	const log_online_bitmap_file_t*	bitmap_file,	/*!< in: bitmap file */
+	ibool				last_page_in_run)/*!< in: "last page in
+							run" flag value in the
+							last read page */
+{
+	/* The size check comes first so that the subtraction below is
+	never evaluated for a file shorter than one page */
+	ibool	full_page_left
+		= (bitmap_file->size >= MODIFIED_PAGE_BLOCK_SIZE)
+		&& (bitmap_file->offset
+		    <= bitmap_file->size - MODIFIED_PAGE_BLOCK_SIZE);
+
+	if (full_page_left) {
+		/* Not near EOF yet */
+		return TRUE;
+	}
+
+	if (bitmap_file->offset != bitmap_file->size) {
+		/* Some bytes remain, but fewer than a page: junk. This is
+		not fatal in itself. */
+
+		fprintf(stderr,
+			"InnoDB: Warning: junk at the end of changed "
+			"page bitmap file \'%s\'.\n",
+			bitmap_file->name);
+	}
+
+	if (last_page_in_run) {
+		return TRUE;
+	}
+
+	/* No more pages to read, but the last read one did not finish a
+	run. Printed as a "Warning" because it's not a fatal error for the
+	whole server. */
+	fprintf(stderr,
+		"InnoDB: Warning: changed page bitmap "
+		"file \'%s\' does not contain a complete run "
+		"at the end.\n", bitmap_file->name);
+
+	return FALSE;
+}
+
+/*********************************************************************//**
+Initialize the log bitmap iterator for a given range. The records are
+processed at a bitmap block granularity, i.e. all the records in the same block
+share the same start and end LSN values, the exact LSN of each record is
+unavailable (nor is it defined for blocks that are touched more than once in
+the LSN interval contained in the block). Thus min_lsn and max_lsn should be
+set at block boundaries or bigger, otherwise the records at the 1st and the
+last blocks will not be returned. Also note that there might be returned
+records with LSN < min_lsn, as min_lsn is used to select the correct starting
+file but not block.
+
@return TRUE if the iterator is initialized OK, FALSE otherwise. */
UNIV_INTERN
ibool
log_online_bitmap_iterator_init(
/*============================*/
- log_bitmap_iterator_t *i) /*!<in/out: iterator */
+ log_bitmap_iterator_t *i, /*!<in/out: iterator */
+ ib_uint64_t min_lsn,/*!< in: start LSN */
+ ib_uint64_t max_lsn)/*!< in: end LSN */
{
- ibool success;
-
ut_a(i);
- ut_snprintf(i->in_name, FN_REFLEN, "%s%s%d", srv_data_home,
- modified_page_stem, 1);
- i->in_offset = 0;
- /*
- Set up bit offset out of the reasonable limit
- to intiate reading block from file in
- log_online_bitmap_iterator_next()
- */
- i->bit_offset = MODIFIED_PAGE_BLOCK_BITMAP_LEN;
- i->in =
- os_file_create_simple_no_error_handling(innodb_file_bmp_key,
- i->in_name,
- OS_FILE_OPEN,
- OS_FILE_READ_ONLY,
- &success);
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
- fprintf(stderr,
- "InnoDB: Error: Cannot open \'%s\'\n",
- i->in_name);
+ if (!log_online_setup_bitmap_file_range(&i->in_files, min_lsn,
+ max_lsn)) {
+
return FALSE;
}
- i->page = ut_malloc(MODIFIED_PAGE_BLOCK_SIZE);
+ ut_a(i->in_files.count > 0);
+
+ /* Open the 1st bitmap file */
+ i->in_i = 0;
+ if (!log_online_open_bitmap_file_read_only(i->in_files.files[i->in_i].
+ name,
+ &i->in)) {
+ i->in_i = i->in_files.count;
+ free(i->in_files.files);
+ return FALSE;
+ }
+ i->page = ut_malloc(MODIFIED_PAGE_BLOCK_SIZE);
+ i->bit_offset = MODIFIED_PAGE_BLOCK_BITMAP_LEN;
i->start_lsn = i->end_lsn = 0;
i->space_id = 0;
i->first_page_id = 0;
+ i->last_page_in_run = TRUE;
i->changed = FALSE;
return TRUE;
@@ -985,7 +1434,11 @@ log_online_bitmap_iterator_release(
log_bitmap_iterator_t *i) /*!<in/out: iterator */
{
ut_a(i);
- os_file_close(i->in);
+
+ if (i->in_i < i->in_files.count) {
+ os_file_close(i->in.file);
+ }
+ ut_free(i->in_files.files);
ut_free(i->page);
}
@@ -1000,14 +1453,7 @@ log_online_bitmap_iterator_next(
/*============================*/
log_bitmap_iterator_t *i) /*!<in/out: iterator */
{
- ulint offset_low;
- ulint offset_high;
- ulint size_low;
- ulint size_high;
- ulint checksum = 0;
- ulint actual_checksum = !checksum;
-
- ibool success;
+ ibool checksum_ok = FALSE;
ut_a(i);
@@ -1020,66 +1466,51 @@ log_online_bitmap_iterator_next(
return TRUE;
}
- while (checksum != actual_checksum)
+ while (!checksum_ok)
{
- success = os_file_get_size(i->in,
- &size_low,
- &size_high);
- if (!success) {
- os_file_get_last_error(TRUE);
- fprintf(stderr,
- "InnoDB: Warning: can't get size of "
- "page bitmap file \'%s\'\n",
- i->in_name);
- return FALSE;
- }
-
- if (i->in_offset >=
- (ib_uint64_t)(size_low) +
- ((ib_uint64_t)(size_high) << 32))
- return FALSE;
-
- offset_high = (ulint)(i->in_offset >> 32);
- offset_low = (ulint)(i->in_offset & 0xFFFFFFFF);
+ while (i->in.size < MODIFIED_PAGE_BLOCK_SIZE
+ || (i->in.offset
+ > i->in.size - MODIFIED_PAGE_BLOCK_SIZE)) {
+
+ /* Advance file */
+ i->in_i++;
+ os_file_close(i->in.file);
+ log_online_diagnose_bitmap_eof(&i->in,
+ i->last_page_in_run);
+ if (i->in_i == i->in_files.count
+ || i->in_files.files[i->in_i].seq_num == 0) {
+
+ return FALSE;
+ }
- success = os_file_read(
- i->in,
- i->page,
- offset_low,
- offset_high,
- MODIFIED_PAGE_BLOCK_SIZE);
+ if (!log_online_open_bitmap_file_read_only(
+ i->in_files.files[i->in_i].name,
+ &i->in)) {
+ return FALSE;
+ }
+ }
- if (!success) {
+ if (!log_online_read_bitmap_page(&i->in, i->page,
+ &checksum_ok)) {
os_file_get_last_error(TRUE);
fprintf(stderr,
"InnoDB: Warning: failed reading "
"changed page bitmap file \'%s\'\n",
- i->in_name);
+ i->in_files.files[i->in_i].name);
return FALSE;
}
-
- checksum = mach_read_from_4(
- i->page + MODIFIED_PAGE_BLOCK_CHECKSUM);
-
- actual_checksum = log_online_calc_checksum(i->page);
-
- i->in_offset += MODIFIED_PAGE_BLOCK_SIZE;
}
- i->start_lsn =
- mach_read_from_8(i->page + MODIFIED_PAGE_START_LSN);
- i->end_lsn =
- mach_read_from_8(i->page + MODIFIED_PAGE_END_LSN);
- i->space_id =
- mach_read_from_4(i->page + MODIFIED_PAGE_SPACE_ID);
- i->first_page_id =
- mach_read_from_4(i->page + MODIFIED_PAGE_1ST_PAGE_ID);
- i->bit_offset =
- 0;
- i->changed =
- IS_BIT_SET(i->page + MODIFIED_PAGE_BLOCK_BITMAP,
- i->bit_offset);
+ i->start_lsn = mach_read_from_8(i->page + MODIFIED_PAGE_START_LSN);
+ i->end_lsn = mach_read_from_8(i->page + MODIFIED_PAGE_END_LSN);
+ i->space_id = mach_read_from_4(i->page + MODIFIED_PAGE_SPACE_ID);
+ i->first_page_id = mach_read_from_4(i->page
+ + MODIFIED_PAGE_1ST_PAGE_ID);
+ i->last_page_in_run = mach_read_from_4(i->page
+ + MODIFIED_PAGE_IS_LAST_BLOCK);
+ i->bit_offset = 0;
+ i->changed = IS_BIT_SET(i->page + MODIFIED_PAGE_BLOCK_BITMAP,
+ i->bit_offset);
return TRUE;
}
-
diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c
index 2555c010027..8e0516a84a9 100644
--- a/storage/xtradb/os/os0file.c
+++ b/storage/xtradb/os/os0file.c
@@ -2158,8 +2158,10 @@ os_file_set_eof_at(
ib_uint64_t new_len)/*!< in: new file length */
{
#ifdef __WIN__
- /* TODO: untested! */
- return(!_chsize_s(file, new_len));
+ LARGE_INTEGER li, li2;
+ li.QuadPart = new_len;
+ return(SetFilePointerEx(file, li, &li2,FILE_BEGIN)
+ && SetEndOfFile(file));
#else
/* TODO: works only with -D_FILE_OFFSET_BITS=64 ? */
return(!ftruncate(file, new_len));
diff --git a/storage/xtradb/row/row0ins.c b/storage/xtradb/row/row0ins.c
index 3ae4c227ddc..61c3720fa2e 100644
--- a/storage/xtradb/row/row0ins.c
+++ b/storage/xtradb/row/row0ins.c
@@ -2012,7 +2012,10 @@ row_ins_index_entry_low(
the function will return in both low_match and up_match of the
cursor sensible values */
- if (dict_index_is_clust(index)) {
+ if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) {
+ search_mode = (mode & BTR_MODIFY_TREE)
+ ? BTR_SEARCH_TREE : BTR_SEARCH_LEAF;
+ } else if (dict_index_is_clust(index)) {
search_mode = mode;
} else if (!(thr_get_trx(thr)->check_unique_secondary)) {
search_mode = mode | BTR_INSERT | BTR_IGNORE_SEC_UNIQUE;
@@ -2021,7 +2024,7 @@ row_ins_index_entry_low(
}
btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
- thr_get_trx(thr)->fake_changes ? BTR_SEARCH_LEAF : search_mode,
+ search_mode,
&cursor, 0, __FILE__, __LINE__, &mtr);
if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) {
diff --git a/storage/xtradb/row/row0mysql.c b/storage/xtradb/row/row0mysql.c
index 575160501c3..9ab85940760 100644
--- a/storage/xtradb/row/row0mysql.c
+++ b/storage/xtradb/row/row0mysql.c
@@ -1277,17 +1277,19 @@ run_again:
que_thr_stop_for_mysql_no_error(thr, trx);
- prebuilt->table->stat_n_rows++;
+ if (UNIV_LIKELY(!(trx->fake_changes))) {
- srv_n_rows_inserted++;
+ prebuilt->table->stat_n_rows++;
- if (prebuilt->table->stat_n_rows == 0) {
- /* Avoid wrap-over */
- prebuilt->table->stat_n_rows--;
+ if (prebuilt->table->stat_n_rows == 0) {
+ /* Avoid wrap-over */
+ prebuilt->table->stat_n_rows--;
+ }
+
+ srv_n_rows_inserted++;
+ row_update_statistics_if_needed(prebuilt->table);
}
- if (!(trx->fake_changes))
- row_update_statistics_if_needed(prebuilt->table);
trx->op_info = "";
return((int) err);
@@ -1534,6 +1536,11 @@ run_again:
que_thr_stop_for_mysql_no_error(thr, trx);
+ if (UNIV_UNLIKELY(trx->fake_changes)) {
+ trx->op_info = "";
+ return((int) err);
+ }
+
if (node->is_delete) {
if (prebuilt->table->stat_n_rows > 0) {
prebuilt->table->stat_n_rows--;
@@ -1548,7 +1555,6 @@ run_again:
that changes indexed columns, UPDATEs that change only non-indexed
columns would not affect statistics. */
if (node->is_delete || !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
- if (!(trx->fake_changes))
row_update_statistics_if_needed(prebuilt->table);
}
@@ -1756,6 +1762,11 @@ run_again:
return(err);
}
+ if (UNIV_UNLIKELY((trx->fake_changes))) {
+
+ return(err);
+ }
+
if (node->is_delete) {
if (table->stat_n_rows > 0) {
table->stat_n_rows--;
@@ -1766,7 +1777,6 @@ run_again:
srv_n_rows_updated++;
}
- if (!(trx->fake_changes))
row_update_statistics_if_needed(table);
return(err);
diff --git a/storage/xtradb/row/row0upd.c b/storage/xtradb/row/row0upd.c
index 859b3d415ab..b2a0eb57669 100644
--- a/storage/xtradb/row/row0upd.c
+++ b/storage/xtradb/row/row0upd.c
@@ -2018,7 +2018,8 @@ row_upd_clust_rec(
the same transaction do not modify the record in the meantime.
Therefore we can assert that the restoration of the cursor succeeds. */
- ut_a(btr_pcur_restore_position(thr_get_trx(thr)->fake_changes ? BTR_SEARCH_LEAF : BTR_MODIFY_TREE,
+ ut_a(btr_pcur_restore_position(thr_get_trx(thr)->fake_changes
+ ? BTR_SEARCH_TREE : BTR_MODIFY_TREE,
pcur, mtr));
ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur),
diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c
index 9d479ac6c87..6e210071746 100644
--- a/storage/xtradb/srv/srv0srv.c
+++ b/storage/xtradb/srv/srv0srv.c
@@ -179,8 +179,14 @@ UNIV_INTERN ibool srv_recovery_stats = FALSE;
UNIV_INTERN my_bool srv_track_changed_pages = TRUE;
+UNIV_INTERN ib_uint64_t srv_max_bitmap_file_size = 100 * 1024 * 1024;
+
UNIV_INTERN ulonglong srv_changed_pages_limit = 0;
+/** When TRUE, fake change transactions take S rather than X row locks.
+ When FALSE, row locks are not taken at all. */
+UNIV_INTERN my_bool srv_fake_changes_locks = TRUE;
+
/* if TRUE, then we auto-extend the last data file */
UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
/* if != 0, this tells the max size auto-extending may increase the
diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c
index 65a775b56da..2faa68cb87c 100644
--- a/storage/xtradb/srv/srv0start.c
+++ b/storage/xtradb/srv/srv0start.c
@@ -1148,6 +1148,24 @@ skip_size_check:
return(DB_SUCCESS);
}
+/*********************************************************************//**
+Initializes the log tracking subsystem and starts its thread. */
+static
+void
+init_log_online(void)
+/*=================*/
+{
+ if (srv_track_changed_pages) {
+
+ log_online_read_init();
+
+ /* Create the thread that follows the redo log to output the
+ changed page bitmap */
+ os_thread_create(&srv_redo_log_follow_thread, NULL,
+ thread_ids + 5 + SRV_MAX_N_IO_THREADS);
+ }
+}
+
/********************************************************************
Starts InnoDB and creates a new database if database files
are not found and the user wants.
@@ -1794,6 +1812,8 @@ innobase_start_or_create_for_mysql(void)
trx_sys_file_format_init();
if (create_new_db) {
+ init_log_online();
+
mtr_start(&mtr);
fsp_header_init(0, sum_of_new_sizes, &mtr);
@@ -1893,6 +1913,8 @@ innobase_start_or_create_for_mysql(void)
return(DB_ERROR);
}
+ init_log_online();
+
/* Since the insert buffer init is in dict_boot, and the
insert buffer is needed in any disk i/o, first we call
dict_boot(). Note that trx_sys_init_at_db_start() only needs
@@ -2040,19 +2062,6 @@ innobase_start_or_create_for_mysql(void)
if (srv_auto_lru_dump && srv_blocking_lru_restore)
buf_LRU_file_restore();
- if (srv_track_changed_pages) {
-
- /* Initialize the log tracking subsystem here to block
- server startup until it's completed due to the potential
- need to re-read previous server run's log. */
- log_online_read_init();
-
- /* Create the thread that follows the redo log to output the
- changed page bitmap */
- os_thread_create(&srv_redo_log_follow_thread, NULL,
- thread_ids + 6 + SRV_MAX_N_IO_THREADS);
- }
-
srv_is_being_started = FALSE;
err = dict_create_or_check_foreign_constraint_tables();