summary refs log tree commit diff
path: root/storage
diff options
context:
space:
mode:
author Sergei Golubchik <sergii@pisem.net> 2013-01-29 15:10:47 +0100
committer Sergei Golubchik <sergii@pisem.net> 2013-01-29 15:10:47 +0100
commit 0af4b6c6ee2b8a61823478c0a56ebdfa52cae3cc (patch)
tree 7b24eb150b9cca718c88edaabbfc6c8bb16fd015 /storage
parent cf20de000bdff07a34a373079991d24837423896 (diff)
parent 52fbe44fbbe60ecaba6453884ec1ad32755d7a04 (diff)
download mariadb-git-0af4b6c6ee2b8a61823478c0a56ebdfa52cae3cc.tar.gz
5.5 merge
Diffstat (limited to 'storage')
-rw-r--r--storage/innobase/btr/btr0btr.c32
-rw-r--r--storage/innobase/btr/btr0cur.c26
-rw-r--r--storage/innobase/buf/buf0buf.c7
-rw-r--r--storage/innobase/buf/buf0lru.c4
-rw-r--r--storage/innobase/dict/dict0dict.c45
-rw-r--r--storage/innobase/fil/fil0fil.c71
-rw-r--r--storage/innobase/handler/ha_innodb.cc74
-rw-r--r--storage/innobase/handler/ha_innodb.h9
-rw-r--r--storage/innobase/handler/handler0alter.cc8
-rw-r--r--storage/innobase/ibuf/ibuf0ibuf.c38
-rw-r--r--storage/innobase/include/btr0cur.h2
-rw-r--r--storage/innobase/include/dict0dict.h12
-rw-r--r--storage/innobase/include/dict0mem.h7
-rw-r--r--storage/innobase/include/page0zip.h8
-rw-r--r--storage/innobase/include/row0undo.h7
-rw-r--r--storage/innobase/include/row0upd.ic3
-rw-r--r--storage/innobase/include/univ.i18
-rw-r--r--storage/innobase/log/log0recv.c5
-rw-r--r--storage/innobase/os/os0file.c13
-rw-r--r--storage/innobase/page/page0cur.c6
-rw-r--r--storage/innobase/page/page0page.c14
-rw-r--r--storage/innobase/page/page0zip.c157
-rw-r--r--storage/innobase/row/row0mysql.c5
-rw-r--r--storage/innobase/row/row0sel.c27
-rw-r--r--storage/innobase/row/row0umod.c53
-rw-r--r--storage/innobase/row/row0undo.c19
-rw-r--r--storage/maria/ha_maria.cc3
-rw-r--r--storage/myisam/mi_open.c3
-rw-r--r--storage/myisam/myisamchk.c9
-rw-r--r--storage/perfschema/pfs.cc2
-rw-r--r--storage/sphinx/ha_sphinx.cc12
-rw-r--r--storage/xtradb/btr/btr0cur.c68
-rw-r--r--storage/xtradb/btr/btr0pcur.c1
-rw-r--r--storage/xtradb/buf/buf0lru.c40
-rw-r--r--storage/xtradb/buf/buf0rea.c4
-rw-r--r--storage/xtradb/fsp/fsp0fsp.c6
-rw-r--r--storage/xtradb/handler/ha_innodb.cc83
-rw-r--r--storage/xtradb/handler/ha_innodb.h1
-rw-r--r--storage/xtradb/handler/i_s.cc282
-rw-r--r--storage/xtradb/ibuf/ibuf0ibuf.c2
-rw-r--r--storage/xtradb/include/btr0btr.h5
-rw-r--r--storage/xtradb/include/btr0cur.h5
-rw-r--r--storage/xtradb/include/log0online.h104
-rw-r--r--storage/xtradb/include/srv0srv.h7
-rw-r--r--storage/xtradb/include/univ.i2
-rw-r--r--storage/xtradb/include/ut0ut.h9
-rw-r--r--storage/xtradb/include/ut0ut.ic13
-rw-r--r--storage/xtradb/lock/lock0lock.c18
-rw-r--r--storage/xtradb/log/log0log.c4
-rw-r--r--storage/xtradb/log/log0online.c829
-rw-r--r--storage/xtradb/os/os0file.c6
-rw-r--r--storage/xtradb/row/row0ins.c7
-rw-r--r--storage/xtradb/row/row0mysql.c28
-rw-r--r--storage/xtradb/row/row0upd.c3
-rw-r--r--storage/xtradb/srv/srv0srv.c6
-rw-r--r--storage/xtradb/srv/srv0start.c35
56 files changed, 1595 insertions, 672 deletions
diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c
index a6fdff72f50..1f4d8126be6 100644
--- a/storage/innobase/btr/btr0btr.c
+++ b/storage/innobase/btr/btr0btr.c
@@ -1594,7 +1594,7 @@ btr_page_reorganize_low(
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
btr_assert_not_corrupted(block, index);
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
data_size1 = page_get_data_size(page);
max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1);
@@ -1713,7 +1713,7 @@ btr_page_reorganize_low(
func_exit:
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
#ifndef UNIV_HOTBACKUP
buf_block_free(temp_block);
@@ -1788,7 +1788,7 @@ btr_page_empty(
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
ut_ad(page_zip == buf_block_get_page_zip(block));
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
btr_search_drop_page_hash_index(block);
@@ -1845,10 +1845,10 @@ btr_root_raise_and_insert(
root_block = btr_cur_get_block(cursor);
root_page_zip = buf_block_get_page_zip(root_block);
ut_ad(page_get_n_recs(root) > 0);
+ index = btr_cur_get_index(cursor);
#ifdef UNIV_ZIP_DEBUG
- ut_a(!root_page_zip || page_zip_validate(root_page_zip, root));
+ ut_a(!root_page_zip || page_zip_validate(root_page_zip, root, index));
#endif /* UNIV_ZIP_DEBUG */
- index = btr_cur_get_index(cursor);
#ifdef UNIV_BTR_DEBUG
if (!dict_index_is_ibuf(index)) {
ulint space = dict_index_get_space(index);
@@ -2778,8 +2778,8 @@ insert_empty:
#ifdef UNIV_ZIP_DEBUG
if (UNIV_LIKELY_NULL(page_zip)) {
- ut_a(page_zip_validate(page_zip, page));
- ut_a(page_zip_validate(new_page_zip, new_page));
+ ut_a(page_zip_validate(page_zip, page, cursor->index));
+ ut_a(page_zip_validate(new_page_zip, new_page, cursor->index));
}
#endif /* UNIV_ZIP_DEBUG */
@@ -2813,7 +2813,8 @@ insert_empty:
= buf_block_get_page_zip(insert_block);
ut_a(!insert_page_zip
- || page_zip_validate(insert_page_zip, insert_page));
+ || page_zip_validate(insert_page_zip, insert_page,
+ cursor->index));
}
#endif /* UNIV_ZIP_DEBUG */
@@ -3178,7 +3179,7 @@ btr_lift_page_up(
btr_page_set_level(page, page_zip, page_level, mtr);
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
}
@@ -3354,8 +3355,8 @@ err_exit:
const page_zip_des_t* page_zip
= buf_block_get_page_zip(block);
ut_a(page_zip);
- ut_a(page_zip_validate(merge_page_zip, merge_page));
- ut_a(page_zip_validate(page_zip, page));
+ ut_a(page_zip_validate(merge_page_zip, merge_page, index));
+ ut_a(page_zip_validate(page_zip, page, index));
}
#endif /* UNIV_ZIP_DEBUG */
@@ -3488,7 +3489,8 @@ err_exit:
ut_ad(page_validate(merge_page, index));
#ifdef UNIV_ZIP_DEBUG
- ut_a(!merge_page_zip || page_zip_validate(merge_page_zip, merge_page));
+ ut_a(!merge_page_zip || page_zip_validate(merge_page_zip, merge_page,
+ index));
#endif /* UNIV_ZIP_DEBUG */
/* Free the file page */
@@ -3671,7 +3673,7 @@ btr_discard_page(
page_zip_des_t* merge_page_zip
= buf_block_get_page_zip(merge_block);
ut_a(!merge_page_zip
- || page_zip_validate(merge_page_zip, merge_page));
+ || page_zip_validate(merge_page_zip, merge_page, index));
}
#endif /* UNIV_ZIP_DEBUG */
@@ -4149,7 +4151,7 @@ btr_validate_level(
ut_a(space == page_get_space_id(page));
#ifdef UNIV_ZIP_DEBUG
page_zip = buf_block_get_page_zip(block);
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
ut_a(!page_is_leaf(page));
@@ -4177,7 +4179,7 @@ loop:
#ifdef UNIV_ZIP_DEBUG
page_zip = buf_block_get_page_zip(block);
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
/* Check ordering etc. of records */
diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c
index ce43cba8525..1f92de245b1 100644
--- a/storage/innobase/btr/btr0cur.c
+++ b/storage/innobase/btr/btr0cur.c
@@ -673,7 +673,7 @@ retry_page_get:
#ifdef UNIV_ZIP_DEBUG
const page_zip_des_t* page_zip
= buf_block_get_page_zip(block);
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
buf_block_dbg_add_level(
@@ -2042,7 +2042,7 @@ any_extern:
page_zip = buf_block_get_page_zip(block);
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
if (page_zip
@@ -2253,7 +2253,7 @@ btr_cur_pessimistic_update(
MTR_MEMO_X_LOCK));
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
/* The insert buffer tree should never be updated in place. */
ut_ad(!dict_index_is_ibuf(index));
@@ -2391,7 +2391,7 @@ make_external:
btr_search_update_hash_on_delete(cursor);
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
page_cursor = btr_cur_get_page_cur(cursor);
@@ -2498,7 +2498,7 @@ make_external:
buf_block_t* rec_block = btr_cur_get_block(cursor);
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
page = buf_block_get_frame(rec_block);
#endif /* UNIV_ZIP_DEBUG */
page_zip = buf_block_get_page_zip(rec_block);
@@ -2524,7 +2524,7 @@ make_external:
return_after_reservations:
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
if (n_extents > 0) {
@@ -2886,7 +2886,7 @@ btr_cur_set_deleted_flag_for_ibuf(
when the tablespace is
uncompressed */
ibool val, /*!< in: value to set */
- mtr_t* mtr) /*!< in: mtr */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
{
/* We do not need to reserve btr_search_latch, as the page
has just been read to the buffer pool and there cannot be
@@ -2986,12 +2986,14 @@ btr_cur_optimistic_delete(
page, 1);
}
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip
+ || page_zip_validate(page_zip, page, cursor->index));
#endif /* UNIV_ZIP_DEBUG */
page_cur_delete_rec(btr_cur_get_page_cur(cursor),
cursor->index, offsets, mtr);
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip
+ || page_zip_validate(page_zip, page, cursor->index));
#endif /* UNIV_ZIP_DEBUG */
if (dict_index_is_clust(cursor->index)
@@ -3086,7 +3088,7 @@ btr_cur_pessimistic_delete(
rec = btr_cur_get_rec(cursor);
page_zip = buf_block_get_page_zip(block);
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
@@ -3096,7 +3098,7 @@ btr_cur_pessimistic_delete(
rec, offsets, page_zip,
rb_ctx, mtr);
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
}
@@ -3157,7 +3159,7 @@ btr_cur_pessimistic_delete(
page_cur_delete_rec(btr_cur_get_page_cur(cursor), index, offsets, mtr);
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
ut_ad(btr_check_node_ptr(index, block, mtr));
diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
index 70841a168ab..1af019dfc45 100644
--- a/storage/innobase/buf/buf0buf.c
+++ b/storage/innobase/buf/buf0buf.c
@@ -242,7 +242,7 @@ the read requests for the whole area.
#ifndef UNIV_HOTBACKUP
/** Value in microseconds */
-static const int WAIT_FOR_READ = 5000;
+static const int WAIT_FOR_READ = 100;
/** Number of attemtps made to read in a page in the buffer pool */
static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
@@ -2582,8 +2582,9 @@ wait_until_unfixed:
mutex_exit(&block->mutex);
if (io_fix == BUF_IO_READ) {
-
- os_thread_sleep(WAIT_FOR_READ);
+ /* wait by temporarily acquiring the s-latch */
+ rw_lock_s_lock(&(block->lock));
+ rw_lock_s_unlock(&(block->lock));
} else {
break;
}
diff --git a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
index 7c8100df58e..60894cae6ad 100644
--- a/storage/innobase/buf/buf0lru.c
+++ b/storage/innobase/buf/buf0lru.c
@@ -1998,7 +1998,9 @@ buf_LRU_block_remove_hashed_page(
break;
case FIL_PAGE_INDEX:
#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(&bpage->zip, page));
+ ut_a(page_zip_validate(
+ &bpage->zip, page,
+ ((buf_block_t*) bpage)->index));
#endif /* UNIV_ZIP_DEBUG */
break;
default:
diff --git a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c
index 6f2c2caffaf..d5b16d419f1 100644
--- a/storage/innobase/dict/dict0dict.c
+++ b/storage/innobase/dict/dict0dict.c
@@ -487,10 +487,12 @@ Looks for column n in an index.
ULINT_UNDEFINED if not contained */
UNIV_INTERN
ulint
-dict_index_get_nth_col_pos(
-/*=======================*/
- const dict_index_t* index, /*!< in: index */
- ulint n) /*!< in: column number */
+dict_index_get_nth_col_or_prefix_pos(
+/*=================================*/
+ const dict_index_t* index, /*!< in: index */
+ ulint n, /*!< in: column number */
+ ibool inc_prefix) /*!< in: TRUE=consider
+ column prefixes too */
{
const dict_field_t* field;
const dict_col_t* col;
@@ -512,7 +514,8 @@ dict_index_get_nth_col_pos(
for (pos = 0; pos < n_fields; pos++) {
field = dict_index_get_nth_field(index, pos);
- if (col == field->col && field->prefix_len == 0) {
+ if (col == field->col
+ && (inc_prefix || field->prefix_len == 0)) {
return(pos);
}
@@ -521,6 +524,20 @@ dict_index_get_nth_col_pos(
return(ULINT_UNDEFINED);
}
+/********************************************************************//**
+Looks for column n in an index.
+@return position in internal representation of the index;
+ULINT_UNDEFINED if not contained */
+UNIV_INTERN
+ulint
+dict_index_get_nth_col_pos(
+/*=======================*/
+ const dict_index_t* index, /*!< in: index */
+ ulint n) /*!< in: column number */
+{
+ return(dict_index_get_nth_col_or_prefix_pos(index, n, FALSE));
+}
+
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Returns TRUE if the index contains a column or a prefix of that column.
@@ -2017,7 +2034,6 @@ dict_index_build_internal_clust(
{
dict_index_t* new_index;
dict_field_t* field;
- ulint fixed_size;
ulint trx_id_pos;
ulint i;
ibool* indexed;
@@ -2094,7 +2110,7 @@ dict_index_build_internal_clust(
for (i = 0; i < trx_id_pos; i++) {
- fixed_size = dict_col_get_fixed_size(
+ ulint fixed_size = dict_col_get_fixed_size(
dict_index_get_nth_col(new_index, i),
dict_table_is_comp(table));
@@ -2111,7 +2127,20 @@ dict_index_build_internal_clust(
break;
}
- new_index->trx_id_offset += (unsigned int) fixed_size;
+ /* Add fixed_size to new_index->trx_id_offset.
+ Because the latter is a bit-field, an overflow
+ can theoretically occur. Check for it. */
+ fixed_size += new_index->trx_id_offset;
+
+ new_index->trx_id_offset = fixed_size;
+
+ if (new_index->trx_id_offset != fixed_size) {
+ /* Overflow. Pretend that this is a
+ variable-length PRIMARY KEY. */
+ ut_ad(0);
+ new_index->trx_id_offset = 0;
+ break;
+ }
}
}
diff --git a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c
index 23fe76f2281..8fe33459994 100644
--- a/storage/innobase/fil/fil0fil.c
+++ b/storage/innobase/fil/fil0fil.c
@@ -187,14 +187,16 @@ struct fil_space_struct {
requests on the file */
ibool stop_new_ops;
/*!< we set this TRUE when we start
- deleting a single-table tablespace */
- ibool is_being_deleted;
- /*!< this is set to TRUE when we start
- deleting a single-table tablespace and its
- file; when this flag is set no further i/o
- or flush requests can be placed on this space,
- though there may be such requests still being
- processed on this space */
+ deleting a single-table tablespace.
+ When this is set, the following new
+ ops are not allowed:
+ * read IO request
+ * ibuf merge
+ * file flush
+ Note that we can still possibly have
+ new write operations because we don't
+ check this flag when doing flush
+ batches. */
ulint purpose;/*!< FIL_TABLESPACE, FIL_LOG, or
FIL_ARCH_LOG */
UT_LIST_BASE_NODE_T(fil_node_t) chain;
@@ -1286,7 +1288,6 @@ try_again:
space->stop_ios = FALSE;
space->stop_new_ops = FALSE;
- space->is_being_deleted = FALSE;
space->purpose = purpose;
space->size = 0;
space->flags = flags;
@@ -2301,11 +2302,9 @@ try_again:
return(FALSE);
}
- ut_a(space);
+ ut_a(space->stop_new_ops);
ut_a(space->n_pending_ops == 0);
- space->is_being_deleted = TRUE;
-
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
node = UT_LIST_GET_FIRST(space->chain);
@@ -2348,13 +2347,26 @@ try_again:
rw_lock_x_lock(&space->latch);
#ifndef UNIV_HOTBACKUP
- /* Invalidate in the buffer pool all pages belonging to the
- tablespace. Since we have set space->is_being_deleted = TRUE, readahead
- or ibuf merge can no longer read more pages of this tablespace to the
- buffer pool. Thus we can clean the tablespace out of the buffer pool
- completely and permanently. The flag is_being_deleted also prevents
- fil_flush() from being applied to this tablespace. */
-
+ /* IMPORTANT: Because we have set space::stop_new_ops there
+ can't be any new ibuf merges, reads or flushes. We are here
+ because node::n_pending was zero above. However, it is still
+ possible to have pending read and write requests:
+
+ A read request can happen because the reader thread has
+ gone through the ::stop_new_ops check in buf_page_init_for_read()
+ before the flag was set and has not yet incremented ::n_pending
+ when we checked it above.
+
+ A write request can be issued any time because we don't check
+ the ::stop_new_ops flag when queueing a block for write.
+
+ We deal with pending write requests in the following function
+ where we'd minimally evict all dirty pages belonging to this
+ space from the flush_list. Note that if a block is IO-fixed
+ we'll wait for IO to complete.
+
+ We deal with potential read requests by checking the
+ ::stop_new_ops flag in fil_io(). */
buf_LRU_flush_or_remove_pages(
id, evict_all
? BUF_REMOVE_ALL_NO_WRITE
@@ -2364,6 +2376,15 @@ try_again:
mutex_enter(&fil_system->mutex);
+ /* Double check the sanity of pending ops after reacquiring
+ the fil_system::mutex. */
+ if (fil_space_get_by_id(id)) {
+ ut_a(space->n_pending_ops == 0);
+ ut_a(UT_LIST_GET_LEN(space->chain) == 1);
+ node = UT_LIST_GET_FIRST(space->chain);
+ ut_a(node->n_pending == 0);
+ }
+
success = fil_space_free(id, TRUE);
mutex_exit(&fil_system->mutex);
@@ -2421,7 +2442,7 @@ fil_tablespace_is_being_deleted(
ut_a(space != NULL);
- is_being_deleted = space->is_being_deleted;
+ is_being_deleted = space->stop_new_ops;
mutex_exit(&fil_system->mutex);
@@ -3695,7 +3716,7 @@ fil_tablespace_deleted_or_being_deleted_in_mem(
space = fil_space_get_by_id(id);
- if (space == NULL || space->is_being_deleted) {
+ if (space == NULL || space->stop_new_ops) {
mutex_exit(&fil_system->mutex);
return(TRUE);
@@ -4408,7 +4429,9 @@ fil_io(
space = fil_space_get_by_id(space_id);
- if (!space) {
+ /* If we are deleting a tablespace we don't allow any read
+ operations on that. However, we do allow write operations. */
+ if (!space || (type == OS_FILE_READ && space->stop_new_ops)) {
mutex_exit(&fil_system->mutex);
ut_print_timestamp(stderr);
@@ -4624,7 +4647,7 @@ fil_flush(
space = fil_space_get_by_id(space_id);
- if (!space || space->is_being_deleted) {
+ if (!space || space->stop_new_ops) {
mutex_exit(&fil_system->mutex);
return;
@@ -4755,7 +4778,7 @@ fil_flush_file_spaces(
space;
space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
- if (space->purpose == purpose && !space->is_being_deleted) {
+ if (space->purpose == purpose && !space->stop_new_ops) {
space_ids[n_space_ids++] = space->id;
}
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 42b0fe2b65f..bf7bfce6b5c 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -342,6 +342,7 @@ static PSI_file_info all_innodb_files[] = {
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static int innobase_close_connection(handlerton *hton, THD* thd);
+static void innobase_kill_query(handlerton *hton, THD* thd, enum thd_kill_levels level);
static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
static int innobase_commit(handlerton *hton, THD* thd, bool all);
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
@@ -921,8 +922,7 @@ convert_error_code_to_mysql(
return(0);
case DB_INTERRUPTED:
- my_error(ER_QUERY_INTERRUPTED, MYF(0));
- /* fall through */
+ return(HA_ERR_ABORTED_BY_USER);
case DB_FOREIGN_EXCEED_MAX_CASCADE:
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
@@ -1013,11 +1013,23 @@ convert_error_code_to_mysql(
case DB_TABLE_NOT_FOUND:
return(HA_ERR_NO_SUCH_TABLE);
- case DB_TOO_BIG_RECORD:
- my_error(ER_TOO_BIG_ROWSIZE, MYF(0),
- page_get_free_space_of_empty(flags
- & DICT_TF_COMPACT) / 2);
+ case DB_TOO_BIG_RECORD: {
+ /* If prefix is true then a 768-byte prefix is stored
+ locally for BLOB fields. Refer to dict_table_get_format() */
+ bool prefix = ((flags & DICT_TF_FORMAT_MASK)
+ >> DICT_TF_FORMAT_SHIFT) < UNIV_FORMAT_B;
+ my_printf_error(ER_TOO_BIG_ROWSIZE,
+ "Row size too large (> %lu). Changing some columns "
+ "to TEXT or BLOB %smay help. In current row "
+ "format, BLOB prefix of %d bytes is stored inline.",
+ MYF(0),
+ page_get_free_space_of_empty(flags &
+ DICT_TF_COMPACT) / 2,
+ prefix ? "or using ROW_FORMAT=DYNAMIC "
+ "or ROW_FORMAT=COMPRESSED ": "",
+ prefix ? DICT_MAX_FIXED_COL_LEN : 0);
return(HA_ERR_TO_BIG_ROW);
+ }
case DB_TOO_BIG_INDEX_COL:
my_error(ER_INDEX_COLUMN_TOO_LONG, MYF(0),
@@ -1487,19 +1499,19 @@ innobase_next_autoinc(
*/
max_value= (~(ulonglong) 0);
- /* Current value should never be greater than the maximum. */
- ut_a(current <= max_value);
-
/* According to MySQL documentation, if the offset is greater than
the step then the offset is ignored. */
if (offset > block) {
offset = 0;
}
- /* Check for overflow. */
+ /* Check for overflow. Current can be > max_value if the value is
+ in reality a negative value. The Visual Studio compilers convert
+ large double values automatically into the maximum value of the
+ unsigned long long datatype. */
if (block >= max_value
|| offset > max_value
- || current == max_value
+ || current >= max_value
|| max_value - offset <= offset) {
next_value = max_value;
@@ -2145,7 +2157,7 @@ trx_is_interrupted(
/*===============*/
trx_t* trx) /*!< in: transaction */
{
- return(trx && trx->mysql_thd && thd_killed((THD*) trx->mysql_thd));
+ return(trx && trx->mysql_thd && thd_kill_level((THD*) trx->mysql_thd));
}
/**********************************************************************//**
@@ -2276,6 +2288,7 @@ innobase_init(
innobase_hton->flags=HTON_NO_FLAGS;
innobase_hton->release_temporary_latches=innobase_release_temporary_latches;
innobase_hton->alter_table_flags = innobase_alter_table_flags;
+ innobase_hton->kill_query = innobase_kill_query;
ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
@@ -3324,6 +3337,35 @@ innobase_close_connection(
}
+/*****************************************************************//**
+Cancel any pending lock request associated with the current THD. */
+static
+void
+innobase_kill_query(
+/*======================*/
+ handlerton* hton, /*!< in: innobase handlerton */
+ THD* thd, /*!< in: MySQL thread being killed */
+ enum thd_kill_levels level) /*!< in: kill level */
+{
+ trx_t* trx;
+ DBUG_ENTER("innobase_kill_query");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ mutex_enter(&kernel_mutex);
+
+ trx = thd_to_trx(thd);
+
+ /* Cancel a pending lock request. */
+ if (trx && trx->wait_lock) {
+ lock_cancel_waiting_and_release(trx->wait_lock);
+ }
+
+ mutex_exit(&kernel_mutex);
+
+ DBUG_VOID_RETURN;
+}
+
+
/*************************************************************************//**
** InnoDB database tables
*****************************************************************************/
@@ -8825,7 +8867,7 @@ ha_innobase::check(
row_mysql_unlock_data_dictionary(prebuilt->trx);
}
- if (thd_killed(user_thd)) {
+ if (thd_kill_level(user_thd)) {
break;
}
@@ -8882,7 +8924,7 @@ ha_innobase::check(
mutex_exit(&kernel_mutex);
prebuilt->trx->op_info = "";
- if (thd_killed(user_thd)) {
+ if (thd_kill_level(user_thd)) {
my_error(ER_QUERY_INTERRUPTED, MYF(0));
}
@@ -11867,8 +11909,8 @@ static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method,
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
PLUGIN_VAR_RQCMDARG,
- "Debug flags for InnoDB change buffering (0=none)",
- NULL, NULL, 0, 0, 1, 0);
+ "Debug flags for InnoDB change buffering (0=none, 2=crash at merge)",
+ NULL, NULL, 0, 0, 2, 0);
#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
static MYSQL_SYSVAR_BOOL(random_read_ahead, srv_random_read_ahead,
diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
index 7ef3e954636..a33af4db2fe 100644
--- a/storage/innobase/handler/ha_innodb.h
+++ b/storage/innobase/handler/ha_innodb.h
@@ -80,12 +80,13 @@ class ha_innobase: public handler
uchar* upd_buf; /*!< buffer used in updates */
ulint upd_buf_size; /*!< the size of upd_buf in bytes */
- uchar srch_key_val1[REC_VERSION_56_MAX_INDEX_COL_LEN + 2];
- uchar srch_key_val2[REC_VERSION_56_MAX_INDEX_COL_LEN + 2];
+ uchar srch_key_val1[MAX_KEY_LENGTH + MAX_REF_PARTS*2];
+ uchar srch_key_val2[MAX_KEY_LENGTH + MAX_REF_PARTS*2];
/*!< buffers used in converting
search key values from MySQL format
- to InnoDB format. "+ 2" for the two
- bytes where the length is stored */
+ to InnoDB format. For each column
+ 2 bytes are used to store length,
+ hence MAX_REF_PARTS*2. */
Table_flags int_table_flags;
uint primary_key;
ulong start_of_scan; /*!< this is set to 1 when we are
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index dc65fb3ff1a..451e37f2136 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -112,13 +112,17 @@ innobase_col_to_mysql(
/* These column types should never be shipped to MySQL. */
ut_ad(0);
- case DATA_CHAR:
case DATA_FIXBINARY:
case DATA_FLOAT:
case DATA_DOUBLE:
case DATA_DECIMAL:
/* Above are the valid column types for MySQL data. */
ut_ad(flen == len);
+ /* fall through */
+ case DATA_CHAR:
+ /* We may have flen > len when there is a shorter
+ prefix on a CHAR column. */
+ ut_ad(flen >= len);
#else /* UNIV_DEBUG */
default:
#endif /* UNIV_DEBUG */
@@ -151,7 +155,7 @@ innobase_rec_to_mysql(
field->reset();
- ipos = dict_index_get_nth_col_pos(index, i);
+ ipos = dict_index_get_nth_col_or_prefix_pos(index, i, TRUE);
if (UNIV_UNLIKELY(ipos == ULINT_UNDEFINED)) {
null_field:
diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
index c7887afef92..70af56b99f2 100644
--- a/storage/innobase/ibuf/ibuf0ibuf.c
+++ b/storage/innobase/ibuf/ibuf0ibuf.c
@@ -2868,6 +2868,14 @@ ibuf_get_volume_buffered_count_func(
ut_a(len == 1);
ut_ad(trx_sys_multiple_tablespace_format);
+ if (rec_get_deleted_flag(rec, 0)) {
+ /* This record has been merged already,
+ but apparently the system crashed before
+ the change was discarded from the buffer.
+ Pretend that the record does not exist. */
+ return(0);
+ }
+
types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
switch (UNIV_EXPECT(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
@@ -4176,11 +4184,11 @@ ibuf_delete(
page, 1);
}
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
page_cur_delete_rec(&page_cur, index, offsets, mtr);
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
if (page_zip) {
@@ -4285,6 +4293,22 @@ ibuf_delete_rec(
ut_ad(ibuf_rec_get_page_no(mtr, btr_pcur_get_rec(pcur)) == page_no);
ut_ad(ibuf_rec_get_space(mtr, btr_pcur_get_rec(pcur)) == space);
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+ if (ibuf_debug == 2) {
+ /* Inject a fault (crash). We do this before trying
+ optimistic delete, because a pessimistic delete in the
+ change buffer would require a larger test case. */
+
+ /* Flag the buffered record as processed, to avoid
+ an assertion failure after crash recovery. */
+ btr_cur_set_deleted_flag_for_ibuf(
+ btr_pcur_get_rec(pcur), NULL, TRUE, mtr);
+ ibuf_mtr_commit(mtr);
+ log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE);
+ DBUG_SUICIDE();
+ }
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr);
if (success) {
@@ -4319,7 +4343,13 @@ ibuf_delete_rec(
ut_ad(ibuf_rec_get_page_no(mtr, btr_pcur_get_rec(pcur)) == page_no);
ut_ad(ibuf_rec_get_space(mtr, btr_pcur_get_rec(pcur)) == space);
- /* We have to resort to a pessimistic delete from ibuf */
+ /* We have to resort to a pessimistic delete from ibuf.
+ Delete-mark the record so that it will not be applied again,
+ in case the server crashes before the pessimistic delete is
+ made persistent. */
+ btr_cur_set_deleted_flag_for_ibuf(
+ btr_pcur_get_rec(pcur), NULL, TRUE, mtr);
+
btr_pcur_store_position(pcur, mtr);
ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
@@ -4600,7 +4630,7 @@ loop:
fputs("InnoDB: Discarding record\n ", stderr);
rec_print_old(stderr, rec);
fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
- } else if (block) {
+ } else if (block && !rec_get_deleted_flag(rec, 0)) {
/* Now we have at pcur a record which should be
applied on the index page; NOTE that the call below
copies pointers to fields in rec, and we must
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index cbc6103c2ee..6819f2bc2c5 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -635,7 +635,7 @@ btr_cur_set_deleted_flag_for_ibuf(
when the tablespace is
uncompressed */
ibool val, /*!< in: value to set */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr); /*!< in/out: mini-transaction */
/*######################################################################*/
/** In the pessimistic delete, if the page data size drops below this
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index b609bce9d41..54af14313c4 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -910,6 +910,18 @@ dict_index_get_nth_col_pos(
const dict_index_t* index, /*!< in: index */
ulint n); /*!< in: column number */
/********************************************************************//**
+Looks for column n, or optionally a prefix of column n, in an index.
+@return position in internal representation of the index;
+ULINT_UNDEFINED if not contained */
+UNIV_INTERN
+ulint
+dict_index_get_nth_col_or_prefix_pos(
+/*=================================*/
+ const dict_index_t* index, /*!< in: index */
+ ulint n, /*!< in: column number */
+ ibool inc_prefix); /*!< in: TRUE=consider
+ column prefixes too */
+/********************************************************************//**
Returns TRUE if the index contains a column or a prefix of that column.
@return TRUE if contains the column or its prefix */
UNIV_INTERN
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index 4c371c8d5cf..980417715b3 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -377,10 +377,15 @@ struct dict_index_struct{
unsigned type:DICT_IT_BITS;
/*!< index type (DICT_CLUSTERED, DICT_UNIQUE,
DICT_UNIVERSAL, DICT_IBUF, DICT_CORRUPT) */
- unsigned trx_id_offset:10;/*!< position of the trx id column
+#define MAX_KEY_LENGTH_BITS 12
+ unsigned trx_id_offset:MAX_KEY_LENGTH_BITS;
+ /*!< position of the trx id column
in a clustered index record, if the fields
before it are known to be of a fixed size,
0 otherwise */
+#if (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH
+# error (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH
+#endif
unsigned n_user_defined_cols:10;
/*!< number of columns the user defined to
be in the index: in the internal
diff --git a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h
index 00c1d0516e6..9cf3b9805bc 100644
--- a/storage/innobase/include/page0zip.h
+++ b/storage/innobase/include/page0zip.h
@@ -156,9 +156,10 @@ page_zip_validate_low(
/*==================*/
const page_zip_des_t* page_zip,/*!< in: compressed page */
const page_t* page, /*!< in: uncompressed page */
+ const dict_index_t* index, /*!< in: index of the page, if known */
ibool sloppy) /*!< in: FALSE=strict,
TRUE=ignore the MIN_REC_FLAG */
- __attribute__((nonnull));
+ __attribute__((nonnull(1,2)));
/**********************************************************************//**
Check that the compressed and decompressed pages match. */
UNIV_INTERN
@@ -166,8 +167,9 @@ ibool
page_zip_validate(
/*==============*/
const page_zip_des_t* page_zip,/*!< in: compressed page */
- const page_t* page) /*!< in: uncompressed page */
- __attribute__((nonnull));
+ const page_t* page, /*!< in: uncompressed page */
+ const dict_index_t* index) /*!< in: index of the page, if known */
+ __attribute__((nonnull(1,2)));
#endif /* UNIV_ZIP_DEBUG */
/**********************************************************************//**
diff --git a/storage/innobase/include/row0undo.h b/storage/innobase/include/row0undo.h
index 6eb4ca448b3..9420d022e3b 100644
--- a/storage/innobase/include/row0undo.h
+++ b/storage/innobase/include/row0undo.h
@@ -87,10 +87,6 @@ that index record. */
enum undo_exec {
UNDO_NODE_FETCH_NEXT = 1, /*!< we should fetch the next
undo log record */
- UNDO_NODE_PREV_VERS, /*!< the roll ptr to previous
- version of a row is stored in
- node, and undo should be done
- based on it */
UNDO_NODE_INSERT, /*!< undo a fresh insert of a
row to a table */
UNDO_NODE_MODIFY /*!< undo a modify operation
@@ -108,9 +104,6 @@ struct undo_node_struct{
undo_no_t undo_no;/*!< undo number of the record */
ulint rec_type;/*!< undo log record type: TRX_UNDO_INSERT_REC,
... */
- roll_ptr_t new_roll_ptr;
- /*!< roll ptr to restore to clustered index
- record */
trx_id_t new_trx_id; /*!< trx id to restore to clustered index
record */
btr_pcur_t pcur; /*!< persistent cursor used in searching the
diff --git a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic
index 10646241125..6706c9f8c69 100644
--- a/storage/innobase/include/row0upd.ic
+++ b/storage/innobase/include/row0upd.ic
@@ -28,6 +28,7 @@ Created 12/27/1996 Heikki Tuuri
# include "trx0trx.h"
# include "trx0undo.h"
# include "row0row.h"
+# include "lock0lock.h"
#endif /* !UNIV_HOTBACKUP */
#include "page0zip.h"
@@ -171,6 +172,8 @@ row_upd_rec_sys_fields(
#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
#endif
+ ut_ad(lock_check_trx_id_sanity(trx_read_trx_id(rec + offset),
+ rec, index, offsets, FALSE));
trx_write_trx_id(rec + offset, trx->id);
trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
}
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index ce7181e7bd5..0b0b4b8d04c 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -295,6 +295,24 @@ management to ensure correct alignment for doubles etc. */
========================
*/
+/** There are currently two InnoDB file formats, which are used to group
+features with similar restrictions and dependencies. Using an enum allows
+switch statements to give a compiler warning when a new one is introduced. */
+enum innodb_file_formats_enum {
+ /** Antelope File Format: InnoDB/MySQL up to 5.1.
+ This format includes the REDUNDANT and COMPACT row formats. */
+ UNIV_FORMAT_A = 0,
+
+ /** Barracuda File Format: Introduced in the InnoDB plugin for 5.1.
+ This format includes the COMPRESSED and DYNAMIC row formats. It
+ includes the ability to create secondary indexes from data that
+ is not on the clustered index page and the ability to store more
+ data off the clustered index page. */
+ UNIV_FORMAT_B = 1
+};
+
+typedef enum innodb_file_formats_enum innodb_file_formats_t;
+
/* The 2-logarithm of UNIV_PAGE_SIZE: */
#define UNIV_PAGE_SIZE_SHIFT 14
/* The universal page size of the database */
diff --git a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
index 6c55a1badc5..03ba4d12cd0 100644
--- a/storage/innobase/log/log0recv.c
+++ b/storage/innobase/log/log0recv.c
@@ -1636,9 +1636,8 @@ recv_recover_page_func(
if (fil_page_get_type(page) == FIL_PAGE_INDEX) {
page_zip_des_t* page_zip = buf_block_get_page_zip(block);
- if (page_zip) {
- ut_a(page_zip_validate_low(page_zip, page, FALSE));
- }
+ ut_a(!page_zip
+ || page_zip_validate_low(page_zip, page, NULL, FALSE));
}
#endif /* UNIV_ZIP_DEBUG */
diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
index 1068c033871..46eb9c4a935 100644
--- a/storage/innobase/os/os0file.c
+++ b/storage/innobase/os/os0file.c
@@ -1397,6 +1397,13 @@ os_file_create_func(
DWORD create_flag;
DWORD attributes;
ibool retry;
+
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_disk_full",
+ *success = FALSE;
+ SetLastError(ERROR_DISK_FULL);
+ return((os_file_t) -1);
+ );
try_again:
ut_a(name);
@@ -1512,6 +1519,12 @@ try_again:
ibool retry;
const char* mode_str = NULL;
+ DBUG_EXECUTE_IF(
+ "ib_create_table_fail_disk_full",
+ *success = FALSE;
+ errno = ENOSPC;
+ return((os_file_t) -1);
+ );
try_again:
ut_a(name);
diff --git a/storage/innobase/page/page0cur.c b/storage/innobase/page/page0cur.c
index 55ba72b4fe9..a722f5b188d 100644
--- a/storage/innobase/page/page0cur.c
+++ b/storage/innobase/page/page0cur.c
@@ -310,7 +310,7 @@ page_cur_search_with_match(
#endif /* UNIV_DEBUG */
page = buf_block_get_frame(block);
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
page_check_dir(page);
@@ -1248,7 +1248,7 @@ page_cur_insert_rec_zip(
ut_ad(!page_rec_is_supremum(*current_rec));
#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page));
+ ut_a(page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
/* 1. Get the size of the physical record in the page */
@@ -1973,7 +1973,7 @@ page_cur_delete_rec(
}
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
}
diff --git a/storage/innobase/page/page0page.c b/storage/innobase/page/page0page.c
index 78135b0aff2..81051f8f4fe 100644
--- a/storage/innobase/page/page0page.c
+++ b/storage/innobase/page/page0page.c
@@ -626,7 +626,7 @@ page_copy_rec_list_end(
Furthermore, btr_compress() may set FIL_PAGE_PREV to
FIL_NULL on new_page while leaving it intact on
new_page_zip. So, we cannot validate new_page_zip. */
- ut_a(page_zip_validate_low(page_zip, page, TRUE));
+ ut_a(page_zip_validate_low(page_zip, page, index, TRUE));
}
#endif /* UNIV_ZIP_DEBUG */
ut_ad(buf_block_get_frame(block) == page);
@@ -946,7 +946,7 @@ page_delete_rec_list_end(
ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE);
ut_ad(!page_zip || page_rec_is_comp(rec));
#ifdef UNIV_ZIP_DEBUG
- ut_a(!page_zip || page_zip_validate(page_zip, page));
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
if (page_rec_is_infimum(rec)) {
@@ -988,7 +988,7 @@ page_delete_rec_list_end(
ULINT_UNDEFINED, &heap);
rec = rec_get_next_ptr(rec, TRUE);
#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page));
+ ut_a(page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
page_cur_delete_rec(&cur, index, offsets, mtr);
} while (page_offset(rec) != PAGE_NEW_SUPREMUM);
@@ -1128,7 +1128,8 @@ page_delete_rec_list_start(
between btr_attach_half_pages() and insert_page = ...
when btr_page_get_split_rec_to_left() holds
(direction == FSP_DOWN). */
- ut_a(!page_zip || page_zip_validate_low(page_zip, page, TRUE));
+ ut_a(!page_zip
+ || page_zip_validate_low(page_zip, page, index, TRUE));
}
#endif /* UNIV_ZIP_DEBUG */
@@ -1199,9 +1200,10 @@ page_move_rec_list_end(
= buf_block_get_page_zip(block);
ut_a(!new_page_zip == !page_zip);
ut_a(!new_page_zip
- || page_zip_validate(new_page_zip, new_page));
+ || page_zip_validate(new_page_zip, new_page, index));
ut_a(!page_zip
- || page_zip_validate(page_zip, page_align(split_rec)));
+ || page_zip_validate(page_zip, page_align(split_rec),
+ index));
}
#endif /* UNIV_ZIP_DEBUG */
diff --git a/storage/innobase/page/page0zip.c b/storage/innobase/page/page0zip.c
index ca3836689d3..9f895e60803 100644
--- a/storage/innobase/page/page0zip.c
+++ b/storage/innobase/page/page0zip.c
@@ -1433,7 +1433,7 @@ err_exit:
page_zip_get_size(page_zip) - PAGE_DATA);
mem_heap_free(heap);
#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page));
+ ut_a(page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
if (mtr) {
@@ -3119,6 +3119,7 @@ page_zip_validate_low(
/*==================*/
const page_zip_des_t* page_zip,/*!< in: compressed page */
const page_t* page, /*!< in: uncompressed page */
+ const dict_index_t* index, /*!< in: index of the page, if known */
ibool sloppy) /*!< in: FALSE=strict,
TRUE=ignore the MIN_REC_FLAG */
{
@@ -3206,39 +3207,102 @@ page_zip_validate_low(
committed. Let us tolerate that difference when we
are performing a sloppy validation. */
- if (sloppy) {
- byte info_bits_diff;
- ulint offset
- = rec_get_next_offs(page + PAGE_NEW_INFIMUM,
- TRUE);
- ut_a(offset >= PAGE_NEW_SUPREMUM);
- offset -= 5 /* REC_NEW_INFO_BITS */;
-
- info_bits_diff = page[offset] ^ temp_page[offset];
-
- if (info_bits_diff == REC_INFO_MIN_REC_FLAG) {
- temp_page[offset] = page[offset];
-
- if (!memcmp(page + PAGE_HEADER,
- temp_page + PAGE_HEADER,
- UNIV_PAGE_SIZE - PAGE_HEADER
- - FIL_PAGE_DATA_END)) {
-
- /* Only the minimum record flag
- differed. Let us ignore it. */
- page_zip_fail(("page_zip_validate: "
- "min_rec_flag "
- "(ignored, "
- "%lu,%lu,0x%02lx)\n",
- page_get_space_id(page),
- page_get_page_no(page),
- (ulong) page[offset]));
- goto func_exit;
+ ulint* offsets;
+ mem_heap_t* heap;
+ const rec_t* rec;
+ const rec_t* trec;
+ byte info_bits_diff;
+ ulint offset
+ = rec_get_next_offs(page + PAGE_NEW_INFIMUM, TRUE);
+ ut_a(offset >= PAGE_NEW_SUPREMUM);
+ offset -= 5/*REC_NEW_INFO_BITS*/;
+
+ info_bits_diff = page[offset] ^ temp_page[offset];
+
+ if (info_bits_diff == REC_INFO_MIN_REC_FLAG) {
+ temp_page[offset] = page[offset];
+
+ if (!memcmp(page + PAGE_HEADER,
+ temp_page + PAGE_HEADER,
+ UNIV_PAGE_SIZE - PAGE_HEADER
+ - FIL_PAGE_DATA_END)) {
+
+ /* Only the minimum record flag
+ differed. Let us ignore it. */
+ page_zip_fail(("page_zip_validate: "
+ "min_rec_flag "
+ "(%s"
+ "%lu,%lu,0x%02lx)\n",
+ sloppy ? "ignored, " : "",
+ page_get_space_id(page),
+ page_get_page_no(page),
+ (ulong) page[offset]));
+ valid = sloppy;
+ goto func_exit;
+ }
+ }
+
+ /* Compare the pointers in the PAGE_FREE list. */
+ rec = page_header_get_ptr(page, PAGE_FREE);
+ trec = page_header_get_ptr(temp_page, PAGE_FREE);
+
+ while (rec || trec) {
+ if (page_offset(rec) != page_offset(trec)) {
+ page_zip_fail(("page_zip_validate: "
+ "PAGE_FREE list: %u!=%u\n",
+ (unsigned) page_offset(rec),
+ (unsigned) page_offset(trec)));
+ valid = FALSE;
+ goto func_exit;
+ }
+
+ rec = page_rec_get_next_low(rec, TRUE);
+ trec = page_rec_get_next_low(trec, TRUE);
+ }
+
+ /* Compare the records. */
+ heap = NULL;
+ offsets = NULL;
+ rec = page_rec_get_next_low(
+ page + PAGE_NEW_INFIMUM, TRUE);
+ trec = page_rec_get_next_low(
+ temp_page + PAGE_NEW_INFIMUM, TRUE);
+
+ do {
+ if (page_offset(rec) != page_offset(trec)) {
+ page_zip_fail(("page_zip_validate: "
+ "record list: 0x%02x!=0x%02x\n",
+ (unsigned) page_offset(rec),
+ (unsigned) page_offset(trec)));
+ valid = FALSE;
+ break;
+ }
+
+ if (index) {
+ /* Compare the record bytes on both pages, using the offsets computed for rec. */
+ offsets = rec_get_offsets(
+ rec, index, offsets,
+ ULINT_UNDEFINED, &heap);
+
+ if (memcmp(rec - rec_offs_extra_size(offsets),
+ trec - rec_offs_extra_size(offsets),
+ rec_offs_size(offsets))) {
+ page_zip_fail(
+ ("page_zip_validate: "
+ "record content: 0x%02x\n",
+ (unsigned) page_offset(rec)));
+ valid = FALSE;
+ break;
+ }
+ }
+
+ rec = page_rec_get_next_low(rec, TRUE);
+ trec = page_rec_get_next_low(trec, TRUE);
+ } while (rec || trec);
+
+ if (heap) {
+ mem_heap_free(heap);
}
- page_zip_fail(("page_zip_validate: content\n"));
- valid = FALSE;
}
func_exit:
@@ -3260,9 +3324,10 @@ ibool
page_zip_validate(
/*==============*/
const page_zip_des_t* page_zip,/*!< in: compressed page */
- const page_t* page) /*!< in: uncompressed page */
+ const page_t* page, /*!< in: uncompressed page */
+ const dict_index_t* index) /*!< in: index of the page, if known */
{
- return(page_zip_validate_low(page_zip, page,
+ return(page_zip_validate_low(page_zip, page, index,
recv_recovery_is_on()));
}
#endif /* UNIV_ZIP_DEBUG */
@@ -3593,7 +3658,7 @@ page_zip_write_rec(
page_zip->m_nonempty = TRUE;
#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page_align(rec)));
+ ut_a(page_zip_validate(page_zip, page_align(rec), index));
#endif /* UNIV_ZIP_DEBUG */
}
@@ -3640,7 +3705,7 @@ corrupt:
}
#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page));
+ ut_a(page_zip_validate(page_zip, page, NULL));
#endif /* UNIV_ZIP_DEBUG */
memcpy(page + offset,
@@ -3649,7 +3714,7 @@ corrupt:
ptr + 4, BTR_EXTERN_FIELD_REF_SIZE);
#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page));
+ ut_a(page_zip_validate(page_zip, page, NULL));
#endif /* UNIV_ZIP_DEBUG */
}
@@ -3716,7 +3781,7 @@ page_zip_write_blob_ptr(
memcpy(externs, field, BTR_EXTERN_FIELD_REF_SIZE);
#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page));
+ ut_a(page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
if (mtr) {
@@ -3787,7 +3852,7 @@ corrupt:
}
#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page));
+ ut_a(page_zip_validate(page_zip, page, NULL));
#endif /* UNIV_ZIP_DEBUG */
field = page + offset;
@@ -3808,7 +3873,7 @@ corrupt:
memcpy(storage, ptr + 4, REC_NODE_PTR_SIZE);
#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page));
+ ut_a(page_zip_validate(page_zip, page, NULL));
#endif /* UNIV_ZIP_DEBUG */
}
@@ -4035,7 +4100,7 @@ page_zip_clear_rec(
}
#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page));
+ ut_a(page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
}
@@ -4059,7 +4124,7 @@ page_zip_rec_set_deleted(
*slot &= ~(PAGE_ZIP_DIR_SLOT_DEL >> 8);
}
#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page_align(rec)));
+ ut_a(page_zip_validate(page_zip, page_align(rec), NULL));
#endif /* UNIV_ZIP_DEBUG */
}
@@ -4360,14 +4425,14 @@ corrupt:
goto corrupt;
}
#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page));
+ ut_a(page_zip_validate(page_zip, page, NULL));
#endif /* UNIV_ZIP_DEBUG */
memcpy(page + offset, ptr, len);
memcpy(page_zip->data + offset, ptr, len);
#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page));
+ ut_a(page_zip_validate(page_zip, page, NULL));
#endif /* UNIV_ZIP_DEBUG */
}
@@ -4445,7 +4510,7 @@ page_zip_reorganize(
ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
ut_ad(page_is_comp(page));
ut_ad(!dict_index_is_ibuf(index));
- /* Note that page_zip_validate(page_zip, page) may fail here. */
+ /* Note that page_zip_validate(page_zip, page, index) may fail here. */
UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE);
UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
@@ -4532,7 +4597,7 @@ page_zip_copy_recs(
FIL_PAGE_PREV or PAGE_LEVEL, causing a temporary min_rec_flag
mismatch. A strict page_zip_validate() will be executed later
during the B-tree operations. */
- ut_a(page_zip_validate_low(src_zip, src, TRUE));
+ ut_a(page_zip_validate_low(src_zip, src, index, TRUE));
#endif /* UNIV_ZIP_DEBUG */
ut_a(page_zip_get_size(page_zip) == page_zip_get_size(src_zip));
if (UNIV_UNLIKELY(src_zip->n_blobs)) {
@@ -4593,7 +4658,7 @@ page_zip_copy_recs(
}
#ifdef UNIV_ZIP_DEBUG
- ut_a(page_zip_validate(page_zip, page));
+ ut_a(page_zip_validate(page_zip, page, index));
#endif /* UNIV_ZIP_DEBUG */
btr_blob_dbg_add(page, index, "page_zip_copy_recs");
diff --git a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c
index 20e8c13ea70..7e1d18d41b6 100644
--- a/storage/innobase/row/row0mysql.c
+++ b/storage/innobase/row/row0mysql.c
@@ -1855,7 +1855,8 @@ Creates a table for MySQL. If the name of the table ends in
one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor",
"innodb_table_monitor", then this will also start the printing of monitor
output by the master thread. If the table name ends in "innodb_mem_validate",
-InnoDB will try to invoke mem_validate().
+InnoDB will try to invoke mem_validate(). On failure the transaction will
+be rolled back and the 'table' object will be freed.
@return error code or DB_SUCCESS */
UNIV_INTERN
int
@@ -1993,6 +1994,8 @@ err_exit:
row_drop_table_for_mysql(table->name, trx, FALSE);
trx_commit_for_mysql(trx);
+ } else {
+ dict_mem_table_free(table);
}
break;
diff --git a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c
index 7bec0a26225..b46b2eacd9d 100644
--- a/storage/innobase/row/row0sel.c
+++ b/storage/innobase/row/row0sel.c
@@ -2487,6 +2487,9 @@ row_sel_convert_mysql_key_to_innobase(
dfield++;
}
+ DBUG_EXECUTE_IF("innodb_srch_key_buffer_full",
+ ut_a(buf == (original_buf + buf_len)););
+
ut_a(buf <= original_buf + buf_len);
/* We set the length of tuple to n_fields: we assume that the memory
@@ -3915,6 +3918,11 @@ wait_table_again:
}
rec_loop:
+ if (trx_is_interrupted(trx)) {
+ err = DB_INTERRUPTED;
+ goto normal_return;
+ }
+
/*-------------------------------------------------------------*/
/* PHASE 4: Look for matching records in a loop */
@@ -4839,11 +4847,15 @@ row_search_autoinc_read_column(
rec_offs_init(offsets_);
- offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);
+ offsets = rec_get_offsets(rec, index, offsets, col_no + 1, &heap);
- data = rec_get_nth_field(rec, offsets, col_no, &len);
+ if (rec_offs_nth_sql_null(offsets, col_no)) {
+ /* There is no non-NULL value in the auto-increment column. */
+ value = 0;
+ goto func_exit;
+ }
- ut_a(len != UNIV_SQL_NULL);
+ data = rec_get_nth_field(rec, offsets, col_no, &len);
switch (mtype) {
case DATA_INT:
@@ -4865,14 +4877,15 @@ row_search_autoinc_read_column(
ut_error;
}
- if (UNIV_LIKELY_NULL(heap)) {
- mem_heap_free(heap);
- }
-
if (!unsigned_type && (ib_int64_t) value < 0) {
value = 0;
}
+func_exit:
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+
return(value);
}
diff --git a/storage/innobase/row/row0umod.c b/storage/innobase/row/row0umod.c
index 9597c476125..9e5fb8686c6 100644
--- a/storage/innobase/row/row0umod.c
+++ b/storage/innobase/row/row0umod.c
@@ -69,36 +69,6 @@ If you make a change in this module make sure that no codepath is
introduced where a call to log_free_check() is bypassed. */
/***********************************************************//**
-Checks if also the previous version of the clustered index record was
-modified or inserted by the same transaction, and its undo number is such
-that it should be undone in the same rollback.
-@return TRUE if also previous modify or insert of this row should be undone */
-static
-ibool
-row_undo_mod_undo_also_prev_vers(
-/*=============================*/
- undo_node_t* node, /*!< in: row undo node */
- undo_no_t* undo_no)/*!< out: the undo number */
-{
- trx_undo_rec_t* undo_rec;
- trx_t* trx;
-
- trx = node->trx;
-
- if (node->new_trx_id != trx->id) {
-
- *undo_no = 0;
- return(FALSE);
- }
-
- undo_rec = trx_undo_get_undo_rec_low(node->new_roll_ptr, node->heap);
-
- *undo_no = trx_undo_rec_get_undo_no(undo_rec);
-
- return(trx->roll_limit <= *undo_no);
-}
-
-/***********************************************************//**
Undoes a modify in a clustered index record.
@return DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
static
@@ -226,19 +196,11 @@ row_undo_mod_clust(
btr_pcur_t* pcur;
mtr_t mtr;
ulint err;
- ibool success;
- ibool more_vers;
- undo_no_t new_undo_no;
ut_ad(node && thr);
log_free_check();
- /* Check if also the previous version of the clustered index record
- should be undone in this same rollback operation */
-
- more_vers = row_undo_mod_undo_also_prev_vers(node, &new_undo_no);
-
pcur = &(node->pcur);
mtr_start(&mtr);
@@ -286,20 +248,6 @@ row_undo_mod_clust(
trx_undo_rec_release(node->trx, node->undo_no);
- if (more_vers && err == DB_SUCCESS) {
-
- /* Reserve the undo log record to the prior version after
- committing &mtr: this is necessary to comply with the latching
- order, as &mtr may contain the fsp latch which is lower in
- the latch hierarchy than trx->undo_mutex. */
-
- success = trx_undo_rec_reserve(node->trx, new_undo_no);
-
- if (success) {
- node->state = UNDO_NODE_PREV_VERS;
- }
- }
-
return(err);
}
@@ -847,7 +795,6 @@ row_undo_mod_parse_undo_rec(
trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id,
roll_ptr, info_bits, trx,
node->heap, &(node->update));
- node->new_roll_ptr = roll_ptr;
node->new_trx_id = trx_id;
node->cmpl_info = cmpl_info;
}
diff --git a/storage/innobase/row/row0undo.c b/storage/innobase/row/row0undo.c
index 09970b7fe21..a1c1d72f8c6 100644
--- a/storage/innobase/row/row0undo.c
+++ b/storage/innobase/row/row0undo.c
@@ -282,25 +282,6 @@ row_undo(
} else {
node->state = UNDO_NODE_MODIFY;
}
-
- } else if (node->state == UNDO_NODE_PREV_VERS) {
-
- /* Undo should be done to the same clustered index record
- again in this same rollback, restoring the previous version */
-
- roll_ptr = node->new_roll_ptr;
-
- node->undo_rec = trx_undo_get_undo_rec_low(roll_ptr,
- node->heap);
- node->roll_ptr = roll_ptr;
- node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec);
-
- if (trx_undo_roll_ptr_is_insert(roll_ptr)) {
-
- node->state = UNDO_NODE_INSERT;
- } else {
- node->state = UNDO_NODE_MODIFY;
- }
}
/* Prevent DROP TABLE etc. while we are rolling back this row.
diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc
index 632236dc8fe..6264a342d7a 100644
--- a/storage/maria/ha_maria.cc
+++ b/storage/maria/ha_maria.cc
@@ -3052,7 +3052,10 @@ int ha_maria::create(const char *name, register TABLE *table_arg,
ha_create_info->transactional != HA_CHOICE_NO);
if (ha_create_info->options & HA_LEX_CREATE_TMP_TABLE)
+ {
create_flags|= HA_CREATE_TMP_TABLE;
+ create_info.transactional= 0;
+ }
if (ha_create_info->options & HA_CREATE_KEEP_FILES)
create_flags|= HA_CREATE_KEEP_FILES;
if (options & HA_OPTION_PACK_RECORD)
diff --git a/storage/myisam/mi_open.c b/storage/myisam/mi_open.c
index 305984ee94d..438057e22df 100644
--- a/storage/myisam/mi_open.c
+++ b/storage/myisam/mi_open.c
@@ -237,7 +237,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags)
}
/* sanity check */
- if (share->base.keystart > 65535 || share->base.rec_reflength > 8)
+ if (share->base.keystart > 65535 ||
+ share->base.rec_reflength > 8 || share->base.key_reflength > 7)
{
my_errno=HA_ERR_CRASHED;
goto err;
diff --git a/storage/myisam/myisamchk.c b/storage/myisam/myisamchk.c
index 7d8b577e8b0..c8546ee56f5 100644
--- a/storage/myisam/myisamchk.c
+++ b/storage/myisam/myisamchk.c
@@ -291,7 +291,14 @@ static struct my_option my_long_options[] =
&check_param.write_buffer_length, 0, GET_ULONG, REQUIRED_ARG,
(long) READ_BUFFER_INIT, (long) MALLOC_OVERHEAD,
INT_MAX32, (long) MALLOC_OVERHEAD, (long) 1L, 0},
- { "sort_buffer_size", OPT_SORT_BUFFER_SIZE, "",
+ { "sort_buffer_size", OPT_SORT_BUFFER_SIZE,
+ "Deprecated. myisam_sort_buffer_size alias is being used",
+ &check_param.sort_buffer_length,
+ &check_param.sort_buffer_length, 0, GET_ULL, REQUIRED_ARG,
+ (long) SORT_BUFFER_INIT, (long) (MIN_SORT_BUFFER + MALLOC_OVERHEAD),
+ SIZE_T_MAX, (long) MALLOC_OVERHEAD, (long) 1L, 0},
+ { "myisam_sort_buffer_size", OPT_SORT_BUFFER_SIZE,
+ "Alias of sort_buffer_size parameter",
&check_param.sort_buffer_length,
&check_param.sort_buffer_length, 0, GET_ULL, REQUIRED_ARG,
(long) SORT_BUFFER_INIT, (long) (MIN_SORT_BUFFER + MALLOC_OVERHEAD),
diff --git a/storage/perfschema/pfs.cc b/storage/perfschema/pfs.cc
index 9a6ada2f814..104fecb2d1e 100644
--- a/storage/perfschema/pfs.cc
+++ b/storage/perfschema/pfs.cc
@@ -119,7 +119,7 @@
mode when a server is installed.
The implementation of the database creation script is located in
- @verbatim ./scripts/mysql_system_tables.sql @endverbatim
+ @verbatim ./scripts/mysql_performance_tables.sql @endverbatim
@subsection INT_CONFIG Runtime configuration interface
diff --git a/storage/sphinx/ha_sphinx.cc b/storage/sphinx/ha_sphinx.cc
index 97e83cd9221..3305875c124 100644
--- a/storage/sphinx/ha_sphinx.cc
+++ b/storage/sphinx/ha_sphinx.cc
@@ -3591,12 +3591,12 @@ struct st_mysql_storage_engine sphinx_storage_engine =
struct st_mysql_show_var sphinx_status_vars[] =
{
- {"sphinx_total", (char *)sphinx_showfunc_total, SHOW_SIMPLE_FUNC},
- {"sphinx_total_found", (char *)sphinx_showfunc_total_found, SHOW_SIMPLE_FUNC},
- {"sphinx_time", (char *)sphinx_showfunc_time, SHOW_SIMPLE_FUNC},
- {"sphinx_word_count", (char *)sphinx_showfunc_word_count, SHOW_SIMPLE_FUNC},
- {"sphinx_words", (char *)sphinx_showfunc_words, SHOW_SIMPLE_FUNC},
- {"sphinx_error", (char *)sphinx_showfunc_error, SHOW_SIMPLE_FUNC},
+ {"Sphinx_total", (char *)sphinx_showfunc_total, SHOW_SIMPLE_FUNC},
+ {"Sphinx_total_found", (char *)sphinx_showfunc_total_found, SHOW_SIMPLE_FUNC},
+ {"Sphinx_time", (char *)sphinx_showfunc_time, SHOW_SIMPLE_FUNC},
+ {"Sphinx_word_count", (char *)sphinx_showfunc_word_count, SHOW_SIMPLE_FUNC},
+ {"Sphinx_words", (char *)sphinx_showfunc_words, SHOW_SIMPLE_FUNC},
+ {"Sphinx_error", (char *)sphinx_showfunc_error, SHOW_SIMPLE_FUNC},
{0, 0, (enum_mysql_show_type)0}
};
diff --git a/storage/xtradb/btr/btr0cur.c b/storage/xtradb/btr/btr0cur.c
index 61c07ac792e..687853a422e 100644
--- a/storage/xtradb/btr/btr0cur.c
+++ b/storage/xtradb/btr/btr0cur.c
@@ -239,6 +239,7 @@ btr_cur_latch_leaves(
mtr_t* mtr) /*!< in: mtr */
{
ulint mode;
+ ulint sibling_mode;
ulint left_page_no;
ulint right_page_no;
buf_block_t* get_block;
@@ -261,14 +262,21 @@ btr_cur_latch_leaves(
#endif /* UNIV_BTR_DEBUG */
get_block->check_index_page_at_flush = TRUE;
return;
+ case BTR_SEARCH_TREE:
case BTR_MODIFY_TREE:
- /* x-latch also brothers from left to right */
+ if (UNIV_UNLIKELY(latch_mode == BTR_SEARCH_TREE)) {
+ mode = RW_S_LATCH;
+ sibling_mode = RW_NO_LATCH;
+ } else {
+ mode = sibling_mode = RW_X_LATCH;
+ }
+ /* Fetch and possibly latch also brothers from left to right */
left_page_no = btr_page_get_prev(page, mtr);
if (left_page_no != FIL_NULL) {
get_block = btr_block_get(
space, zip_size, left_page_no,
- RW_X_LATCH, cursor->index, mtr);
+ sibling_mode, cursor->index, mtr);
if (srv_pass_corrupt_table && !get_block) {
return;
@@ -280,12 +288,21 @@ btr_cur_latch_leaves(
ut_a(btr_page_get_next(get_block->frame, mtr)
== page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
+ if (sibling_mode == RW_NO_LATCH) {
+ /* btr_block_get() called with RW_NO_LATCH will
+ buffer-fix the block that was read. This serves
+ no purpose for fake-changes prefetching, so
+ we unfix the sibling blocks immediately. */
+ mtr_memo_release(mtr, get_block,
+ MTR_MEMO_BUF_FIX);
+ } else {
+ get_block->check_index_page_at_flush = TRUE;
+ }
}
get_block = btr_block_get(
space, zip_size, page_no,
- RW_X_LATCH, cursor->index, mtr);
+ mode, cursor->index, mtr);
if (srv_pass_corrupt_table && !get_block) {
return;
@@ -301,7 +318,7 @@ btr_cur_latch_leaves(
if (right_page_no != FIL_NULL) {
get_block = btr_block_get(
space, zip_size, right_page_no,
- RW_X_LATCH, cursor->index, mtr);
+ sibling_mode, cursor->index, mtr);
if (srv_pass_corrupt_table && !get_block) {
return;
@@ -313,7 +330,12 @@ btr_cur_latch_leaves(
ut_a(btr_page_get_prev(get_block->frame, mtr)
== page_get_page_no(page));
#endif /* UNIV_BTR_DEBUG */
- get_block->check_index_page_at_flush = TRUE;
+ if (sibling_mode == RW_NO_LATCH) {
+ mtr_memo_release(mtr, get_block,
+ MTR_MEMO_BUF_FIX);
+ } else {
+ get_block->check_index_page_at_flush = TRUE;
+ }
}
return;
@@ -1566,6 +1588,9 @@ btr_cur_pessimistic_insert(
}
if (!(flags & BTR_NO_UNDO_LOG_FLAG)) {
+
+ ut_a(cursor->tree_height != ULINT_UNDEFINED);
+
/* First reserve enough free space for the file segments
of the index tree, so that the insert will not fail because
of lack of space */
@@ -1860,7 +1885,8 @@ btr_cur_update_alloc_zip(
ulint length, /*!< in: size needed */
ibool create, /*!< in: TRUE=delete-and-insert,
FALSE=update-in-place */
- mtr_t* mtr) /*!< in: mini-transaction */
+ mtr_t* mtr, /*!< in: mini-transaction */
+ trx_t* trx) /*!< in: NULL or transaction */
{
ut_a(page_zip == buf_block_get_page_zip(block));
ut_ad(page_zip);
@@ -1877,6 +1903,14 @@ btr_cur_update_alloc_zip(
return(FALSE);
}
+ if (trx && trx->fake_changes) {
+ /* Don't call page_zip_compress_write_log_no_data(), as it has
+ an assertion that would fail. Assume there won't be a
+ compression failure. */
+
+ return TRUE;
+ }
+
if (!page_zip_compress(page_zip, buf_block_get_frame(block),
index, mtr)) {
/* Unable to compress the page */
@@ -1960,7 +1994,8 @@ btr_cur_update_in_place(
/* Check that enough space is available on the compressed page. */
if (page_zip
&& !btr_cur_update_alloc_zip(page_zip, block, index,
- rec_offs_size(offsets), FALSE, mtr)) {
+ rec_offs_size(offsets), FALSE, mtr,
+ trx)) {
return(DB_ZIP_OVERFLOW);
}
@@ -2159,7 +2194,8 @@ any_extern:
if (page_zip
&& !btr_cur_update_alloc_zip(page_zip, block, index,
- new_rec_size, TRUE, mtr)) {
+ new_rec_size, TRUE, mtr,
+ thr_get_trx(thr))) {
err = DB_ZIP_OVERFLOW;
goto err_exit;
}
@@ -2402,7 +2438,15 @@ btr_cur_pessimistic_update(
of the index tree, so that the update will not fail because
of lack of space */
- n_extents = cursor->tree_height / 16 + 3;
+ if (UNIV_UNLIKELY(cursor->tree_height == ULINT_UNDEFINED)) {
+ /* When the tree height is uninitialized due to fake
+ changes, reserve some hardcoded number of extents. */
+ ut_a(thr && thr_get_trx(thr)->fake_changes);
+ n_extents = 3;
+ }
+ else {
+ n_extents = cursor->tree_height / 16 + 3;
+ }
if (flags & BTR_NO_UNDO_LOG_FLAG) {
reserve_flag = FSP_CLEANING;
@@ -2439,7 +2483,7 @@ btr_cur_pessimistic_update(
itself. Thus the following call is safe. */
row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update,
FALSE, *heap);
- if (!(flags & BTR_KEEP_SYS_FLAG)) {
+ if (!(flags & BTR_KEEP_SYS_FLAG) && !trx->fake_changes) {
row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR,
roll_ptr);
row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID,
@@ -3210,6 +3254,8 @@ btr_cur_pessimistic_delete(
of the index tree, so that the node pointer updates will
not fail because of lack of space */
+ ut_a(cursor->tree_height != ULINT_UNDEFINED);
+
n_extents = cursor->tree_height / 32 + 1;
success = fsp_reserve_free_extents(&n_reserved,
diff --git a/storage/xtradb/btr/btr0pcur.c b/storage/xtradb/btr/btr0pcur.c
index b335e2c8aee..a1b7affdeb7 100644
--- a/storage/xtradb/btr/btr0pcur.c
+++ b/storage/xtradb/btr/btr0pcur.c
@@ -47,6 +47,7 @@ btr_pcur_create_for_mysql(void)
pcur->btr_cur.index = NULL;
btr_pcur_init(pcur);
+ pcur->btr_cur.tree_height = ULINT_UNDEFINED;
return(pcur);
}
diff --git a/storage/xtradb/buf/buf0lru.c b/storage/xtradb/buf/buf0lru.c
index a6a1f8dcf9c..14b5c65132c 100644
--- a/storage/xtradb/buf/buf0lru.c
+++ b/storage/xtradb/buf/buf0lru.c
@@ -239,9 +239,11 @@ buf_LRU_drop_page_hash_batch(
When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page
hash index entries belonging to that table. This function tries to
do that in batch. Note that this is a 'best effort' attempt and does
-not guarantee that ALL hash entries will be removed. */
+not guarantee that ALL hash entries will be removed.
+
+@return number of hashed pages found */
static
-void
+ulint
buf_LRU_drop_page_hash_for_tablespace(
/*==================================*/
buf_pool_t* buf_pool, /*!< in: buffer pool instance */
@@ -251,13 +253,14 @@ buf_LRU_drop_page_hash_for_tablespace(
ulint* page_arr;
ulint num_entries;
ulint zip_size;
+ ulint num_found = 0;
zip_size = fil_space_get_zip_size(id);
if (UNIV_UNLIKELY(zip_size == ULINT_UNDEFINED)) {
/* Somehow, the tablespace does not exist. Nothing to drop. */
ut_ad(0);
- return;
+ return num_found;
}
page_arr = ut_malloc(
@@ -315,6 +318,7 @@ next_page:
ut_a(num_entries < BUF_LRU_DROP_SEARCH_SIZE);
++num_entries;
+ ++num_found;
if (num_entries < BUF_LRU_DROP_SEARCH_SIZE) {
goto next_page;
@@ -370,6 +374,8 @@ next_page:
/* Drop any remaining batch of search hashed pages. */
buf_LRU_drop_page_hash_batch(id, zip_size, page_arr, num_entries);
ut_free(page_arr);
+
+ return num_found;
}
/******************************************************************//**
@@ -814,8 +820,6 @@ buf_LRU_mark_space_was_deleted(
for (i = 0; i < srv_buf_pool_instances; i++) {
buf_pool_t* buf_pool;
buf_page_t* bpage;
- buf_chunk_t* chunk;
- ulint j, k;
buf_pool = buf_pool_from_array(i);
@@ -832,28 +836,10 @@ buf_LRU_mark_space_was_deleted(
mutex_exit(&buf_pool->LRU_list_mutex);
- btr_search_s_lock_all();
- chunk = buf_pool->chunks;
- for (j = buf_pool->n_chunks; j--; chunk++) {
- buf_block_t* block = chunk->blocks;
- for (k = chunk->size; k--; block++) {
- if (buf_block_get_state(block)
- != BUF_BLOCK_FILE_PAGE
- || !block->index
- || buf_page_get_space(&block->page) != id) {
- continue;
- }
-
- btr_search_s_unlock_all();
-
- rw_lock_x_lock(&block->lock);
- btr_search_drop_page_hash_index(block);
- rw_lock_x_unlock(&block->lock);
-
- btr_search_s_lock_all();
- }
- }
- btr_search_s_unlock_all();
+ /* The AHI entries for the tablespace being deleted should be
+ removed by now. */
+ ut_ad(buf_LRU_drop_page_hash_for_tablespace(buf_pool, id)
+ == 0);
}
}
diff --git a/storage/xtradb/buf/buf0rea.c b/storage/xtradb/buf/buf0rea.c
index 67379d614a0..6d76a488af7 100644
--- a/storage/xtradb/buf/buf0rea.c
+++ b/storage/xtradb/buf/buf0rea.c
@@ -235,6 +235,9 @@ not_to_recover:
sync, space, 0, offset, 0, UNIV_PAGE_SIZE,
((buf_block_t*) bpage)->frame, bpage, trx);
}
+ if(sync) {
+ thd_wait_end(NULL);
+ }
if (*err == DB_TABLESPACE_DELETED) {
buf_read_page_handle_error(bpage);
@@ -250,7 +253,6 @@ not_to_recover:
}
if (sync) {
- thd_wait_end(NULL);
/* The i/o is already completed when we arrive from
fil_read */
if (!buf_page_io_complete(bpage)) {
diff --git a/storage/xtradb/fsp/fsp0fsp.c b/storage/xtradb/fsp/fsp0fsp.c
index d4a2745b90b..5cbc74b0862 100644
--- a/storage/xtradb/fsp/fsp0fsp.c
+++ b/storage/xtradb/fsp/fsp0fsp.c
@@ -3031,7 +3031,11 @@ try_again:
some of them will contain extent descriptor pages, and therefore
will not be free extents */
- n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
+ if (size <= free_limit) {
+ n_free_up = 0;
+ } else {
+ n_free_up = (size - free_limit) / FSP_EXTENT_SIZE;
+ }
if (n_free_up > 0) {
n_free_up--;
diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc
index b37112db773..5a09bb36292 100644
--- a/storage/xtradb/handler/ha_innodb.cc
+++ b/storage/xtradb/handler/ha_innodb.cc
@@ -381,6 +381,7 @@ static PSI_file_info all_innodb_files[] = {
static INNOBASE_SHARE *get_share(const char *table_name);
static void free_share(INNOBASE_SHARE *share);
static int innobase_close_connection(handlerton *hton, THD* thd);
+static void innobase_kill_query(handlerton *hton, THD* thd, enum thd_kill_levels level);
static void innobase_commit_ordered(handlerton *hton, THD* thd, bool all);
static int innobase_commit(handlerton *hton, THD* thd, bool all);
static int innobase_rollback(handlerton *hton, THD* thd, bool all);
@@ -1053,6 +1054,13 @@ thd_to_trx(
return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr));
}
+my_bool
+ha_innobase::is_fake_change_enabled(THD* thd)
+{
+ trx_t* trx = thd_to_trx(thd);
+ return(trx && trx->fake_changes);
+}
+
/********************************************************************//**
Call this function when mysqld passes control to the client. That is to
avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more
@@ -1117,8 +1125,7 @@ convert_error_code_to_mysql(
return(0);
case DB_INTERRUPTED:
- my_error(ER_QUERY_INTERRUPTED, MYF(0));
- /* fall through */
+ return(HA_ERR_ABORTED_BY_USER);
case DB_FOREIGN_EXCEED_MAX_CASCADE:
push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
@@ -2382,7 +2389,7 @@ trx_is_interrupted(
/*===============*/
trx_t* trx) /*!< in: transaction */
{
- return(trx && trx->mysql_thd && thd_killed((THD*) trx->mysql_thd));
+ return(trx && trx->mysql_thd && thd_kill_level((THD*) trx->mysql_thd));
}
/**********************************************************************//**
@@ -2639,6 +2646,7 @@ innobase_init(
innobase_hton->flags=HTON_NO_FLAGS;
innobase_hton->release_temporary_latches=innobase_release_temporary_latches;
innobase_hton->alter_table_flags = innobase_alter_table_flags;
+ innobase_hton->kill_query = innobase_kill_query;
ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
@@ -3052,6 +3060,14 @@ innobase_change_buffering_inited_ok:
srv_use_checksums = (ibool) innobase_use_checksums;
srv_fast_checksum = (ibool) innobase_fast_checksum;
+ if (innobase_fast_checksum) {
+ fprintf(stderr,
+ "InnoDB: Warning: innodb_fast_checksum is DEPRECATED "
+ "and *WILL* be removed in Percona Server 5.6. Please "
+ "consult the Percona Server 5.6 documentation for "
+ "help in upgrading.\n");
+ }
+
srv_blocking_lru_restore = (ibool) innobase_blocking_lru_restore;
#ifdef HAVE_LARGE_PAGES
@@ -3938,6 +3954,33 @@ innobase_close_connection(
DBUG_RETURN(0);
}
+/*****************************************************************//**
+Cancel any pending lock request associated with the current THD. */
+static
+void
+innobase_kill_query(
+/*======================*/
+ handlerton* hton, /*!< in: innobase handlerton */
+ THD* thd, /*!< in: MySQL thread being killed */
+ enum thd_kill_levels level) /*!< in: kill level */
+{
+ trx_t* trx;
+ DBUG_ENTER("innobase_kill_query");
+ DBUG_ASSERT(hton == innodb_hton_ptr);
+
+ mutex_enter(&kernel_mutex);
+
+ trx = thd_to_trx(thd);
+
+ /* Cancel a pending lock request. */
+ if (trx && trx->wait_lock) {
+ lock_cancel_waiting_and_release(trx->wait_lock);
+ }
+
+ mutex_exit(&kernel_mutex);
+
+ DBUG_VOID_RETURN;
+}
/*************************************************************************//**
** InnoDB database tables
@@ -6280,7 +6323,9 @@ no_commit:
error = row_insert_for_mysql((byte*) record, prebuilt);
#ifdef EXTENDED_FOR_USERSTAT
- if (error == DB_SUCCESS) rows_changed++;
+ if (UNIV_LIKELY(error == DB_SUCCESS && !trx->fake_changes)) {
+ rows_changed++;
+ }
#endif
/* Handle duplicate key errors */
@@ -6645,7 +6690,9 @@ ha_innobase::update_row(
}
#ifdef EXTENDED_FOR_USERSTAT
- if (error == DB_SUCCESS) rows_changed++;
+ if (UNIV_LIKELY(error == DB_SUCCESS && !trx->fake_changes)) {
+ rows_changed++;
+ }
#endif
innodb_srv_conc_exit_innodb(trx);
@@ -6710,7 +6757,9 @@ ha_innobase::delete_row(
error = row_update_for_mysql((byte*) record, prebuilt);
#ifdef EXTENDED_FOR_USERSTAT
- if (error == DB_SUCCESS) rows_changed++;
+ if (UNIV_LIKELY(error == DB_SUCCESS && !trx->fake_changes)) {
+ rows_changed++;
+ }
#endif
innodb_srv_conc_exit_innodb(trx);
@@ -9844,7 +9893,7 @@ ha_innobase::check(
row_mysql_unlock_data_dictionary(prebuilt->trx);
}
- if (thd_killed(user_thd)) {
+ if (thd_kill_level(user_thd)) {
break;
}
@@ -9901,7 +9950,7 @@ ha_innobase::check(
mutex_exit(&kernel_mutex);
prebuilt->trx->op_info = "";
- if (thd_killed(user_thd)) {
+ if (thd_kill_level(user_thd)) {
my_error(ER_QUERY_INTERRUPTED, MYF(0));
}
@@ -12693,6 +12742,8 @@ static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums,
static MYSQL_SYSVAR_BOOL(fast_checksum, innobase_fast_checksum,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "DEPRECATED. #### WARNING #### : This feature is DEPRECATED and WILL "
+ "be removed in Percona Server 5.6. "
"Change the algorithm of checksum for the whole of datapage to 4-bytes word based. "
"The original checksum is checked after the new one. It may be slow for reading page"
" which has orginal checksum. Overwrite the page or recreate the InnoDB database, "
@@ -13126,6 +13177,11 @@ static MYSQL_SYSVAR_BOOL(track_changed_pages, srv_track_changed_pages,
"Track the redo log for changed pages and output a changed page bitmap",
NULL, NULL, FALSE);
+static MYSQL_SYSVAR_ULONGLONG(max_bitmap_file_size, srv_max_bitmap_file_size,
+ PLUGIN_VAR_RQCMDARG,
+ "The maximum size of changed page bitmap files",
+ NULL, NULL, 100*1024*1024ULL, 4096ULL, ULONGLONG_MAX, 0);
+
static MYSQL_SYSVAR_ULONGLONG(changed_pages_limit, srv_changed_pages_limit,
PLUGIN_VAR_RQCMDARG,
"The maximum number of rows for "
@@ -13328,6 +13384,13 @@ static MYSQL_SYSVAR_ULINT(lazy_drop_table, srv_lazy_drop_table,
"e.g. for http://bugs.mysql.com/51325",
NULL, NULL, 0, 0, 1, 0);
+static MYSQL_SYSVAR_BOOL(locking_fake_changes, srv_fake_changes_locks,
+ PLUGIN_VAR_NOCMDARG,
+ "###EXPERIMENTAL### if enabled, transactions will get S row locks instead "
+ "of X locks for fake changes. If disabled, fake change transactions will "
+ "not take any locks at all.",
+ NULL, NULL, TRUE);
+
static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(page_size),
MYSQL_SYSVAR(log_block_size),
@@ -13419,6 +13482,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(use_native_aio),
MYSQL_SYSVAR(change_buffering),
MYSQL_SYSVAR(track_changed_pages),
+ MYSQL_SYSVAR(max_bitmap_file_size),
MYSQL_SYSVAR(changed_pages_limit),
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
MYSQL_SYSVAR(change_buffering_debug),
@@ -13437,6 +13501,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(corrupt_table_action),
MYSQL_SYSVAR(lazy_drop_table),
MYSQL_SYSVAR(fake_changes),
+ MYSQL_SYSVAR(locking_fake_changes),
MYSQL_SYSVAR(merge_sort_block_size),
NULL
};
@@ -13685,7 +13750,7 @@ int ha_innobase::multi_range_read_explain_info(uint mrr_mode, char *str, size_t
bool ha_innobase::is_thd_killed()
{
- return thd_killed(user_thd);
+ return thd_kill_level(user_thd);
}
/**
diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h
index 4d9c0a1ab35..359d0b95367 100644
--- a/storage/xtradb/handler/ha_innodb.h
+++ b/storage/xtradb/handler/ha_innodb.h
@@ -137,6 +137,7 @@ class ha_innobase: public handler
int close(void);
double scan_time();
double read_time(uint index, uint ranges, ha_rows rows);
+ my_bool is_fake_change_enabled(THD *thd);
bool is_corrupt() const;
int write_row(uchar * buf);
diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc
index 29a80594344..4b33d6a780c 100644
--- a/storage/xtradb/handler/i_s.cc
+++ b/storage/xtradb/handler/i_s.cc
@@ -7147,29 +7147,38 @@ static ST_FIELD_INFO i_s_innodb_changed_pages_info[] =
};
/***********************************************************************
- This function parses condition and gets upper bounds for start and end LSN's
- if condition corresponds to certain pattern.
+ This function implements ICP for I_S.INNODB_CHANGED_PAGES by parsing a
+ condition and getting lower and upper bounds for start and end LSNs if the
+ condition corresponds to a certain pattern.
- We can't know right position to avoid scanning bitmap files from the beginning
- to the lower bound. But we can stop scanning bitmap files if we reach upper bound.
+ In the most general form, we understand queries like
- It's expected the most used queries will be like the following:
-
- SELECT * FROM INNODB_CHANGED_PAGES WHERE START_LSN > num1 AND start_lsn < num2;
+ SELECT * FROM INNODB_CHANGED_PAGES
+ WHERE START_LSN > num1 AND START_LSN < num2
+ AND END_LSN > num3 AND END_LSN < num4;
- That's why the pattern is:
+ That's why the pattern syntax is:
pattern: comp | and_comp;
comp: lsn < int_num | lsn <= int_num | int_num > lsn | int_num >= lsn;
lsn: start_lsn | end_lsn;
- and_comp: some_expression AND some_expression | some_expression AND and_comp;
- some_expression: comp | any_other_expression;
+ and_comp: expression AND expression | expression AND and_comp;
+ expression: comp | any_other_expression;
+
+ The two bounds are handled differently: the lower bound is used to find the
+ correct starting _file_, the upper bound the last _block_ that needs reading.
+
+ Lower bound conditions are handled in the following way: start_lsn >= X
+ specifies that the reading must start from the file that has the highest
+ starting LSN less than or equal to X. start_lsn > X is equivalent to
+ start_lsn >= X + 1. For end_lsn, end_lsn >= X is treated as
+ start_lsn >= X - 1 and end_lsn > X as start_lsn >= X.
- Suppose the condition is start_lsn < 100, this means we have to read all
- blocks with start_lsn < 100. Which is equivalent to reading all the blocks
- with end_lsn <= 99, or just end_lsn < 100. That's why it's enough to find
- maximum lsn value, doesn't matter if this is start or end lsn and compare
- it with "start_lsn" field.
+ For the upper bound, suppose the condition is start_lsn < 100. This means we
+ have to read all blocks with start_lsn < 100, which is equivalent to reading
+ all the blocks with end_lsn <= 99, or just end_lsn < 100. That's why it's
+ enough to find the maximum LSN value, whether it is a start or an end LSN,
+ and compare it with the "start_lsn" field. LSN <= 100 is treated as LSN < 101.
Example:
@@ -7180,92 +7189,130 @@ static ST_FIELD_INFO i_s_innodb_changed_pages_info[] =
555 > end_lsn AND
page_id = 100;
- max_lsn will be set to 555.
+ end_lsn will be set to 555, start_lsn will be set to 11.
+
+ Support for other functions (equal, NULL-safe equal, BETWEEN, IN, etc.) will
+ be added on demand.
+
*/
static
void
limit_lsn_range_from_condition(
/*===========================*/
- TABLE* table, /*!<in: table */
- COND* cond, /*!<in: condition */
- ib_uint64_t* max_lsn) /*!<in/out: maximum LSN
- (must be initialized with maximum
- available value) */
+ TABLE* table, /*!<in: table */
+ COND* cond, /*!<in: condition */
+ ib_uint64_t* start_lsn, /*!<in/out: minimum LSN */
+ ib_uint64_t* end_lsn) /*!<in/out: maximum LSN */
{
+ enum Item_func::Functype func_type;
+
if (cond->type() != Item::COND_ITEM &&
cond->type() != Item::FUNC_ITEM)
return;
- switch (((Item_func*) cond)->functype())
+ func_type = ((Item_func*) cond)->functype();
+
+ switch (func_type)
{
- case Item_func::COND_AND_FUNC:
- {
- List_iterator<Item> li(*((Item_cond*) cond)->
- argument_list());
- Item *item;
- while ((item= li++))
- limit_lsn_range_from_condition(table,
- item,
- max_lsn);
- break;
+ case Item_func::COND_AND_FUNC:
+ {
+ List_iterator<Item> li(*((Item_cond*) cond)
+ ->argument_list());
+ Item *item;
+
+ while ((item= li++)) {
+ limit_lsn_range_from_condition(table, item, start_lsn,
+ end_lsn);
+ }
+ break;
+ }
+ case Item_func::LT_FUNC:
+ case Item_func::LE_FUNC:
+ case Item_func::GT_FUNC:
+ case Item_func::GE_FUNC:
+ {
+ Item *left;
+ Item *right;
+ Item_field *item_field;
+ ib_uint64_t tmp_result;
+ ibool is_end_lsn;
+
+ /* a <= b is equivalent to b >= a, which is why we just exchange
+ "left" and "right" in the case of the ">" or ">=" function. We
+ don't touch the operation itself. */
+ if (((Item_func*) cond)->functype() == Item_func::LT_FUNC
+ || ((Item_func*) cond)->functype() == Item_func::LE_FUNC) {
+ left = ((Item_func*) cond)->arguments()[0];
+ right = ((Item_func*) cond)->arguments()[1];
+ } else {
+ left = ((Item_func*) cond)->arguments()[1];
+ right = ((Item_func*) cond)->arguments()[0];
}
- case Item_func::LT_FUNC:
- case Item_func::LE_FUNC:
- case Item_func::GT_FUNC:
- case Item_func::GE_FUNC:
- {
- Item *left;
- Item *right;
- Item_field *item_field;
- ib_uint64_t tmp_result;
-
- /*
- a <= b equals to b >= a that's why we just exchange
- "left" and "right" in the case of ">" or ">="
- function
- */
- if (((Item_func*) cond)->functype() ==
- Item_func::LT_FUNC ||
- ((Item_func*) cond)->functype() ==
- Item_func::LE_FUNC)
- {
- left = ((Item_func*) cond)->arguments()[0];
- right = ((Item_func*) cond)->arguments()[1];
- } else {
- left = ((Item_func*) cond)->arguments()[1];
- right = ((Item_func*) cond)->arguments()[0];
- }
- if (!left || !right)
- return;
- if (left->type() != Item::FIELD_ITEM)
- return;
- if (right->type() != Item::INT_ITEM)
- return;
+ if (left->type() == Item::FIELD_ITEM) {
+ item_field = (Item_field *)left;
+ } else if (right->type() == Item::FIELD_ITEM) {
+ item_field = (Item_field *)right;
+ } else {
+ return;
+ }
- item_field = (Item_field*)left;
+ /* Check if the current field belongs to our table */
+ if (table != item_field->field->table) {
+ return;
+ }
- if (/* START_LSN */
- table->field[2] != item_field->field &&
- /* END_LSN */
- table->field[3] != item_field->field)
- {
- return;
- }
+ /* Check if the field is START_LSN or END_LSN */
+ /* END_LSN */
+ is_end_lsn = table->field[3]->eq(item_field->field);
+
+ if (/* START_LSN */ !table->field[2]->eq(item_field->field)
+ && !is_end_lsn) {
+ return;
+ }
+
+ if (left->type() == Item::FIELD_ITEM
+ && right->type() == Item::INT_ITEM) {
- /* Check if the current field belongs to our table */
- if (table != item_field->field->table)
- return;
+ /* The case of start_lsn|end_lsn <|<= const, i.e. the
+ upper bound. */
tmp_result = right->val_int();
- if (tmp_result < *max_lsn)
- *max_lsn = tmp_result;
+ if (((func_type == Item_func::LE_FUNC)
+ || (func_type == Item_func::GE_FUNC))
+ && (tmp_result != IB_ULONGLONG_MAX)) {
- break;
+ tmp_result++;
+ }
+ if (tmp_result < *end_lsn) {
+ *end_lsn = tmp_result;
+ }
+
+ } else if (left->type() == Item::INT_ITEM
+ && right->type() == Item::FIELD_ITEM) {
+
+ /* The case of const <|<= start_lsn|end_lsn, i.e. the
+ lower bound */
+
+ tmp_result = left->val_int();
+ if (is_end_lsn && tmp_result != 0) {
+ tmp_result--;
+ }
+ if (((func_type == Item_func::LT_FUNC)
+ || (func_type == Item_func::GT_FUNC))
+ && (tmp_result != IB_ULONGLONG_MAX)) {
+
+ tmp_result++;
+ }
+ if (tmp_result > *start_lsn) {
+ *start_lsn = tmp_result;
+ }
}
- default:;
- }
+ break;
+ }
+ default:;
+ }
}
/***********************************************************************
@@ -7282,40 +7329,55 @@ i_s_innodb_changed_pages_fill(
TABLE* table = (TABLE *) tables->table;
log_bitmap_iterator_t i;
ib_uint64_t output_rows_num = 0UL;
- ib_uint64_t max_lsn = ~0ULL;
+ ib_uint64_t max_lsn = IB_ULONGLONG_MAX;
+ ib_uint64_t min_lsn = 0ULL;
+
+ DBUG_ENTER("i_s_innodb_changed_pages_fill");
- if (!srv_track_changed_pages)
- return 0;
+ /* deny access to users without the PROCESS privilege */
+ if (check_global_access(thd, PROCESS_ACL)) {
+
+ DBUG_RETURN(0);
+ }
- if (!log_online_bitmap_iterator_init(&i))
- return 1;
+ RETURN_IF_INNODB_NOT_STARTED(tables->schema_table_name);
- if (cond)
- limit_lsn_range_from_condition(table, cond, &max_lsn);
+ if (!srv_track_changed_pages) {
+ DBUG_RETURN(0);
+ }
+
+ if (cond) {
+ limit_lsn_range_from_condition(table, cond, &min_lsn,
+ &max_lsn);
+ }
+
+ if (!log_online_bitmap_iterator_init(&i, min_lsn, max_lsn)) {
+ DBUG_RETURN(1);
+ }
while(log_online_bitmap_iterator_next(&i) &&
(!srv_changed_pages_limit ||
output_rows_num < srv_changed_pages_limit) &&
/*
- There is no need to compare both start LSN and end LSN fields
- with maximum value. It's enough to compare only start LSN.
- Example:
-
- max_lsn = 100
- \\\\\\\\\\\\\\\\\\\\\\\\\|\\\\\\\\ - Query 1
- I------I I-------I I-------------I I----I
- ////////////////// | - Query 2
- 1 2 3 4
-
- Query 1:
- SELECT * FROM INNODB_CHANGED_PAGES WHERE start_lsn < 100
- will select 1,2,3 bitmaps
- Query 2:
- SELECT * FROM INNODB_CHANGED_PAGES WHERE end_lsn < 100
- will select 1,2 bitmaps
-
- The condition start_lsn <= 100 will be false after reading
- 1,2,3 bitmaps which suits for both cases.
+ There is no need to compare both start LSN and end LSN fields
+ with maximum value. It's enough to compare only start LSN.
+ Example:
+
+ max_lsn = 100
+ \\\\\\\\\\\\\\\\\\\\\\\\\|\\\\\\\\ - Query 1
+ I------I I-------I I-------------I I----I
+ ////////////////// | - Query 2
+ 1 2 3 4
+
+ Query 1:
+ SELECT * FROM INNODB_CHANGED_PAGES WHERE start_lsn < 100
+ will select 1,2,3 bitmaps
+ Query 2:
+ SELECT * FROM INNODB_CHANGED_PAGES WHERE end_lsn < 100
+ will select 1,2 bitmaps
+
+ The condition start_lsn <= 100 will be false after reading
+ 1,2,3 bitmaps which suits for both cases.
*/
LOG_BITMAP_ITERATOR_START_LSN(i) <= max_lsn)
{
@@ -7330,10 +7392,10 @@ i_s_innodb_changed_pages_fill(
LOG_BITMAP_ITERATOR_PAGE_NUM(i));
/* START_LSN */
table->field[2]->store(
- LOG_BITMAP_ITERATOR_START_LSN(i));
+ LOG_BITMAP_ITERATOR_START_LSN(i), true);
/* END_LSN */
table->field[3]->store(
- LOG_BITMAP_ITERATOR_END_LSN(i));
+ LOG_BITMAP_ITERATOR_END_LSN(i), true);
/*
I_S tables are in-memory tables. If bitmap file is big enough
@@ -7353,14 +7415,14 @@ i_s_innodb_changed_pages_fill(
if (schema_table_store_record(thd, table))
{
log_online_bitmap_iterator_release(&i);
- return 1;
+ DBUG_RETURN(1);
}
++output_rows_num;
}
log_online_bitmap_iterator_release(&i);
- return 0;
+ DBUG_RETURN(0);
}
static
diff --git a/storage/xtradb/ibuf/ibuf0ibuf.c b/storage/xtradb/ibuf/ibuf0ibuf.c
index 78cb6e20176..77305e42fb1 100644
--- a/storage/xtradb/ibuf/ibuf0ibuf.c
+++ b/storage/xtradb/ibuf/ibuf0ibuf.c
@@ -4044,7 +4044,7 @@ updated_in_place:
update)
&& (!page_zip || btr_cur_update_alloc_zip(
page_zip, block, index,
- rec_offs_size(offsets), FALSE, mtr))) {
+ rec_offs_size(offsets), FALSE, mtr, NULL))) {
/* This is the easy case. Do something similar
to btr_cur_update_in_place(). */
row_upd_rec_in_place(rec, index, offsets,
diff --git a/storage/xtradb/include/btr0btr.h b/storage/xtradb/include/btr0btr.h
index 03e89ae3f7d..fb06a774b82 100644
--- a/storage/xtradb/include/btr0btr.h
+++ b/storage/xtradb/include/btr0btr.h
@@ -65,7 +65,10 @@ enum btr_latch_mode {
/** Search the previous record. */
BTR_SEARCH_PREV = 35,
/** Modify the previous record. */
- BTR_MODIFY_PREV = 36
+ BTR_MODIFY_PREV = 36,
+ /** Weaker BTR_MODIFY_TREE that does not lock the leaf page siblings,
+ used for fake changes. */
+ BTR_SEARCH_TREE = 37 /* BTR_MODIFY_TREE | 4 */
};
/* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually exclusive. */
diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h
index cbc6103c2ee..cb44129aeb5 100644
--- a/storage/xtradb/include/btr0cur.h
+++ b/storage/xtradb/include/btr0cur.h
@@ -259,8 +259,9 @@ btr_cur_update_alloc_zip(
ulint length, /*!< in: size needed */
ibool create, /*!< in: TRUE=delete-and-insert,
FALSE=update-in-place */
- mtr_t* mtr) /*!< in: mini-transaction */
- __attribute__((nonnull, warn_unused_result));
+ mtr_t* mtr, /*!< in: mini-transaction */
+ trx_t* trx) /*!< in: NULL or transaction */
+ __attribute__((nonnull (1, 2, 3, 6), warn_unused_result));
/*************************************************************//**
Updates a record when the update causes no size changes in its fields.
@return DB_SUCCESS or error number */
diff --git a/storage/xtradb/include/log0online.h b/storage/xtradb/include/log0online.h
index 0e0ca169f6f..e7c3f301e45 100644
--- a/storage/xtradb/include/log0online.h
+++ b/storage/xtradb/include/log0online.h
@@ -27,6 +27,16 @@ Online database log parsing for changed page tracking
#include "univ.i"
#include "os0file.h"
+/** Single bitmap file information */
+typedef struct log_online_bitmap_file_struct log_online_bitmap_file_t;
+
+/** A set of bitmap files containing some LSN range */
+typedef struct log_online_bitmap_file_range_struct
+log_online_bitmap_file_range_t;
+
+/** An iterator over changed page info */
+typedef struct log_bitmap_iterator_struct log_bitmap_iterator_t;
+
/*********************************************************************//**
Initializes the online log following subsytem. */
UNIV_INTERN
@@ -49,45 +59,32 @@ void
log_online_follow_redo_log();
/*=========================*/
-/** The iterator through all bits of changed pages bitmap blocks */
-struct log_bitmap_iterator_struct
-{
- char in_name[FN_REFLEN]; /*!< the file name for bitmap
- input */
- os_file_t in; /*!< the bitmap input file */
- ib_uint64_t in_offset; /*!< the next write position in the
- bitmap output file */
- ib_uint32_t bit_offset; /*!< bit offset inside of bitmap
- block*/
- ib_uint64_t start_lsn; /*!< Start lsn of the block */
- ib_uint64_t end_lsn; /*!< End lsn of the block */
- ib_uint32_t space_id; /*!< Block space id */
- ib_uint32_t first_page_id; /*!< First block page id */
- ibool changed; /*!< true if current page was changed */
- byte* page; /*!< Bitmap block */
-};
-
-typedef struct log_bitmap_iterator_struct log_bitmap_iterator_t;
-
#define LOG_BITMAP_ITERATOR_START_LSN(i) \
- ((i).start_lsn)
+ ((i).start_lsn)
#define LOG_BITMAP_ITERATOR_END_LSN(i) \
- ((i).end_lsn)
+ ((i).end_lsn)
#define LOG_BITMAP_ITERATOR_SPACE_ID(i) \
- ((i).space_id)
+ ((i).space_id)
#define LOG_BITMAP_ITERATOR_PAGE_NUM(i) \
- ((i).first_page_id + (i).bit_offset)
+ ((i).first_page_id + (i).bit_offset)
#define LOG_BITMAP_ITERATOR_PAGE_CHANGED(i) \
- ((i).changed)
+ ((i).changed)
/*********************************************************************//**
-Initializes log bitmap iterator.
+Initializes log bitmap iterator. The minimum LSN is used for finding the
+correct starting file with records, so there may be records returned by
+the iterator that have an LSN less than min_lsn.
+
@return TRUE if the iterator is initialized OK, FALSE otherwise. */
UNIV_INTERN
ibool
log_online_bitmap_iterator_init(
/*============================*/
- log_bitmap_iterator_t *i); /*!<in/out: iterator */
+ log_bitmap_iterator_t *i, /*!<in/out: iterator */
+ ib_uint64_t min_lsn, /*!<in: start LSN for the
+ iterator */
+ ib_uint64_t max_lsn); /*!<in: end LSN for the
+ iterator */
/*********************************************************************//**
Releases log bitmap iterator. */
@@ -108,4 +105,57 @@ log_online_bitmap_iterator_next(
/*============================*/
log_bitmap_iterator_t *i); /*!<in/out: iterator */
+/** Struct for single bitmap file information */
+struct log_online_bitmap_file_struct {
+ char name[FN_REFLEN]; /*!< Name with full path */
+ os_file_t file; /*!< Handle to opened file */
+ ib_uint64_t size; /*!< Size of the file */
+ ib_uint64_t offset; /*!< Offset of the next read,
+ or count of already-read bytes
+ */
+};
+
+/** Struct for a set of bitmap files containing some LSN range */
+struct log_online_bitmap_file_range_struct {
+ size_t count; /*!< Number of files */
+ /** Dynamically-allocated array of info about individual files */
+ struct {
+ char name[FN_REFLEN]; /*!< Name of a file */
+ ib_uint64_t start_lsn; /*!< Starting LSN of
+ data in this file */
+ ulong seq_num; /*!< Sequence number of
+ this file */
+ } *files;
+};
+
+/** Struct for an iterator through all bits of changed pages bitmap blocks */
+struct log_bitmap_iterator_struct
+{
+ log_online_bitmap_file_range_t in_files; /*!< The bitmap files
+ for this iterator */
+ size_t in_i; /*!< Currently read
+ file index in in_files
+ */
+ log_online_bitmap_file_t in; /*!< Currently read
+ file */
+ ib_uint32_t bit_offset; /*!< bit offset inside
+ the current bitmap
+ block */
+ ib_uint64_t start_lsn; /*!< Start LSN of the
+ current bitmap block */
+ ib_uint64_t end_lsn; /*!< End LSN of the
+ current bitmap block */
+ ib_uint32_t space_id; /*!< Current block
+ space id */
+ ib_uint32_t first_page_id; /*!< Id of the first
+ page in the current
+ block */
+ ibool last_page_in_run;/*!< "Last page in
+ run" flag value for the
+ current block */
+ ibool changed; /*!< true if current
+ page was changed */
+ byte* page; /*!< Bitmap block */
+};
+
#endif
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index a95eb8a1d58..6c5b61487f2 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -144,7 +144,8 @@ extern char* srv_doublewrite_file;
extern ibool srv_recovery_stats;
-extern my_bool srv_track_changed_pages;
+extern my_bool srv_track_changed_pages;
+extern ib_uint64_t srv_max_bitmap_file_size;
extern
ulonglong srv_changed_pages_limit;
@@ -395,6 +396,10 @@ extern uint srv_auto_lru_dump;
/** Whether startup should be blocked until buffer pool is fully restored */
extern ibool srv_blocking_lru_restore;
+/** When TRUE, fake change transactions take S rather than X row locks.
+When FALSE, row locks are not taken at all. */
+extern my_bool srv_fake_changes_locks;
+
/** Status variables to be passed to MySQL */
typedef struct export_var_struct export_struc;
diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i
index 5d2cd2d0313..77acf54d8dc 100644
--- a/storage/xtradb/include/univ.i
+++ b/storage/xtradb/include/univ.i
@@ -54,7 +54,7 @@ Created 1/20/1994 Heikki Tuuri
#define INNODB_VERSION_BUGFIX 8
#ifndef PERCONA_INNODB_VERSION
-#define PERCONA_INNODB_VERSION 29.1
+#define PERCONA_INNODB_VERSION 29.3
#endif
/* The following is the InnoDB version as shown in
diff --git a/storage/xtradb/include/ut0ut.h b/storage/xtradb/include/ut0ut.h
index 47ab6eb9b74..48f30b33e65 100644
--- a/storage/xtradb/include/ut0ut.h
+++ b/storage/xtradb/include/ut0ut.h
@@ -122,6 +122,15 @@ ut_max(
/*===*/
ulint n1, /*!< in: first number */
ulint n2); /*!< in: second number */
+/******************************************************//**
+Calculates the maximum of two ib_uint64_t values.
+@return the maximum */
+UNIV_INLINE
+ib_uint64_t
+ut_max_uint64(
+/*==========*/
+ ib_uint64_t n1, /*!< in: first number */
+ ib_uint64_t n2); /*!< in: second number */
/****************************************************************//**
Calculates minimum of two ulint-pairs. */
UNIV_INLINE
diff --git a/storage/xtradb/include/ut0ut.ic b/storage/xtradb/include/ut0ut.ic
index 6f55c7e410e..d56deb6266f 100644
--- a/storage/xtradb/include/ut0ut.ic
+++ b/storage/xtradb/include/ut0ut.ic
@@ -49,6 +49,19 @@ ut_max(
return((n1 <= n2) ? n2 : n1);
}
+/******************************************************//**
+Calculates the maximum of two ib_uint64_t values.
+@return the maximum */
+UNIV_INLINE
+ib_uint64_t
+ut_max_uint64(
+/*==========*/
+ ib_uint64_t n1, /*!< in: first number */
+ ib_uint64_t n2) /*!< in: second number */
+{
+ return((n1 <= n2) ? n2 : n1);
+}
+
/****************************************************************//**
Calculates minimum of two ulint-pairs. */
UNIV_INLINE
diff --git a/storage/xtradb/lock/lock0lock.c b/storage/xtradb/lock/lock0lock.c
index 414d3ae2c49..f172ad6695b 100644
--- a/storage/xtradb/lock/lock0lock.c
+++ b/storage/xtradb/lock/lock0lock.c
@@ -5481,8 +5481,13 @@ lock_sec_rec_read_check_and_lock(
return(DB_SUCCESS);
}
- if (thr && thr_get_trx(thr)->fake_changes && mode == LOCK_X) {
- mode = LOCK_S;
+ if (UNIV_UNLIKELY((thr && thr_get_trx(thr)->fake_changes))) {
+ if (!srv_fake_changes_locks) {
+ return(DB_SUCCESS);
+ }
+ if (mode == LOCK_X) {
+ mode = LOCK_S;
+ }
}
heap_no = page_rec_get_heap_no(rec);
@@ -5561,8 +5566,13 @@ lock_clust_rec_read_check_and_lock(
return(DB_SUCCESS);
}
- if (thr && thr_get_trx(thr)->fake_changes && mode == LOCK_X) {
- mode = LOCK_S;
+ if (UNIV_UNLIKELY((thr && thr_get_trx(thr)->fake_changes))) {
+ if (!srv_fake_changes_locks) {
+ return(DB_SUCCESS);
+ }
+ if (mode == LOCK_X) {
+ mode = LOCK_S;
+ }
}
heap_no = page_rec_get_heap_no(rec);
diff --git a/storage/xtradb/log/log0log.c b/storage/xtradb/log/log0log.c
index f2066b49662..e7c7a165b9c 100644
--- a/storage/xtradb/log/log0log.c
+++ b/storage/xtradb/log/log0log.c
@@ -248,7 +248,7 @@ log_check_tracking_margin(
checked for the already-written log. */
{
ib_uint64_t tracked_lsn;
- ulint tracked_lsn_age;
+ ib_uint64_t tracked_lsn_age;
if (!srv_track_changed_pages) {
return FALSE;
@@ -460,7 +460,7 @@ log_close(void)
ib_uint64_t oldest_lsn;
ib_uint64_t lsn;
ib_uint64_t tracked_lsn;
- ulint tracked_lsn_age;
+ ib_uint64_t tracked_lsn_age;
log_t* log = log_sys;
ib_uint64_t checkpoint_age;
diff --git a/storage/xtradb/log/log0online.c b/storage/xtradb/log/log0online.c
index 1d478c467e6..55eb9d17c46 100644
--- a/storage/xtradb/log/log0online.c
+++ b/storage/xtradb/log/log0online.c
@@ -48,10 +48,8 @@ struct log_bitmap_struct {
parsed, it points to the start,
otherwise points immediatelly past the
end of the incomplete log record. */
- char* out_name; /*!< the file name for bitmap output */
- os_file_t out; /*!< the bitmap output file */
- ib_uint64_t out_offset; /*!< the next write position in the
- bitmap output file */
+ log_online_bitmap_file_t out; /*!< The current bitmap file */
+ ulint out_seq_num; /*!< the bitmap file sequence number */
ib_uint64_t start_lsn; /*!< the LSN of the next unparsed
record and the start of the next LSN
interval to be parsed. */
@@ -76,8 +74,13 @@ struct log_bitmap_struct {
/* The log parsing and bitmap output struct instance */
static struct log_bitmap_struct* log_bmp_sys;
-/* File name stem for modified page bitmaps */
-static const char* modified_page_stem = "ib_modified_log.";
+/** File name stem for bitmap files. */
+static const char* bmp_file_name_stem = "ib_modified_log_";
+
+/** File name template for bitmap files. The 1st format tag is a directory
+name, the 2nd tag is the stem, the 3rd tag is a file sequence number, the 4th
+tag is the start LSN for the file. */
+static const char* bmp_file_name_template = "%s%s%lu_%llu.xdb";
/* On server startup with empty database srv_start_lsn == 0, in
which case the first LSN of actual log records will be this. */
@@ -85,7 +88,7 @@ which case the first LSN of actual log records will be this. */
/* Tests if num bit of bitmap is set */
#define IS_BIT_SET(bitmap, num) \
- (*((bitmap) + ((num) >> 3)) & (1UL << ((num) & 7UL)))
+ (*((bitmap) + ((num) >> 3)) & (1UL << ((num) & 7UL)))
/** The bitmap file block size in bytes. All writes will be multiples of this.
*/
@@ -243,10 +246,69 @@ log_online_calc_checksum(
}
/****************************************************************//**
+Read one bitmap data page and check it for corruption.
+
+@return TRUE if page read OK, FALSE if I/O error */
+static
+ibool
+log_online_read_bitmap_page(
+/*========================*/
+ log_online_bitmap_file_t *bitmap_file, /*!<in/out: bitmap
+ file; its offset is
+ advanced by one block
+ after a successful
+ read */
+ byte *page, /*!<out: read page.
+ Must be at least
+ MODIFIED_PAGE_BLOCK_SIZE
+ bytes long */
+ ibool *checksum_ok) /*!<out: TRUE if page
+ checksum OK. Not
+ written if the read
+ itself fails */
+{
+ /* The file offset is handed to os_file_read() as two 32-bit halves */
+ ulint offset_low = (ulint)(bitmap_file->offset & 0xFFFFFFFF);
+ ulint offset_high = (ulint)(bitmap_file->offset >> 32);
+ ulint checksum;
+ ulint actual_checksum;
+ ibool success;
+
+ /* A whole block must be available at the current offset, and the
+ offset must sit on a block boundary */
+ ut_a(bitmap_file->size >= MODIFIED_PAGE_BLOCK_SIZE);
+ ut_a(bitmap_file->offset
+ <= bitmap_file->size - MODIFIED_PAGE_BLOCK_SIZE);
+ ut_a(bitmap_file->offset % MODIFIED_PAGE_BLOCK_SIZE == 0);
+
+ success = os_file_read(bitmap_file->file, page, offset_low,
+ offset_high, MODIFIED_PAGE_BLOCK_SIZE);
+
+ if (UNIV_UNLIKELY(!success)) {
+
+ /* The following call prints an error message */
+ os_file_get_last_error(TRUE);
+ fprintf(stderr,
+ "InnoDB: Warning: failed reading changed page bitmap "
+ "file \'%s\'\n", bitmap_file->name);
+ return FALSE;
+ }
+
+ /* Move past the block that was just read */
+ bitmap_file->offset += MODIFIED_PAGE_BLOCK_SIZE;
+ ut_ad(bitmap_file->offset <= bitmap_file->size);
+
+ /* Compare the checksum stored in the block with a freshly computed
+ one. A mismatch is reported through checksum_ok, not through the
+ return value. */
+ checksum = mach_read_from_4(page + MODIFIED_PAGE_BLOCK_CHECKSUM);
+ actual_checksum = log_online_calc_checksum(page);
+ *checksum_ok = (checksum == actual_checksum);
+
+ return TRUE;
+}
+
+/****************************************************************//**
Get the last tracked fully LSN from the bitmap file by reading
backwards untile a correct end page is found. Detects incomplete
writes and corrupted data. Sets the start output position for the
written bitmap data.
+
+Multiple bitmap files are handled using the following assumptions:
+1) Only the last file might be corrupted. In case where no good data was found
+in the last file, assume that the next to last file is OK. This assumption
+does not limit crash recovery capability in any way.
+2) If the whole of the last file was corrupted, assume that the start LSN in
+its name is correct and use it for (re-)tracking start.
+
@return the last fully tracked LSN */
static
ib_uint64_t
@@ -254,73 +316,46 @@ log_online_read_last_tracked_lsn()
/*==============================*/
{
byte page[MODIFIED_PAGE_BLOCK_SIZE];
- ib_uint64_t read_offset = log_bmp_sys->out_offset;
- /* Initialize these to nonequal values so that file size == 0 case with
- zero loop repetitions is handled correctly */
- ulint checksum = 0;
- ulint actual_checksum = !checksum;
ibool is_last_page = FALSE;
+ ibool checksum_ok = FALSE;
ib_uint64_t result;
+ ib_uint64_t read_offset = log_bmp_sys->out.offset;
- ut_ad(log_bmp_sys->out_offset % MODIFIED_PAGE_BLOCK_SIZE == 0);
-
- while (checksum != actual_checksum && read_offset > 0 && !is_last_page)
+ while (!checksum_ok && read_offset > 0 && !is_last_page)
{
-
- ulint offset_low, offset_high;
- ibool success;
-
read_offset -= MODIFIED_PAGE_BLOCK_SIZE;
- offset_high = (ulint)(read_offset >> 32);
- offset_low = (ulint)(read_offset & 0xFFFFFFFF);
-
- success = os_file_read(log_bmp_sys->out, page, offset_low,
- offset_high, MODIFIED_PAGE_BLOCK_SIZE);
- if (!success) {
+ log_bmp_sys->out.offset = read_offset;
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
- /* Here and below assume that bitmap file names do not
- contain apostrophes, thus no need for
- ut_print_filename(). */
- fprintf(stderr, "InnoDB: Warning: failed reading "
- "changed page bitmap file \'%s\'\n",
- log_bmp_sys->out_name);
- return MIN_TRACKED_LSN;
+ if (!log_online_read_bitmap_page(&log_bmp_sys->out, page,
+ &checksum_ok)) {
+ checksum_ok = FALSE;
+ result = 0;
+ break;
}
- is_last_page
- = mach_read_from_4(page + MODIFIED_PAGE_IS_LAST_BLOCK);
- checksum = mach_read_from_4(page
- + MODIFIED_PAGE_BLOCK_CHECKSUM);
- actual_checksum = log_online_calc_checksum(page);
- if (checksum != actual_checksum) {
+ if (checksum_ok) {
+ is_last_page
+ = mach_read_from_4
+ (page + MODIFIED_PAGE_IS_LAST_BLOCK);
+ } else {
- fprintf(stderr, "InnoDB: Warning: corruption "
- "detected in \'%s\' at offset %llu\n",
- log_bmp_sys->out_name, read_offset);
+ fprintf(stderr,
+ "InnoDB: Warning: corruption detected in "
+ "\'%s\' at offset %llu\n",
+ log_bmp_sys->out.name, read_offset);
}
-
};
- if (UNIV_LIKELY(checksum == actual_checksum && is_last_page)) {
-
- log_bmp_sys->out_offset = read_offset
- + MODIFIED_PAGE_BLOCK_SIZE;
- result = mach_read_from_8(page + MODIFIED_PAGE_END_LSN);
- }
- else {
- log_bmp_sys->out_offset = read_offset;
- result = 0;
- }
+ result = (checksum_ok && is_last_page)
+ ? mach_read_from_8(page + MODIFIED_PAGE_END_LSN) : 0;
/* Truncate the output file to discard the corrupted bitmap data, if
any */
- if (!os_file_set_eof_at(log_bmp_sys->out,
- log_bmp_sys->out_offset)) {
+ if (!os_file_set_eof_at(log_bmp_sys->out.file,
+ log_bmp_sys->out.offset)) {
fprintf(stderr, "InnoDB: Warning: failed truncating "
"changed page bitmap file \'%s\' to %llu bytes\n",
- log_bmp_sys->out_name, log_bmp_sys->out_offset);
+ log_bmp_sys->out.name, log_bmp_sys->out.offset);
result = 0;
}
return result;
@@ -350,6 +385,37 @@ log_set_tracked_lsn(
#endif
}
+/*********************************************************************//**
+Check if missing, if any, LSN interval can be read and tracked using the
+current LSN value, the LSN value where the tracking stopped, and the log group
+capacity.
+
+@return TRUE if the missing interval can be tracked or if there's no missing
+data. */
+static
+ibool
+log_online_can_track_missing(
+/*=========================*/
+ ib_uint64_t last_tracked_lsn, /*!<in: last tracked LSN */
+ ib_uint64_t tracking_start_lsn) /*!<in: LSN at which tracking
+ is to start. NOTE(review):
+ the capacity check below
+ uses log_sys->lsn, not this
+ value */
+{
+ /* last_tracked_lsn might be < MIN_TRACKED_LSN in the case of empty
+ bitmap file, handle this too. */
+ last_tracked_lsn = ut_max_uint64(last_tracked_lsn, MIN_TRACKED_LSN);
+
+ /* Bitmap data claiming to be ahead of the tracking start is treated
+ as unrecoverable: the process is terminated. */
+ if (last_tracked_lsn > tracking_start_lsn) {
+ fprintf(stderr,
+ "InnoDB: Error: last tracked LSN is in future. This "
+ "can be caused by mismatched bitmap files.\n");
+ exit(1);
+ }
+
+ /* Either there is no gap at all, or the distance from the last
+ tracked LSN to log_sys->lsn does not exceed
+ log_sys->log_group_capacity. */
+ return (last_tracked_lsn == tracking_start_lsn)
+ || (log_sys->lsn - last_tracked_lsn
+ <= log_sys->log_group_capacity);
+}
+
+
/****************************************************************//**
Diagnose a gap in tracked LSN range on server startup due to crash or
very fast shutdown and try to close it by tracking the data
@@ -365,22 +431,20 @@ log_online_track_missing_on_startup(
{
ut_ad(last_tracked_lsn != tracking_start_lsn);
- fprintf(stderr, "InnoDB: last tracked LSN in \'%s\' is %llu, but "
- "last checkpoint LSN is %llu. This might be due to a server "
- "crash or a very fast shutdown. ", log_bmp_sys->out_name,
- last_tracked_lsn, tracking_start_lsn);
-
- /* last_tracked_lsn might be < MIN_TRACKED_LSN in the case of empty
- bitmap file, handle this too. */
- last_tracked_lsn = ut_max(last_tracked_lsn, MIN_TRACKED_LSN);
+ fprintf(stderr, "InnoDB: last tracked LSN is %llu, but the last "
+ "checkpoint LSN is %llu. This might be due to a server "
+ "crash or a very fast shutdown. ", last_tracked_lsn,
+ tracking_start_lsn);
/* See if we can fully recover the missing interval */
- if (log_sys->lsn - last_tracked_lsn < log_sys->log_group_capacity) {
+ if (log_online_can_track_missing(last_tracked_lsn,
+ tracking_start_lsn)) {
fprintf(stderr,
"Reading the log to advance the last tracked LSN.\n");
- log_bmp_sys->start_lsn = last_tracked_lsn;
+ log_bmp_sys->start_lsn = ut_max_uint64(last_tracked_lsn,
+ MIN_TRACKED_LSN);
log_set_tracked_lsn(log_bmp_sys->start_lsn);
log_online_follow_redo_log();
ut_ad(log_bmp_sys->end_lsn >= tracking_start_lsn);
@@ -406,16 +470,101 @@ log_online_track_missing_on_startup(
}
/*********************************************************************//**
+Format a bitmap output file name to log_bmp_sys->out.name. */
+static
+void
+log_online_make_bitmap_name(
+/*=========================*/
+ ib_uint64_t start_lsn) /*!< in: the start LSN name part */
+{
+ /* The name is built from bmp_file_name_template: srv_data_home, the
+ file name stem, the current log_bmp_sys->out_seq_num and start_lsn.
+ At most FN_REFLEN bytes are written to log_bmp_sys->out.name. */
+ ut_snprintf(log_bmp_sys->out.name, FN_REFLEN, bmp_file_name_template,
+ srv_data_home, bmp_file_name_stem,
+ log_bmp_sys->out_seq_num, start_lsn);
+
+}
+
+/*********************************************************************//**
+Create a new empty bitmap output file. */
+static
+void
+log_online_start_bitmap_file()
+/*==========================*/
+{
+ ibool success;
+
+ /* The file name must already be set in log_bmp_sys->out.name.
+ NOTE(review): OS_FILE_OVERWRITE presumably replaces an existing file
+ of the same name - confirm against os_file_create(). */
+ log_bmp_sys->out.file
+ = os_file_create(innodb_file_bmp_key, log_bmp_sys->out.name,
+ OS_FILE_OVERWRITE, OS_FILE_NORMAL,
+ OS_DATA_FILE, &success);
+ if (UNIV_UNLIKELY(!success)) {
+
+ /* The following call prints an error message */
+ os_file_get_last_error(TRUE);
+ fprintf(stderr,
+ "InnoDB: Error: Cannot create \'%s\'\n",
+ log_bmp_sys->out.name);
+ /* Failure to create the output file terminates the process */
+ exit(1);
+ }
+
+ /* Writing starts at the beginning of the new file */
+ log_bmp_sys->out.offset = 0;
+}
+
+/*********************************************************************//**
+Close the current bitmap output file and create the next one. */
+static
+void
+log_online_rotate_bitmap_file(
+/*===========================*/
+ ib_uint64_t next_file_start_lsn) /*!<in: the start LSN name
+ part */
+{
+ /* Finish with the current output file */
+ os_file_close(log_bmp_sys->out.file);
+ /* The sequence number has to be bumped before the name is formatted,
+ because log_online_make_bitmap_name() reads it */
+ log_bmp_sys->out_seq_num++;
+ log_online_make_bitmap_name(next_file_start_lsn);
+ /* Creates the file under the new name and resets the write offset */
+ log_online_start_bitmap_file();
+}
+
+/*********************************************************************//**
+Check the name of a given file if it's a changed page bitmap file and
+return file sequence and start LSN name components if it is. If is not,
+the values of output parameters are undefined.
+
+@return TRUE if a given file is a changed page bitmap file. */
+static
+ibool
+log_online_is_bitmap_file(
+/*======================*/
+ const os_file_stat_t* file_info, /*!<in: file to
+ check */
+ ulong* bitmap_file_seq_num, /*!<out: bitmap file
+ sequence number */
+ ib_uint64_t* bitmap_file_start_lsn) /*!<out: bitmap file
+ start LSN */
+{
+ /* Sized like the longest file name that the assertion below allows,
+ so that the unbounded %[a-z_] conversion cannot write past the end
+ of the buffer. */
+ char stem[OS_FILE_MAX_PATH];
+ /* Number of characters consumed by sscanf(), 0 if the whole
+ pattern, including the ".xdb" suffix, did not match */
+ int name_len = 0;
+
+ ut_ad (strlen(file_info->name) < OS_FILE_MAX_PATH);
+
+ /* Only regular files and links can be bitmap files */
+ if (file_info->type != OS_FILE_TYPE_FILE
+ && file_info->type != OS_FILE_TYPE_LINK) {
+
+ return FALSE;
+ }
+
+ /* sscanf() already returns 3 once the start LSN has been converted,
+ whether or not ".xdb" follows. %n is only stored if the suffix
+ literal matched, and the terminator check rejects any trailing
+ characters after it (e.g. ".xdb.bak"). */
+ if (sscanf(file_info->name, "%[a-z_]%lu_%llu.xdb%n", stem,
+ bitmap_file_seq_num, bitmap_file_start_lsn,
+ &name_len) != 3
+ || name_len <= 0
+ || file_info->name[name_len] != '\0') {
+
+ return FALSE;
+ }
+
+ /* The leading part of the name must be exactly the bitmap stem */
+ return (!strcmp(stem, bmp_file_name_stem));
+}
+
+/*********************************************************************//**
Initialize the online log following subsytem. */
UNIV_INTERN
void
log_online_read_init()
/*==================*/
{
- char buf[FN_REFLEN];
ibool success;
ib_uint64_t tracking_start_lsn
- = ut_max(log_sys->last_checkpoint_lsn, MIN_TRACKED_LSN);
+ = ut_max_uint64(log_sys->last_checkpoint_lsn, MIN_TRACKED_LSN);
+ os_file_dir_t bitmap_dir;
+ os_file_stat_t bitmap_dir_file_info;
+ ib_uint64_t last_file_start_lsn = MIN_TRACKED_LSN;
/* Assert (could be compile-time assert) that bitmap data start and end
in a bitmap block is 8-byte aligned */
@@ -424,82 +573,120 @@ log_online_read_init()
log_bmp_sys = ut_malloc(sizeof(*log_bmp_sys));
- ut_snprintf(buf, FN_REFLEN, "%s%s%d", srv_data_home,
- modified_page_stem, 1);
- log_bmp_sys->out_name = ut_malloc(strlen(buf) + 1);
- ut_strcpy(log_bmp_sys->out_name, buf);
+ /* Enumerate existing bitmap files to either open the last one to get
+ the last tracked LSN or to find that there are none and start
+ tracking from scratch. */
+ log_bmp_sys->out.name[0] = '\0';
+ log_bmp_sys->out_seq_num = 0;
+
+ bitmap_dir = os_file_opendir(srv_data_home, TRUE);
+ ut_a(bitmap_dir);
+ while (!os_file_readdir_next_file(srv_data_home, bitmap_dir,
+ &bitmap_dir_file_info)) {
+
+ ulong file_seq_num;
+ ib_uint64_t file_start_lsn;
+
+ if (!log_online_is_bitmap_file(&bitmap_dir_file_info,
+ &file_seq_num,
+ &file_start_lsn)) {
+ continue;
+ }
+
+ if (file_seq_num > log_bmp_sys->out_seq_num
+ && bitmap_dir_file_info.size > 0) {
+ log_bmp_sys->out_seq_num = file_seq_num;
+ last_file_start_lsn = file_start_lsn;
+ /* No dir component (srv_data_home) here, because
+ that's the cwd */
+ strncpy(log_bmp_sys->out.name,
+ bitmap_dir_file_info.name, FN_REFLEN - 1);
+ log_bmp_sys->out.name[FN_REFLEN - 1] = '\0';
+ }
+ }
+
+ if (os_file_closedir(bitmap_dir)) {
+ os_file_get_last_error(TRUE);
+ fprintf(stderr, "InnoDB: Error: cannot close \'%s\'\n",
+ srv_data_home);
+ exit(1);
+ }
+
+ if (!log_bmp_sys->out_seq_num) {
+ log_bmp_sys->out_seq_num = 1;
+ log_online_make_bitmap_name(0);
+ }
log_bmp_sys->modified_pages = rbt_create(MODIFIED_PAGE_BLOCK_SIZE,
log_online_compare_bmp_keys);
log_bmp_sys->page_free_list = NULL;
- log_bmp_sys->out
+ log_bmp_sys->out.file
= os_file_create_simple_no_error_handling
- (innodb_file_bmp_key, log_bmp_sys->out_name, OS_FILE_OPEN,
+ (innodb_file_bmp_key, log_bmp_sys->out.name, OS_FILE_OPEN,
OS_FILE_READ_WRITE, &success);
if (!success) {
/* New file, tracking from scratch */
- log_bmp_sys->out
- = os_file_create_simple_no_error_handling
- (innodb_file_bmp_key, log_bmp_sys->out_name,
- OS_FILE_CREATE, OS_FILE_READ_WRITE, &success);
- if (!success) {
-
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
- fprintf(stderr,
- "InnoDB: Error: Cannot create \'%s\'\n",
- log_bmp_sys->out_name);
- exit(1);
- }
-
- log_bmp_sys->out_offset = 0;
+ log_online_start_bitmap_file();
}
else {
- /* Old file, read last tracked LSN and continue from there */
+ /* Read the last tracked LSN from the last file */
ulint size_low;
ulint size_high;
ib_uint64_t last_tracked_lsn;
- success = os_file_get_size(log_bmp_sys->out, &size_low,
+ success = os_file_get_size(log_bmp_sys->out.file, &size_low,
&size_high);
ut_a(success);
- log_bmp_sys->out_offset
+ log_bmp_sys->out.size
= ((ib_uint64_t)size_high << 32) | size_low;
+ log_bmp_sys->out.offset = log_bmp_sys->out.size;
- if (log_bmp_sys->out_offset % MODIFIED_PAGE_BLOCK_SIZE != 0) {
+ if (log_bmp_sys->out.offset % MODIFIED_PAGE_BLOCK_SIZE != 0) {
fprintf(stderr,
"InnoDB: Warning: truncated block detected "
"in \'%s\' at offset %llu\n",
- log_bmp_sys->out_name,
- log_bmp_sys->out_offset);
- log_bmp_sys->out_offset -=
- log_bmp_sys->out_offset
+ log_bmp_sys->out.name,
+ log_bmp_sys->out.offset);
+ log_bmp_sys->out.offset -=
+ log_bmp_sys->out.offset
% MODIFIED_PAGE_BLOCK_SIZE;
}
last_tracked_lsn = log_online_read_last_tracked_lsn();
+ if (!last_tracked_lsn) {
+ last_tracked_lsn = last_file_start_lsn;
+ }
+
+ /* Start a new file. Choose the LSN value in its name based on
+ if we can retrack any missing data. */
+ if (log_online_can_track_missing(last_tracked_lsn,
+ tracking_start_lsn)) {
+ log_online_rotate_bitmap_file(last_tracked_lsn);
+ }
+ else {
+ log_online_rotate_bitmap_file(tracking_start_lsn);
+ }
if (last_tracked_lsn < tracking_start_lsn) {
- log_online_track_missing_on_startup(last_tracked_lsn,
- tracking_start_lsn);
+ log_online_track_missing_on_startup
+ (last_tracked_lsn, tracking_start_lsn);
return;
}
if (last_tracked_lsn > tracking_start_lsn) {
- fprintf(stderr, "InnoDB: last tracked LSN in \'%s\' "
- "is %llu, but last checkpoint LSN is %llu. "
+ fprintf(stderr, "InnoDB: last tracked LSN is %llu, "
+ "but last the checkpoint LSN is %llu. "
"The tracking-based incremental backups will "
"work only from the latter LSN!\n",
- log_bmp_sys->out_name, last_tracked_lsn,
- tracking_start_lsn);
+ last_tracked_lsn, tracking_start_lsn);
}
}
@@ -519,7 +706,7 @@ log_online_read_shutdown()
{
ib_rbt_node_t *free_list_node = log_bmp_sys->page_free_list;
- os_file_close(log_bmp_sys->out);
+ os_file_close(log_bmp_sys->out.file);
rbt_free(log_bmp_sys->modified_pages);
@@ -529,7 +716,6 @@ log_online_read_shutdown()
free_list_node = next;
}
- ut_free(log_bmp_sys->out_name);
ut_free(log_bmp_sys);
}
@@ -746,8 +932,8 @@ log_online_follow_log_seg(
/* The next parse LSN is inside the current block, skip
data preceding it. */
skip_already_parsed_len
- = log_bmp_sys->next_parse_lsn
- - block_start_lsn;
+ = (ulint)(log_bmp_sys->next_parse_lsn
+ - block_start_lsn);
}
else {
@@ -819,32 +1005,32 @@ log_online_write_bitmap_page(
{
ibool success;
- success = os_file_write(log_bmp_sys->out_name,log_bmp_sys->out,
+ success = os_file_write(log_bmp_sys->out.name, log_bmp_sys->out.file,
block,
- (ulint)(log_bmp_sys->out_offset & 0xFFFFFFFF),
- (ulint)(log_bmp_sys->out_offset << 32),
+ (ulint)(log_bmp_sys->out.offset & 0xFFFFFFFF), /* low 32 bits */
+ (ulint)(log_bmp_sys->out.offset >> 32), /* high 32 bits */
MODIFIED_PAGE_BLOCK_SIZE);
if (UNIV_UNLIKELY(!success)) {
/* The following call prints an error message */
os_file_get_last_error(TRUE);
fprintf(stderr, "InnoDB: Error: failed writing changed page "
- "bitmap file \'%s\'\n", log_bmp_sys->out_name);
+ "bitmap file \'%s\'\n", log_bmp_sys->out.name);
return;
}
- success = os_file_flush(log_bmp_sys->out, FALSE);
+ success = os_file_flush(log_bmp_sys->out.file, FALSE);
if (UNIV_UNLIKELY(!success)) {
/* The following call prints an error message */
os_file_get_last_error(TRUE);
fprintf(stderr, "InnoDB: Error: failed flushing "
"changed page bitmap file \'%s\'\n",
- log_bmp_sys->out_name);
+ log_bmp_sys->out.name);
return;
}
- log_bmp_sys->out_offset += MODIFIED_PAGE_BLOCK_SIZE;
+ log_bmp_sys->out.offset += MODIFIED_PAGE_BLOCK_SIZE;
}
/*********************************************************************//**
@@ -858,6 +1044,10 @@ log_online_write_bitmap()
ib_rbt_node_t *bmp_tree_node;
const ib_rbt_node_t *last_bmp_tree_node;
+ if (log_bmp_sys->out.offset >= srv_max_bitmap_file_size) {
+ log_online_rotate_bitmap_file(log_bmp_sys->start_lsn);
+ }
+
bmp_tree_node = (ib_rbt_node_t *)
rbt_first(log_bmp_sys->modified_pages);
last_bmp_tree_node = rbt_last(log_bmp_sys->modified_pages);
@@ -930,47 +1120,306 @@ log_online_follow_redo_log()
}
/*********************************************************************//**
-Initializes log bitmap iterator.
+List the bitmap files in srv_data_home and setup their range that contains the
+specified LSN interval. This range, if non-empty, will start with a file that
+has the greatest LSN equal to or less than the start LSN and will include all
+the files up to the one with the greatest LSN less than the end LSN. Caller
+must free bitmap_files->files when done if it is set to non-NULL and
+this function returned TRUE. Field bitmap_files->count might be set to a
+larger value than the actual count of the files, and space for the unused array
+slots will be allocated but cleared to zeroes.
+
+@return TRUE if succeeded
+*/
+static
+ibool
+log_online_setup_bitmap_file_range(
+/*===============================*/
+ log_online_bitmap_file_range_t *bitmap_files, /*!<in/out: bitmap file
+ range. On a FALSE return
+ files is NULL or already
+ freed and reset to NULL */
+ ib_uint64_t range_start, /*!<in: start LSN */
+ ib_uint64_t range_end) /*!<in: end LSN */
+{
+ os_file_dir_t bitmap_dir;
+ os_file_stat_t bitmap_dir_file_info;
+ ulong first_file_seq_num = ULONG_MAX;
+ ib_uint64_t first_file_start_lsn = IB_ULONGLONG_MAX;
+
+ bitmap_files->count = 0;
+ bitmap_files->files = NULL;
+
+ /* 1st pass: size the info array */
+
+ bitmap_dir = os_file_opendir(srv_data_home, FALSE);
+ if (!bitmap_dir) {
+ fprintf(stderr,
+ "InnoDB: Error: "
+ "failed to open bitmap directory \'%s\'\n",
+ srv_data_home);
+ return FALSE;
+ }
+
+ while (!os_file_readdir_next_file(srv_data_home, bitmap_dir,
+ &bitmap_dir_file_info)) {
+
+ ulong file_seq_num;
+ ib_uint64_t file_start_lsn;
+
+ /* Skip non-bitmap files and files that start at or after the
+ end of the requested range */
+ if (!log_online_is_bitmap_file(&bitmap_dir_file_info,
+ &file_seq_num,
+ &file_start_lsn)
+ || file_start_lsn >= range_end) {
+
+ continue;
+ }
+
+ if (file_start_lsn >= range_start
+ || file_start_lsn == first_file_start_lsn
+ || first_file_start_lsn > range_start) {
+
+ /* A file that falls into the range */
+ bitmap_files->count++;
+ if (file_start_lsn < first_file_start_lsn) {
+
+ first_file_start_lsn = file_start_lsn;
+ }
+ if (file_seq_num < first_file_seq_num) {
+
+ first_file_seq_num = file_seq_num;
+ }
+ } else if (file_start_lsn > first_file_start_lsn) {
+
+ /* A file that has LSN closer to the range start
+ but smaller than it, replacing another such file */
+ first_file_start_lsn = file_start_lsn;
+ first_file_seq_num = file_seq_num;
+ }
+ }
+
+ ut_a(first_file_seq_num != ULONG_MAX || bitmap_files->count == 0);
+
+ if (os_file_closedir(bitmap_dir)) {
+ os_file_get_last_error(TRUE);
+ fprintf(stderr, "InnoDB: Error: cannot close \'%s\'\n",
+ srv_data_home);
+ return FALSE;
+ }
+
+ /* An empty range is not an error */
+ if (!bitmap_files->count) {
+ return TRUE;
+ }
+
+ /* 2nd pass: get the file names in the file_seq_num order */
+
+ bitmap_dir = os_file_opendir(srv_data_home, FALSE);
+ if (!bitmap_dir) {
+ fprintf(stderr, "InnoDB: Error: "
+ "failed to open bitmap directory \'%s\'\n",
+ srv_data_home);
+ return FALSE;
+ }
+
+ /* Zero-filled so that unused slots can be recognized by
+ seq_num == 0 */
+ bitmap_files->files = ut_malloc(bitmap_files->count
+ * sizeof(bitmap_files->files[0]));
+ memset(bitmap_files->files, 0,
+ bitmap_files->count * sizeof(bitmap_files->files[0]));
+
+ while (!os_file_readdir_next_file(srv_data_home, bitmap_dir,
+ &bitmap_dir_file_info)) {
+
+ ulong file_seq_num;
+ ib_uint64_t file_start_lsn;
+ size_t array_pos;
+
+ if (!log_online_is_bitmap_file(&bitmap_dir_file_info,
+ &file_seq_num,
+ &file_start_lsn)
+ || file_start_lsn >= range_end
+ || file_start_lsn < first_file_start_lsn) {
+ continue;
+ }
+
+ /* The array is indexed by the distance from the first
+ sequence number. Sequence numbers with gaps, or a directory
+ that changed between the two passes, would give a slot
+ outside of the array: diagnose that instead of writing out
+ of bounds. */
+ if (file_seq_num < first_file_seq_num
+ || file_seq_num - first_file_seq_num
+ >= bitmap_files->count) {
+
+ fprintf(stderr,
+ "InnoDB: Warning: inconsistent bitmap file "
+ "directory \'%s\'\n", srv_data_home);
+ /* Already failing, the close result is not checked */
+ os_file_closedir(bitmap_dir);
+ ut_free(bitmap_files->files);
+ bitmap_files->files = NULL;
+ bitmap_files->count = 0;
+ return FALSE;
+ }
+
+ array_pos = file_seq_num - first_file_seq_num;
+ if (file_seq_num > bitmap_files->files[array_pos].seq_num) {
+ bitmap_files->files[array_pos].seq_num = file_seq_num;
+ strncpy(bitmap_files->files[array_pos].name,
+ bitmap_dir_file_info.name, FN_REFLEN);
+ bitmap_files->files[array_pos].name[FN_REFLEN - 1]
+ = '\0';
+ bitmap_files->files[array_pos].start_lsn
+ = file_start_lsn;
+ }
+ }
+
+ if (os_file_closedir(bitmap_dir)) {
+ os_file_get_last_error(TRUE);
+ fprintf(stderr, "InnoDB: Error: cannot close \'%s\'\n",
+ srv_data_home);
+ /* The array came from ut_malloc(), so it must be released
+ with ut_free(). Reset the pointer so that the caller cannot
+ free it again. */
+ ut_free(bitmap_files->files);
+ bitmap_files->files = NULL;
+ bitmap_files->count = 0;
+ return FALSE;
+ }
+
+#ifdef UNIV_DEBUG
+ ut_ad(bitmap_files->files[0].seq_num == first_file_seq_num);
+ ut_ad(bitmap_files->files[0].start_lsn == first_file_start_lsn);
+ {
+ size_t i;
+ for (i = 1; i < bitmap_files->count; i++) {
+ if (!bitmap_files->files[i].seq_num) {
+ break;
+ }
+ ut_ad(bitmap_files->files[i].seq_num
+ > bitmap_files->files[i - 1].seq_num);
+ ut_ad(bitmap_files->files[i].start_lsn
+ >= bitmap_files->files[i - 1].start_lsn);
+ }
+ }
+#endif
+
+ return TRUE;
+}
+
+/****************************************************************//**
+Open a bitmap file for reading.
+
+@return TRUE if opened successfully */
+static
+ibool
+log_online_open_bitmap_file_read_only(
+/*==================================*/
+ const char* name, /*!<in: bitmap file
+ name without directory,
+ which is assumed to be
+ srv_data_home */
+ log_online_bitmap_file_t* bitmap_file) /*!<out: opened bitmap
+ file; name, file, size
+ and offset are set */
+{
+ ibool success = FALSE;
+ ulint size_low;
+ ulint size_high;
+
+ /* Build the full path by prefixing the name with srv_data_home */
+ ut_snprintf(bitmap_file->name, FN_REFLEN, "%s%s", srv_data_home, name);
+ bitmap_file->file
+ = os_file_create_simple_no_error_handling(innodb_file_bmp_key,
+ bitmap_file->name,
+ OS_FILE_OPEN,
+ OS_FILE_READ_ONLY,
+ &success);
+ if (!success) {
+ /* Here and below assume that bitmap file names do not
+ contain apostrophes, thus no need for ut_print_filename(). */
+ fprintf(stderr,
+ "InnoDB: Warning: error opening the changed page "
+ "bitmap \'%s\'\n", bitmap_file->name);
+ return FALSE;
+ }
+
+ /* NOTE(review): the result of os_file_get_size() is stored in
+ success but never checked, so on failure size is computed from
+ unset size_low and size_high - confirm whether this needs handling. */
+ success = os_file_get_size(bitmap_file->file, &size_low, &size_high);
+ /* Combine the two 32-bit halves into one 64-bit size */
+ bitmap_file->size = (((ib_uint64_t)size_high) << 32) | size_low;
+ /* Reading starts at the beginning of the file */
+ bitmap_file->offset = 0;
+
+#ifdef UNIV_LINUX
+ /* Access pattern hints for the kernel; the results are ignored */
+ posix_fadvise(bitmap_file->file, 0, 0, POSIX_FADV_SEQUENTIAL);
+ posix_fadvise(bitmap_file->file, 0, 0, POSIX_FADV_NOREUSE);
+#endif
+
+ return TRUE;
+}
+
+/****************************************************************//**
+Diagnose one or both of the following situations if we read close to
+the end of bitmap file:
+1) Warn if the remainder of the file is less than one page.
+2) Error if we cannot read any more full pages but the last read page
+did not have the last-in-run flag set.
+
+@return FALSE for the error */
+static
+ibool
+log_online_diagnose_bitmap_eof(
+/*===========================*/
+ const log_online_bitmap_file_t* bitmap_file, /*!< in: bitmap file */
+ ibool last_page_in_run)/*!< in: "last page in
+ run" flag value in the
+ last read page */
+{
+ /* Check if we are too close to EOF to read a full page. The size is
+ tested first so that the subtraction below cannot wrap around. */
+ if ((bitmap_file->size < MODIFIED_PAGE_BLOCK_SIZE)
+ || (bitmap_file->offset
+ > bitmap_file->size - MODIFIED_PAGE_BLOCK_SIZE)) {
+
+ if (bitmap_file->offset != bitmap_file->size) {
+ /* If we are not at EOF and we have less than one page
+ to read, it's junk. This error is not fatal in
+ itself. */
+
+ fprintf(stderr,
+ "InnoDB: Warning: junk at the end of changed "
+ "page bitmap file \'%s\'.\n",
+ bitmap_file->name);
+ }
+
+ if (!last_page_in_run) {
+ /* We are at EOF but the last read page did not finish
+ a run */
+ /* It's a "Warning" here because it's not a fatal error
+ for the whole server */
+ fprintf(stderr,
+ "InnoDB: Warning: changed page bitmap "
+ "file \'%s\' does not contain a complete run "
+ "at the end.\n", bitmap_file->name);
+ return FALSE;
+ }
+ }
+ /* Either a full page can still be read, or the file ended cleanly
+ after a complete run */
+ return TRUE;
+}
+
+/*********************************************************************//**
+Initialize the log bitmap iterator for a given range. The records are
+processed at a bitmap block granularity, i.e. all the records in the same block
+share the same start and end LSN values, the exact LSN of each record is
+unavailable (nor is it defined for blocks that are touched more than once in
+the LSN interval contained in the block). Thus min_lsn and max_lsn should be
+set at block boundaries or bigger, otherwise the records at the 1st and the
+last blocks will not be returned. Also note that there might be returned
+records with LSN < min_lsn, as min_lsn is used to select the correct starting
+file but not block.
+
@return TRUE if the iterator is initialized OK, FALSE otherwise. */
UNIV_INTERN
ibool
log_online_bitmap_iterator_init(
/*============================*/
- log_bitmap_iterator_t *i) /*!<in/out: iterator */
+ log_bitmap_iterator_t *i, /*!<in/out: iterator */
+ ib_uint64_t min_lsn,/*!< in: start LSN */
+ ib_uint64_t max_lsn)/*!< in: end LSN */
{
- ibool success;
-
ut_a(i);
- ut_snprintf(i->in_name, FN_REFLEN, "%s%s%d", srv_data_home,
- modified_page_stem, 1);
- i->in_offset = 0;
- /*
- Set up bit offset out of the reasonable limit
- to intiate reading block from file in
- log_online_bitmap_iterator_next()
- */
- i->bit_offset = MODIFIED_PAGE_BLOCK_BITMAP_LEN;
- i->in =
- os_file_create_simple_no_error_handling(innodb_file_bmp_key,
- i->in_name,
- OS_FILE_OPEN,
- OS_FILE_READ_ONLY,
- &success);
- if (!success) {
- /* The following call prints an error message */
- os_file_get_last_error(TRUE);
- fprintf(stderr,
- "InnoDB: Error: Cannot open \'%s\'\n",
- i->in_name);
+ if (!log_online_setup_bitmap_file_range(&i->in_files, min_lsn,
+ max_lsn)) {
+
return FALSE;
}
- i->page = ut_malloc(MODIFIED_PAGE_BLOCK_SIZE);
+ ut_a(i->in_files.count > 0);
+
+ /* Open the 1st bitmap file */
+ i->in_i = 0;
+ if (!log_online_open_bitmap_file_read_only(i->in_files.files[i->in_i].
+ name,
+ &i->in)) {
+ i->in_i = i->in_files.count;
+ free(i->in_files.files);
+ return FALSE;
+ }
+ i->page = ut_malloc(MODIFIED_PAGE_BLOCK_SIZE);
+ i->bit_offset = MODIFIED_PAGE_BLOCK_BITMAP_LEN;
i->start_lsn = i->end_lsn = 0;
i->space_id = 0;
i->first_page_id = 0;
+ i->last_page_in_run = TRUE;
i->changed = FALSE;
return TRUE;
@@ -985,7 +1434,11 @@ log_online_bitmap_iterator_release(
log_bitmap_iterator_t *i) /*!<in/out: iterator */
{
ut_a(i);
- os_file_close(i->in);
+
+ if (i->in_i < i->in_files.count) {
+ os_file_close(i->in.file);
+ }
+ ut_free(i->in_files.files);
ut_free(i->page);
}
@@ -1000,14 +1453,7 @@ log_online_bitmap_iterator_next(
/*============================*/
log_bitmap_iterator_t *i) /*!<in/out: iterator */
{
- ulint offset_low;
- ulint offset_high;
- ulint size_low;
- ulint size_high;
- ulint checksum = 0;
- ulint actual_checksum = !checksum;
-
- ibool success;
+ ibool checksum_ok = FALSE;
ut_a(i);
@@ -1020,66 +1466,51 @@ log_online_bitmap_iterator_next(
return TRUE;
}
- while (checksum != actual_checksum)
+ while (!checksum_ok)
{
- success = os_file_get_size(i->in,
- &size_low,
- &size_high);
- if (!success) {
- os_file_get_last_error(TRUE);
- fprintf(stderr,
- "InnoDB: Warning: can't get size of "
- "page bitmap file \'%s\'\n",
- i->in_name);
- return FALSE;
- }
-
- if (i->in_offset >=
- (ib_uint64_t)(size_low) +
- ((ib_uint64_t)(size_high) << 32))
- return FALSE;
-
- offset_high = (ulint)(i->in_offset >> 32);
- offset_low = (ulint)(i->in_offset & 0xFFFFFFFF);
+ while (i->in.size < MODIFIED_PAGE_BLOCK_SIZE
+ || (i->in.offset
+ > i->in.size - MODIFIED_PAGE_BLOCK_SIZE)) {
+
+ /* Advance file */
+ i->in_i++;
+ os_file_close(i->in.file);
+ log_online_diagnose_bitmap_eof(&i->in,
+ i->last_page_in_run);
+ if (i->in_i == i->in_files.count
+ || i->in_files.files[i->in_i].seq_num == 0) {
+
+ return FALSE;
+ }
- success = os_file_read(
- i->in,
- i->page,
- offset_low,
- offset_high,
- MODIFIED_PAGE_BLOCK_SIZE);
+ if (!log_online_open_bitmap_file_read_only(
+ i->in_files.files[i->in_i].name,
+ &i->in)) {
+ return FALSE;
+ }
+ }
- if (!success) {
+ if (!log_online_read_bitmap_page(&i->in, i->page,
+ &checksum_ok)) {
os_file_get_last_error(TRUE);
fprintf(stderr,
"InnoDB: Warning: failed reading "
"changed page bitmap file \'%s\'\n",
- i->in_name);
+ i->in_files.files[i->in_i].name);
return FALSE;
}
-
- checksum = mach_read_from_4(
- i->page + MODIFIED_PAGE_BLOCK_CHECKSUM);
-
- actual_checksum = log_online_calc_checksum(i->page);
-
- i->in_offset += MODIFIED_PAGE_BLOCK_SIZE;
}
- i->start_lsn =
- mach_read_from_8(i->page + MODIFIED_PAGE_START_LSN);
- i->end_lsn =
- mach_read_from_8(i->page + MODIFIED_PAGE_END_LSN);
- i->space_id =
- mach_read_from_4(i->page + MODIFIED_PAGE_SPACE_ID);
- i->first_page_id =
- mach_read_from_4(i->page + MODIFIED_PAGE_1ST_PAGE_ID);
- i->bit_offset =
- 0;
- i->changed =
- IS_BIT_SET(i->page + MODIFIED_PAGE_BLOCK_BITMAP,
- i->bit_offset);
+ i->start_lsn = mach_read_from_8(i->page + MODIFIED_PAGE_START_LSN);
+ i->end_lsn = mach_read_from_8(i->page + MODIFIED_PAGE_END_LSN);
+ i->space_id = mach_read_from_4(i->page + MODIFIED_PAGE_SPACE_ID);
+ i->first_page_id = mach_read_from_4(i->page
+ + MODIFIED_PAGE_1ST_PAGE_ID);
+ i->last_page_in_run = mach_read_from_4(i->page
+ + MODIFIED_PAGE_IS_LAST_BLOCK);
+ i->bit_offset = 0;
+ i->changed = IS_BIT_SET(i->page + MODIFIED_PAGE_BLOCK_BITMAP,
+ i->bit_offset);
return TRUE;
}
-
diff --git a/storage/xtradb/os/os0file.c b/storage/xtradb/os/os0file.c
index 2555c010027..8e0516a84a9 100644
--- a/storage/xtradb/os/os0file.c
+++ b/storage/xtradb/os/os0file.c
@@ -2158,8 +2158,10 @@ os_file_set_eof_at(
ib_uint64_t new_len)/*!< in: new file length */
{
#ifdef __WIN__
- /* TODO: untested! */
- return(!_chsize_s(file, new_len));
+ LARGE_INTEGER li, li2;
+ li.QuadPart = new_len;
+ return(SetFilePointerEx(file, li, &li2,FILE_BEGIN)
+ && SetEndOfFile(file));
#else
/* TODO: works only with -D_FILE_OFFSET_BITS=64 ? */
return(!ftruncate(file, new_len));
diff --git a/storage/xtradb/row/row0ins.c b/storage/xtradb/row/row0ins.c
index 3ae4c227ddc..61c3720fa2e 100644
--- a/storage/xtradb/row/row0ins.c
+++ b/storage/xtradb/row/row0ins.c
@@ -2012,7 +2012,10 @@ row_ins_index_entry_low(
the function will return in both low_match and up_match of the
cursor sensible values */
- if (dict_index_is_clust(index)) {
+ if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) {
+ search_mode = (mode & BTR_MODIFY_TREE)
+ ? BTR_SEARCH_TREE : BTR_SEARCH_LEAF;
+ } else if (dict_index_is_clust(index)) {
search_mode = mode;
} else if (!(thr_get_trx(thr)->check_unique_secondary)) {
search_mode = mode | BTR_INSERT | BTR_IGNORE_SEC_UNIQUE;
@@ -2021,7 +2024,7 @@ row_ins_index_entry_low(
}
btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
- thr_get_trx(thr)->fake_changes ? BTR_SEARCH_LEAF : search_mode,
+ search_mode,
&cursor, 0, __FILE__, __LINE__, &mtr);
if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) {
diff --git a/storage/xtradb/row/row0mysql.c b/storage/xtradb/row/row0mysql.c
index 575160501c3..9ab85940760 100644
--- a/storage/xtradb/row/row0mysql.c
+++ b/storage/xtradb/row/row0mysql.c
@@ -1277,17 +1277,19 @@ run_again:
que_thr_stop_for_mysql_no_error(thr, trx);
- prebuilt->table->stat_n_rows++;
+ if (UNIV_LIKELY(!(trx->fake_changes))) {
- srv_n_rows_inserted++;
+ prebuilt->table->stat_n_rows++;
- if (prebuilt->table->stat_n_rows == 0) {
- /* Avoid wrap-over */
- prebuilt->table->stat_n_rows--;
+ if (prebuilt->table->stat_n_rows == 0) {
+ /* Avoid wrap-over */
+ prebuilt->table->stat_n_rows--;
+ }
+
+ srv_n_rows_inserted++;
+ row_update_statistics_if_needed(prebuilt->table);
}
- if (!(trx->fake_changes))
- row_update_statistics_if_needed(prebuilt->table);
trx->op_info = "";
return((int) err);
@@ -1534,6 +1536,11 @@ run_again:
que_thr_stop_for_mysql_no_error(thr, trx);
+ if (UNIV_UNLIKELY(trx->fake_changes)) {
+ trx->op_info = "";
+ return((int) err);
+ }
+
if (node->is_delete) {
if (prebuilt->table->stat_n_rows > 0) {
prebuilt->table->stat_n_rows--;
@@ -1548,7 +1555,6 @@ run_again:
that changes indexed columns, UPDATEs that change only non-indexed
columns would not affect statistics. */
if (node->is_delete || !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
- if (!(trx->fake_changes))
row_update_statistics_if_needed(prebuilt->table);
}
@@ -1756,6 +1762,11 @@ run_again:
return(err);
}
+ if (UNIV_UNLIKELY((trx->fake_changes))) {
+
+ return(err);
+ }
+
if (node->is_delete) {
if (table->stat_n_rows > 0) {
table->stat_n_rows--;
@@ -1766,7 +1777,6 @@ run_again:
srv_n_rows_updated++;
}
- if (!(trx->fake_changes))
row_update_statistics_if_needed(table);
return(err);
diff --git a/storage/xtradb/row/row0upd.c b/storage/xtradb/row/row0upd.c
index 859b3d415ab..b2a0eb57669 100644
--- a/storage/xtradb/row/row0upd.c
+++ b/storage/xtradb/row/row0upd.c
@@ -2018,7 +2018,8 @@ row_upd_clust_rec(
the same transaction do not modify the record in the meantime.
Therefore we can assert that the restoration of the cursor succeeds. */
- ut_a(btr_pcur_restore_position(thr_get_trx(thr)->fake_changes ? BTR_SEARCH_LEAF : BTR_MODIFY_TREE,
+ ut_a(btr_pcur_restore_position(thr_get_trx(thr)->fake_changes
+ ? BTR_SEARCH_TREE : BTR_MODIFY_TREE,
pcur, mtr));
ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur),
diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c
index 9d479ac6c87..6e210071746 100644
--- a/storage/xtradb/srv/srv0srv.c
+++ b/storage/xtradb/srv/srv0srv.c
@@ -179,8 +179,14 @@ UNIV_INTERN ibool srv_recovery_stats = FALSE;
UNIV_INTERN my_bool srv_track_changed_pages = TRUE;
+UNIV_INTERN ib_uint64_t srv_max_bitmap_file_size = 100 * 1024 * 1024;
+
UNIV_INTERN ulonglong srv_changed_pages_limit = 0;
+/** When TRUE, fake change transactions take S rather than X row locks.
+ When FALSE, row locks are not taken at all. */
+UNIV_INTERN my_bool srv_fake_changes_locks = TRUE;
+
/* if TRUE, then we auto-extend the last data file */
UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
/* if != 0, this tells the max size auto-extending may increase the
diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c
index 65a775b56da..2faa68cb87c 100644
--- a/storage/xtradb/srv/srv0start.c
+++ b/storage/xtradb/srv/srv0start.c
@@ -1148,6 +1148,24 @@ skip_size_check:
return(DB_SUCCESS);
}
+/*********************************************************************//**
+Initializes the log tracking subsystem and starts its thread. */
+static
+void
+init_log_online(void)
+/*=================*/
+{
+ if (srv_track_changed_pages) {
+
+ log_online_read_init();
+
+ /* Create the thread that follows the redo log to output the
+ changed page bitmap */
+ os_thread_create(&srv_redo_log_follow_thread, NULL,
+ thread_ids + 5 + SRV_MAX_N_IO_THREADS);
+ }
+}
+
/********************************************************************
Starts InnoDB and creates a new database if database files
are not found and the user wants.
@@ -1794,6 +1812,8 @@ innobase_start_or_create_for_mysql(void)
trx_sys_file_format_init();
if (create_new_db) {
+ init_log_online();
+
mtr_start(&mtr);
fsp_header_init(0, sum_of_new_sizes, &mtr);
@@ -1893,6 +1913,8 @@ innobase_start_or_create_for_mysql(void)
return(DB_ERROR);
}
+ init_log_online();
+
/* Since the insert buffer init is in dict_boot, and the
insert buffer is needed in any disk i/o, first we call
dict_boot(). Note that trx_sys_init_at_db_start() only needs
@@ -2040,19 +2062,6 @@ innobase_start_or_create_for_mysql(void)
if (srv_auto_lru_dump && srv_blocking_lru_restore)
buf_LRU_file_restore();
- if (srv_track_changed_pages) {
-
- /* Initialize the log tracking subsystem here to block
- server startup until it's completed due to the potential
- need to re-read previous server run's log. */
- log_online_read_init();
-
- /* Create the thread that follows the redo log to output the
- changed page bitmap */
- os_thread_create(&srv_redo_log_follow_thread, NULL,
- thread_ids + 6 + SRV_MAX_N_IO_THREADS);
- }
-
srv_is_being_started = FALSE;
err = dict_create_or_check_foreign_constraint_tables();