summaryrefslogtreecommitdiff
path: root/storage/innobase
diff options
context:
space:
mode:
author Sergei Golubchik <sergii@pisem.net> 2013-04-15 15:09:22 +0200
committer Sergei Golubchik <sergii@pisem.net> 2013-04-15 15:09:22 +0200
commit a9035be5b7a7b3865ddb4ef34a5d0cfc65dfc254 (patch)
tree a9df7341e91623f62fe37cd47fce139d8888fc95 /storage/innobase
parent 3a1c91d87d69ef243b3e78be6089102cafef0a8e (diff)
parent f57ecb7786177e0af3b1e3ec94302720b2e0f967 (diff)
download mariadb-git-a9035be5b7a7b3865ddb4ef34a5d0cfc65dfc254.tar.gz
10.0-base merge
Diffstat (limited to 'storage/innobase')
-rw-r--r-- storage/innobase/btr/btr0btr.cc 38
-rw-r--r-- storage/innobase/btr/btr0cur.cc 8
-rw-r--r-- storage/innobase/buf/buf0buf.cc 148
-rw-r--r-- storage/innobase/buf/buf0flu.cc 24
-rw-r--r-- storage/innobase/buf/buf0lru.cc 42
-rw-r--r-- storage/innobase/dict/dict0load.cc 3
-rw-r--r-- storage/innobase/fil/fil0fil.cc 84
-rw-r--r-- storage/innobase/handler/ha_innodb.cc 199
-rw-r--r-- storage/innobase/handler/handler0alter.cc 6
-rw-r--r-- storage/innobase/include/btr0cur.h 5
-rw-r--r-- storage/innobase/include/btr0cur.ic 13
-rw-r--r-- storage/innobase/include/buf0buf.h 29
-rw-r--r-- storage/innobase/include/buf0buf.ic 8
-rw-r--r-- storage/innobase/include/buf0lru.h 5
-rw-r--r-- storage/innobase/include/data0type.ic 31
-rw-r--r-- storage/innobase/include/fil0fil.h 15
-rw-r--r-- storage/innobase/include/lock0lock.h 12
-rw-r--r-- storage/innobase/include/rem0rec.h 80
-rw-r--r-- storage/innobase/include/row0merge.h 2
-rw-r--r-- storage/innobase/include/srv0srv.h 18
-rw-r--r-- storage/innobase/include/univ.i 7
-rw-r--r-- storage/innobase/lock/lock0lock.cc 99
-rw-r--r-- storage/innobase/log/log0recv.cc 15
-rw-r--r-- storage/innobase/mysql-test/storage_engine/autoinc_secondary.rdiff 2
-rw-r--r-- storage/innobase/mysql-test/storage_engine/insert_delayed.rdiff 6
-rw-r--r-- storage/innobase/mysql-test/storage_engine/parts/repair_table.rdiff 16
-rw-r--r-- storage/innobase/mysql-test/storage_engine/repair_table.rdiff 6
-rw-r--r-- storage/innobase/mysql-test/storage_engine/type_char_indexes.rdiff 8
-rw-r--r-- storage/innobase/mysql-test/storage_engine/type_spatial_indexes.rdiff 66
-rw-r--r-- storage/innobase/mysql-test/storage_engine/vcol.rdiff 8
-rw-r--r-- storage/innobase/os/os0file.cc 72
-rw-r--r-- storage/innobase/rem/rem0rec.cc 254
-rw-r--r-- storage/innobase/row/row0ins.cc 5
-rw-r--r-- storage/innobase/row/row0merge.cc 114
-rw-r--r-- storage/innobase/row/row0mysql.cc 7
-rw-r--r-- storage/innobase/row/row0umod.cc 4
-rw-r--r-- storage/innobase/row/row0undo.cc 2
-rw-r--r-- storage/innobase/srv/srv0srv.cc 22
-rw-r--r-- storage/innobase/sync/sync0sync.cc 4
-rw-r--r-- storage/innobase/trx/trx0purge.cc 10
-rw-r--r-- storage/innobase/trx/trx0rec.cc 19
41 files changed, 1073 insertions, 443 deletions
diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc
index 8b7a19777ab..8eae3c7e3bc 100644
--- a/storage/innobase/btr/btr0btr.cc
+++ b/storage/innobase/btr/btr0btr.cc
@@ -3098,6 +3098,8 @@ btr_lift_page_up(
buf_block_t* blocks[BTR_MAX_LEVELS];
ulint n_blocks; /*!< last used index in blocks[] */
ulint i;
+ ibool lift_father_up = FALSE;
+ buf_block_t* block_orig = block;
ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
@@ -3108,11 +3110,13 @@ btr_lift_page_up(
{
btr_cur_t cursor;
- mem_heap_t* heap = mem_heap_create(100);
- ulint* offsets;
+ ulint* offsets = NULL;
+ mem_heap_t* heap = mem_heap_create(
+ sizeof(*offsets)
+ * (REC_OFFS_HEADER_SIZE + 1 + 1 + index->n_fields));
buf_block_t* b;
- offsets = btr_page_get_father_block(NULL, heap, index,
+ offsets = btr_page_get_father_block(offsets, heap, index,
block, mtr, &cursor);
father_block = btr_cur_get_block(&cursor);
father_page_zip = buf_block_get_page_zip(father_block);
@@ -3136,6 +3140,29 @@ btr_lift_page_up(
blocks[n_blocks++] = b = btr_cur_get_block(&cursor);
}
+ if (n_blocks && page_level == 0) {
+ /* The father page is also the only page on its level (and it
+ is not the root), so we must lift up the father page first,
+ because a leaf page should only be lifted up into the root page.
+ Freeing a page chooses the segment based on page_level
+ (== 0 or != 0). If the page_level changed from != 0 to == 0,
+ a later free of the page would not find the page allocation
+ to be freed. */
+
+ lift_father_up = TRUE;
+ block = father_block;
+ page = buf_block_get_frame(block);
+ page_level = btr_page_get_level(page, mtr);
+
+ ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL);
+ ut_ad(btr_page_get_next(page, mtr) == FIL_NULL);
+ ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX));
+
+ father_block = blocks[0];
+ father_page_zip = buf_block_get_page_zip(father_block);
+ father_page = buf_block_get_frame(father_block);
+ }
+
mem_heap_free(heap);
}
@@ -3143,6 +3170,7 @@ btr_lift_page_up(
/* Make the father empty */
btr_page_empty(father_block, father_page_zip, index, page_level, mtr);
+ page_level++;
/* Copy the records to the father page one by one. */
if (0
@@ -3174,7 +3202,7 @@ btr_lift_page_up(
lock_update_copy_and_discard(father_block, block);
/* Go upward to root page, decrementing levels by one. */
- for (i = 0; i < n_blocks; i++, page_level++) {
+ for (i = lift_father_up ? 1 : 0; i < n_blocks; i++, page_level++) {
page_t* page = buf_block_get_frame(blocks[i]);
page_zip_des_t* page_zip= buf_block_get_page_zip(blocks[i]);
@@ -3196,7 +3224,7 @@ btr_lift_page_up(
ut_ad(page_validate(father_page, index));
ut_ad(btr_check_node_ptr(index, father_block, mtr));
- return(father_block);
+ return(lift_father_up ? block_orig : father_block);
}
/*************************************************************//**
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index aeb16200f80..56cce411bba 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -97,6 +97,11 @@ srv_refresh_innodb_monitor_stats(). Referenced by
srv_printf_innodb_monitor(). */
UNIV_INTERN ulint btr_cur_n_sea_old = 0;
+#ifdef UNIV_DEBUG
+/* Flag to limit optimistic insert records */
+UNIV_INTERN uint btr_cur_limit_optimistic_insert_debug = 0;
+#endif /* UNIV_DEBUG */
+
/** In the optimistic insert, if the insert does not fit, but this much space
can be released by page reorganize, then it is reorganized */
#define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32)
@@ -1276,6 +1281,9 @@ btr_cur_optimistic_insert(
}
}
+ LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page),
+ goto fail);
+
/* If there have been many consecutive inserts, and we are on the leaf
level, check if we have to split the page to reserve enough free space
for future updates of records. */
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 96821478e60..28d5a472531 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -383,6 +383,33 @@ buf_get_total_list_len(
}
/********************************************************************//**
+Sum the LRU, unzip_LRU and flush_list sizes in bytes over all buffer pools. */
+UNIV_INTERN
+void
+buf_get_total_list_size_in_bytes(
+/*=============================*/
+ buf_pools_list_size_t* buf_pools_list_size) /*!< out: zeroed, then
+ filled with the list sizes summed over all buffer pools */
+{
+ ulint i;
+ ut_ad(buf_pools_list_size);
+ memset(buf_pools_list_size, 0, sizeof(*buf_pools_list_size));
+
+ for (i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool_t* buf_pool;
+
+ buf_pool = buf_pool_from_array(i);
+ /* The counters are read without mutex protection,
+ because the result is only used for statistics. */
+ buf_pools_list_size->LRU_bytes += buf_pool->stat.LRU_bytes;
+ buf_pools_list_size->unzip_LRU_bytes +=
+ UT_LIST_GET_LEN(buf_pool->unzip_LRU) * UNIV_PAGE_SIZE;
+ buf_pools_list_size->flush_list_bytes +=
+ buf_pool->stat.flush_list_bytes;
+ }
+}
+
+/********************************************************************//**
Get total buffer pool statistics. */
UNIV_INTERN
void
@@ -1802,34 +1829,24 @@ buf_page_make_young(
}
/********************************************************************//**
-Sets the time of the first access of a page and moves a page to the
-start of the buffer pool LRU list if it is too old. This high-level
-function can be used to prevent an important page from slipping
-out of the buffer pool. */
+Moves a page to the start of the buffer pool LRU list if it is too old.
+This high-level function can be used to prevent an important page from
+slipping out of the buffer pool. */
static
void
-buf_page_set_accessed_make_young(
-/*=============================*/
- buf_page_t* bpage, /*!< in/out: buffer block of a
+buf_page_make_young_if_needed(
+/*==========================*/
+ buf_page_t* bpage) /*!< in/out: buffer block of a
file page */
- unsigned access_time) /*!< in: bpage->access_time
- read under mutex protection,
- or 0 if unknown */
{
+#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
-
ut_ad(!buf_pool_mutex_own(buf_pool));
+#endif /* UNIV_DEBUG */
ut_a(buf_page_in_file(bpage));
if (buf_page_peek_if_too_old(bpage)) {
- buf_pool_mutex_enter(buf_pool);
- buf_LRU_make_block_young(bpage);
- buf_pool_mutex_exit(buf_pool);
- } else if (!access_time) {
- ulint time_ms = ut_time_ms();
- buf_pool_mutex_enter(buf_pool);
- buf_page_set_accessed(bpage, time_ms);
- buf_pool_mutex_exit(buf_pool);
+ buf_page_make_young(bpage);
}
}
@@ -1978,7 +1995,6 @@ buf_page_get_zip(
rw_lock_t* hash_lock;
ibool discard_attempted = FALSE;
ibool must_read;
- unsigned access_time;
buf_pool_t* buf_pool = buf_pool_get(space, offset);
buf_pool->stat.n_page_gets++;
@@ -2051,15 +2067,17 @@ err_exit:
got_block:
must_read = buf_page_get_io_fix(bpage) == BUF_IO_READ;
- access_time = buf_page_is_accessed(bpage);
rw_lock_s_unlock(hash_lock);
#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
ut_a(!bpage->file_page_was_freed);
#endif
+
+ buf_page_set_accessed(bpage);
+
mutex_exit(block_mutex);
- buf_page_set_accessed_make_young(bpage, access_time);
+ buf_page_make_young_if_needed(bpage);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(++buf_dbg_counter % 5771 || buf_validate());
@@ -2671,22 +2689,29 @@ wait_until_unfixed:
UNIV_MEM_INVALID(bpage, sizeof *bpage);
rw_lock_x_unlock(hash_lock);
- mutex_exit(&block->mutex);
- mutex_exit(&buf_pool->zip_mutex);
buf_pool->n_pend_unzip++;
-
buf_pool_mutex_exit(buf_pool);
+ access_time = buf_page_is_accessed(&block->page);
+ mutex_exit(&block->mutex);
+ mutex_exit(&buf_pool->zip_mutex);
+
buf_page_free_descriptor(bpage);
- /* Decompress the page and apply buffered operations
- while not holding buf_pool->mutex or block->mutex. */
+ /* Decompress the page while not holding
+ buf_pool->mutex or block->mutex. */
ut_a(buf_zip_decompress(block, TRUE));
if (UNIV_LIKELY(!recv_no_ibuf_operations)) {
- ibuf_merge_or_delete_for_page(block, space, offset,
- zip_size, TRUE);
+ if (access_time) {
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ut_a(ibuf_count_get(space, offset) == 0);
+#endif /* UNIV_IBUF_COUNT_DEBUG */
+ } else {
+ ibuf_merge_or_delete_for_page(
+ block, space, offset, zip_size, TRUE);
+ }
}
/* Unfix and unlatch the block. */
@@ -2799,14 +2824,15 @@ wait_until_unfixed:
ut_a(mode == BUF_GET_POSSIBLY_FREED
|| !block->page.file_page_was_freed);
#endif
- mutex_exit(&block->mutex);
-
/* Check if this is the first access to the page */
-
access_time = buf_page_is_accessed(&block->page);
- if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL)) {
- buf_page_set_accessed_make_young(&block->page, access_time);
+ buf_page_set_accessed(&block->page);
+
+ mutex_exit(&block->mutex);
+
+ if (mode != BUF_PEEK_IF_IN_POOL) {
+ buf_page_make_young_if_needed(&block->page);
}
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
@@ -2857,7 +2883,7 @@ wait_until_unfixed:
mtr_memo_push(mtr, block, fix_type);
- if (UNIV_LIKELY(mode != BUF_PEEK_IF_IN_POOL) && !access_time) {
+ if (mode != BUF_PEEK_IF_IN_POOL && !access_time) {
/* In the case of a first access, try to apply linear
read-ahead */
@@ -2912,15 +2938,13 @@ buf_page_optimistic_get(
buf_block_buf_fix_inc(block, file, line);
- mutex_exit(&block->mutex);
+ access_time = buf_page_is_accessed(&block->page);
- /* Check if this is the first access to the page.
- We do a dirty read on purpose, to avoid mutex contention.
- This field is only used for heuristic purposes; it does not
- affect correctness. */
+ buf_page_set_accessed(&block->page);
- access_time = buf_page_is_accessed(&block->page);
- buf_page_set_accessed_make_young(&block->page, access_time);
+ mutex_exit(&block->mutex);
+
+ buf_page_make_young_if_needed(&block->page);
ut_ad(!ibuf_inside(mtr)
|| ibuf_page(buf_block_get_space(block),
@@ -2975,7 +2999,7 @@ buf_page_optimistic_get(
mutex_exit(&block->mutex);
#endif
- if (UNIV_UNLIKELY(!access_time)) {
+ if (!access_time) {
/* In the case of a first access, try to apply linear
read-ahead */
@@ -3038,24 +3062,14 @@ buf_page_get_known_nowait(
buf_block_buf_fix_inc(block, file, line);
+ buf_page_set_accessed(&block->page);
+
mutex_exit(&block->mutex);
buf_pool = buf_pool_from_block(block);
- if (mode == BUF_MAKE_YOUNG && buf_page_peek_if_too_old(&block->page)) {
- buf_pool_mutex_enter(buf_pool);
- buf_LRU_make_block_young(&block->page);
- buf_pool_mutex_exit(buf_pool);
- } else if (!buf_page_is_accessed(&block->page)) {
- /* Above, we do a dirty read on purpose, to avoid
- mutex contention. The field buf_page_t::access_time
- is only used for heuristic purposes. Writes to the
- field must be protected by mutex, however. */
- ulint time_ms = ut_time_ms();
-
- buf_pool_mutex_enter(buf_pool);
- buf_page_set_accessed(&block->page, time_ms);
- buf_pool_mutex_exit(buf_pool);
+ if (mode == BUF_MAKE_YOUNG) {
+ buf_page_make_young_if_needed(&block->page);
}
ut_ad(!ibuf_inside(mtr) || mode == BUF_KEEP_OLD);
@@ -3234,6 +3248,7 @@ buf_page_init(
ulint offset, /*!< in: offset of the page within space
in units of a page */
ulint fold, /*!< in: buf_page_address_fold(space,offset) */
+ ulint zip_size,/*!< in: compressed page size, or 0 */
buf_block_t* block) /*!< in/out: block to init */
{
buf_page_t* hash_page;
@@ -3302,6 +3317,9 @@ buf_page_init(
ut_d(block->page.in_page_hash = TRUE);
HASH_INSERT(buf_page_t, hash, buf_pool->page_hash,
fold, &block->page);
+ if (zip_size) {
+ page_zip_set_size(&block->page.zip, zip_size);
+ }
}
/********************************************************************//**
@@ -3407,7 +3425,7 @@ err_exit:
ut_ad(buf_pool_from_bpage(bpage) == buf_pool);
- buf_page_init(buf_pool, space, offset, fold, block);
+ buf_page_init(buf_pool, space, offset, fold, zip_size, block);
rw_lock_x_unlock(hash_lock);
/* The block must be put to the LRU list, to the old blocks */
@@ -3426,8 +3444,6 @@ err_exit:
buf_page_set_io_fix(bpage, BUF_IO_READ);
if (zip_size) {
- page_zip_set_size(&block->page.zip, zip_size);
-
/* buf_pool->mutex may be released and
reacquired by buf_buddy_alloc(). Thus, we
must release block->mutex in order not to
@@ -3528,7 +3544,8 @@ err_exit:
rw_lock_x_unlock(hash_lock);
- /* The block must be put to the LRU list, to the old blocks */
+ /* The block must be put into the LRU list, among the old blocks.
+ The zip_size is already set in the page zip. */
buf_LRU_add_block(bpage, TRUE/* to old blocks */);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
buf_LRU_insert_zip_clean(bpage);
@@ -3578,7 +3595,6 @@ buf_page_create(
buf_block_t* block;
ulint fold;
buf_block_t* free_block = NULL;
- ulint time_ms = ut_time_ms();
buf_pool_t* buf_pool = buf_pool_get(space, offset);
rw_lock_t* hash_lock;
@@ -3630,7 +3646,7 @@ buf_page_create(
mutex_enter(&block->mutex);
- buf_page_init(buf_pool, space, offset, fold, block);
+ buf_page_init(buf_pool, space, offset, fold, zip_size, block);
rw_lock_x_unlock(hash_lock);
@@ -3650,8 +3666,6 @@ buf_page_create(
buf_page_set_io_fix(&block->page, BUF_IO_READ);
rw_lock_x_lock(&block->lock);
-
- page_zip_set_size(&block->page.zip, zip_size);
mutex_exit(&block->mutex);
/* buf_pool->mutex may be released and reacquired by
buf_buddy_alloc(). Thus, we must release block->mutex
@@ -3675,12 +3689,12 @@ buf_page_create(
rw_lock_x_unlock(&block->lock);
}
- buf_page_set_accessed(&block->page, time_ms);
-
buf_pool_mutex_exit(buf_pool);
mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX);
+ buf_page_set_accessed(&block->page);
+
mutex_exit(&block->mutex);
/* Delete possible entries for the page from the insert buffer:
diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc
index 023ed766c62..577878ef964 100644
--- a/storage/innobase/buf/buf0flu.cc
+++ b/storage/innobase/buf/buf0flu.cc
@@ -104,6 +104,23 @@ in thrashing. */
/* @} */
+/******************************************************************//**
+Adds one page to the flush_list size in bytes: zip_size for a compressed page,
+UNIV_PAGE_SIZE otherwise. The caller must own the flush list mutex. */
+static inline
+void
+incr_flush_list_size_in_bytes(
+/*==========================*/
+ buf_block_t* block, /*!< in: control block whose page size is added */
+ buf_pool_t* buf_pool) /*!< in/out: buffer pool instance; its stat.flush_list_bytes is increased */
+{
+ ulint zip_size;
+ ut_ad(buf_flush_list_mutex_own(buf_pool));
+ zip_size = page_zip_get_size(&block->page.zip);
+ buf_pool->stat.flush_list_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE;
+ ut_ad(buf_pool->stat.flush_list_bytes <= buf_pool->curr_pool_size);
+}
+
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/******************************************************************//**
Validates the flush list.
@@ -333,6 +350,7 @@ buf_flush_insert_into_flush_list(
ut_d(block->page.in_flush_list = TRUE);
block->page.oldest_modification = lsn;
UT_LIST_ADD_FIRST(list, buf_pool->flush_list, &block->page);
+ incr_flush_list_size_in_bytes(block, buf_pool);
#ifdef UNIV_DEBUG_VALGRIND
{
@@ -439,6 +457,8 @@ buf_flush_insert_sorted_into_flush_list(
MONITOR_INC(MONITOR_PAGE_INFLUSH);
+ incr_flush_list_size_in_bytes(block, buf_pool);
+
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
ut_a(buf_flush_validate_low(buf_pool));
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
@@ -538,6 +558,7 @@ buf_flush_remove(
buf_page_t* bpage) /*!< in: pointer to the block in question */
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ ulint zip_size;
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
@@ -576,6 +597,9 @@ buf_flush_remove(
because we assert on in_flush_list in comparison function. */
ut_d(bpage->in_flush_list = FALSE);
+ zip_size = page_zip_get_size(&bpage->zip);
+ buf_pool->stat.flush_list_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;
+
bpage->oldest_modification = 0;
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc
index c35d84cb985..5f0c0cae96c 100644
--- a/storage/innobase/buf/buf0lru.cc
+++ b/storage/innobase/buf/buf0lru.cc
@@ -158,6 +158,23 @@ buf_LRU_block_free_hashed_page(
be in a state where it can be freed */
/******************************************************************//**
+Adds one page to the LRU size in bytes: zip_size for a compressed page,
+UNIV_PAGE_SIZE otherwise. The caller must own the buffer pool mutex. */
+static inline
+void
+incr_LRU_size_in_bytes(
+/*===================*/
+ buf_page_t* bpage, /*!< in: control block whose page size is added */
+ buf_pool_t* buf_pool) /*!< in/out: buffer pool instance; its stat.LRU_bytes is increased */
+{
+ ulint zip_size;
+ ut_ad(buf_pool_mutex_own(buf_pool));
+ zip_size = page_zip_get_size(&bpage->zip);
+ buf_pool->stat.LRU_bytes += zip_size ? zip_size : UNIV_PAGE_SIZE;
+ ut_ad(buf_pool->stat.LRU_bytes <= buf_pool->curr_pool_size);
+}
+
+/******************************************************************//**
Determines if the unzip_LRU list should be used for evicting a victim
instead of the general LRU list.
@return TRUE if should use unzip_LRU */
@@ -1107,6 +1124,7 @@ buf_LRU_remove_block(
buf_page_t* bpage) /*!< in: control block */
{
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ ulint zip_size;
ut_ad(buf_pool);
ut_ad(bpage);
@@ -1142,6 +1160,9 @@ buf_LRU_remove_block(
UT_LIST_REMOVE(LRU, buf_pool->LRU, bpage);
ut_d(bpage->in_LRU_list = FALSE);
+ zip_size = page_zip_get_size(&bpage->zip);
+ buf_pool->stat.LRU_bytes -= zip_size ? zip_size : UNIV_PAGE_SIZE;
+
buf_unzip_LRU_remove_block_if_needed(bpage);
/* If the LRU list is so short that LRU_old is not defined,
@@ -1202,7 +1223,10 @@ buf_unzip_LRU_add_block(
}
/******************************************************************//**
-Adds a block to the LRU list end. */
+Adds a block to the LRU list end. Please make sure that the zip_size is
+already set into the page zip when invoking the function, so that we
+can get correct zip_size from the buffer page when adding a block
+into LRU */
UNIV_INLINE
void
buf_LRU_add_block_to_end_low(
@@ -1221,6 +1245,8 @@ buf_LRU_add_block_to_end_low(
UT_LIST_ADD_LAST(LRU, buf_pool->LRU, bpage);
ut_d(bpage->in_LRU_list = TRUE);
+ incr_LRU_size_in_bytes(bpage, buf_pool);
+
if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
ut_ad(buf_pool->LRU_old);
@@ -1249,7 +1275,10 @@ buf_LRU_add_block_to_end_low(
}
/******************************************************************//**
-Adds a block to the LRU list. */
+Adds a block to the LRU list. Please make sure that the zip_size is
+already set into the page zip when invoking the function, so that we
+can get correct zip_size from the buffer page when adding a block
+into LRU */
UNIV_INLINE
void
buf_LRU_add_block_low(
@@ -1291,6 +1320,8 @@ buf_LRU_add_block_low(
ut_d(bpage->in_LRU_list = TRUE);
+ incr_LRU_size_in_bytes(bpage, buf_pool);
+
if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) {
ut_ad(buf_pool->LRU_old);
@@ -1318,7 +1349,10 @@ buf_LRU_add_block_low(
}
/******************************************************************//**
-Adds a block to the LRU list. */
+Adds a block to the LRU list. Please make sure that the zip_size is
+already set into the page zip when invoking the function, so that we
+can get correct zip_size from the buffer page when adding a block
+into LRU */
UNIV_INTERN
void
buf_LRU_add_block(
@@ -1540,6 +1574,8 @@ func_exit:
UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU,
prev_b, b);
+ incr_LRU_size_in_bytes(b, buf_pool);
+
if (buf_page_is_old(b)) {
buf_pool->LRU_old_len++;
if (UNIV_UNLIKELY
diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc
index ff93be3e76a..95bc022de8b 100644
--- a/storage/innobase/dict/dict0load.cc
+++ b/storage/innobase/dict/dict0load.cc
@@ -2380,7 +2380,8 @@ dict_load_foreigns(
ibool check_charsets) /*!< in: TRUE=check charset
compatibility */
{
- char tuple_buf[DTUPLE_EST_ALLOC(1)];
+ ulint tuple_buf[(DTUPLE_EST_ALLOC(1) + sizeof(ulint) - 1)
+ / sizeof(ulint)];
btr_pcur_t pcur;
dtuple_t* tuple;
dfield_t* dfield;
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 2e6835fe0c0..9aafeb4f69c 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -1493,7 +1493,7 @@ fil_space_get_size(
ut_ad(fil_system);
- fil_mutex_enter_and_prepare_for_io(id);
+ mutex_enter(&fil_system->mutex);
space = fil_space_get_by_id(id);
@@ -1508,6 +1508,23 @@ fil_space_get_size(
ut_a(1 == UT_LIST_GET_LEN(space->chain));
+ mutex_exit(&fil_system->mutex);
+
+ /* It is possible that the space gets evicted at this point
+ before the fil_mutex_enter_and_prepare_for_io() acquires
+ the fil_system->mutex. Check for this after completing the
+ call to fil_mutex_enter_and_prepare_for_io(). */
+ fil_mutex_enter_and_prepare_for_io(id);
+
+ /* We are still holding the fil_system->mutex. Check if
+ the space is still in memory cache. */
+ space = fil_space_get_by_id(id);
+
+ if (space == NULL) {
+ mutex_exit(&fil_system->mutex);
+ return(0);
+ }
+
node = UT_LIST_GET_FIRST(space->chain);
/* It must be a single-table tablespace and we have not opened
@@ -1545,7 +1562,7 @@ fil_space_get_flags(
return(0);
}
- fil_mutex_enter_and_prepare_for_io(id);
+ mutex_enter(&fil_system->mutex);
space = fil_space_get_by_id(id);
@@ -1560,6 +1577,23 @@ fil_space_get_flags(
ut_a(1 == UT_LIST_GET_LEN(space->chain));
+ mutex_exit(&fil_system->mutex);
+
+ /* It is possible that the space gets evicted at this point
+ before the fil_mutex_enter_and_prepare_for_io() acquires
+ the fil_system->mutex. Check for this after completing the
+ call to fil_mutex_enter_and_prepare_for_io(). */
+ fil_mutex_enter_and_prepare_for_io(id);
+
+ /* We are still holding the fil_system->mutex. Check if
+ the space is still in memory cache. */
+ space = fil_space_get_by_id(id);
+
+ if (space == NULL) {
+ mutex_exit(&fil_system->mutex);
+ return(0);
+ }
+
node = UT_LIST_GET_FIRST(space->chain);
/* It must be a single-table tablespace and we have not opened
@@ -2745,7 +2779,7 @@ retry:
mutex_exit(&fil_system->mutex);
#ifndef UNIV_HOTBACKUP
- if (success) {
+ if (success && !recv_recovery_on) {
mtr_t mtr;
mtr_start(&mtr);
@@ -4047,6 +4081,21 @@ retry:
start_page_no = space->size;
file_start_page_no = space->size - node->size;
+#ifdef HAVE_POSIX_FALLOCATE
+ if (srv_use_posix_fallocate) {
+ mutex_exit(&fil_system->mutex);
+ success = os_file_set_size(node->name, node->handle,
+ size_after_extend * page_size);
+ mutex_enter(&fil_system->mutex);
+ if (success) {
+ node->size += (size_after_extend - start_page_no);
+ space->size += (size_after_extend - start_page_no);
+ os_has_said_disk_full = FALSE;
+ }
+ goto complete_io;
+ }
+#endif
+
/* Extend at most 64 pages at a time */
buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
buf2 = static_cast<byte*>(mem_alloc(buf_size + page_size));
@@ -4102,6 +4151,10 @@ retry:
node->size += pages_added;
node->being_extended = FALSE;
+#ifdef HAVE_POSIX_FALLOCATE
+complete_io:
+#endif
+
fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
*actual_size = space->size;
@@ -5025,3 +5078,28 @@ fil_close(void)
fil_system = NULL;
}
+
+/****************************************************************//**
+Generate redo logs for swapping two .ibd files, in one mini-transaction. */
+UNIV_INTERN
+void
+fil_mtr_rename_log(
+/*===============*/
+ ulint old_space_id, /*!< in: tablespace id of the old
+ table; logged as renamed old_name -> tmp_name */
+ const char* old_name, /*!< in: old table name */
+ ulint new_space_id, /*!< in: tablespace id of the new
+ table; logged as renamed new_name -> old_name */
+ const char* new_name, /*!< in: new table name */
+ const char* tmp_name) /*!< in: temp table name that the old
+ table is renamed to while swapping */
+{
+ mtr_t mtr;
+ mtr_start(&mtr);
+ fil_op_write_log(MLOG_FILE_RENAME, old_space_id,
+ 0, 0, old_name, tmp_name, &mtr);
+ fil_op_write_log(MLOG_FILE_RENAME, new_space_id,
+ 0, 0, new_name, old_name, &mtr);
+ mtr_commit(&mtr);
+}
+
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index e19fe47e81a..e3c2204affb 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -167,6 +167,8 @@ static my_bool innobase_file_format_check = TRUE;
static my_bool innobase_log_archive = FALSE;
static char* innobase_log_arch_dir = NULL;
#endif /* UNIV_LOG_ARCHIVE */
+static my_bool innobase_use_atomic_writes = FALSE;
+static my_bool innobase_use_fallocate = TRUE;
static my_bool innobase_use_doublewrite = TRUE;
static my_bool innobase_use_checksums = TRUE;
static my_bool innobase_locks_unsafe_for_binlog = FALSE;
@@ -471,8 +473,12 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_buffer_pool_load_status, SHOW_CHAR},
{"buffer_pool_pages_data",
(char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG},
+ {"buffer_pool_bytes_data",
+ (char*) &export_vars.innodb_buffer_pool_bytes_data, SHOW_LONG},
{"buffer_pool_pages_dirty",
(char*) &export_vars.innodb_buffer_pool_pages_dirty, SHOW_LONG},
+ {"buffer_pool_bytes_dirty",
+ (char*) &export_vars.innodb_buffer_pool_bytes_dirty, SHOW_LONG},
{"buffer_pool_pages_flushed",
(char*) &export_vars.innodb_buffer_pool_pages_flushed, SHOW_LONG},
{"buffer_pool_pages_free",
@@ -567,6 +573,12 @@ static SHOW_VAR innodb_status_variables[]= {
(char*) &export_vars.innodb_truncated_status_writes, SHOW_LONG},
{"available_undo_logs",
(char*) &export_vars.innodb_available_undo_logs, SHOW_LONG},
+#ifdef UNIV_DEBUG
+ {"purge_trx_id_age",
+ (char*) &export_vars.innodb_purge_trx_id_age, SHOW_LONG},
+ {"purge_view_trx_id_age",
+ (char*) &export_vars.innodb_purge_view_trx_id_age, SHOW_LONG},
+#endif /* UNIV_DEBUG */
{NullS, NullS, SHOW_LONG}
};
@@ -1407,6 +1419,8 @@ convert_error_code_to_mysql(
return(HA_ERR_INDEX_CORRUPT);
case DB_UNDO_RECORD_TOO_BIG:
return(HA_ERR_UNDO_REC_TOO_BIG);
+ case DB_OUT_OF_MEMORY:
+ return(HA_ERR_OUT_OF_MEM);
}
}
@@ -1621,15 +1635,6 @@ innobase_get_lower_case_table_names(void)
return(lower_case_table_names);
}
-#if defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN)
-extern MYSQL_PLUGIN_IMPORT MY_TMPDIR mysql_tmpdir_list;
-/*******************************************************************//**
-Map an OS error to an errno value. The OS error number is stored in
-_doserrno and the mapped value is stored in errno) */
-void __cdecl
-_dosmaperr(
- unsigned long); /*!< in: OS error value */
-
/*********************************************************************//**
Creates a temporary file.
@return temporary file descriptor, or < 0 on error */
@@ -1638,92 +1643,16 @@ int
innobase_mysql_tmpfile(void)
/*========================*/
{
- int fd; /* handle of opened file */
- HANDLE osfh; /* OS handle of opened file */
- char* tmpdir; /* point to the directory
- where to create file */
- TCHAR path_buf[MAX_PATH - 14]; /* buffer for tmp file path.
- The length cannot be longer
- than MAX_PATH - 14, or
- GetTempFileName will fail. */
- char filename[MAX_PATH]; /* name of the tmpfile */
- DWORD fileaccess = GENERIC_READ /* OS file access */
- | GENERIC_WRITE
- | DELETE;
- DWORD fileshare = FILE_SHARE_READ /* OS file sharing mode */
- | FILE_SHARE_WRITE
- | FILE_SHARE_DELETE;
- DWORD filecreate = CREATE_ALWAYS; /* OS method of open/create */
- DWORD fileattrib = /* OS file attribute flags */
- FILE_ATTRIBUTE_NORMAL
- | FILE_FLAG_DELETE_ON_CLOSE
- | FILE_ATTRIBUTE_TEMPORARY
- | FILE_FLAG_SEQUENTIAL_SCAN;
-
- DBUG_ENTER("innobase_mysql_tmpfile");
-
- tmpdir = my_tmpdir(&mysql_tmpdir_list);
-
- /* The tmpdir parameter can not be NULL for GetTempFileName. */
- if (!tmpdir) {
- uint ret;
-
- /* Use GetTempPath to determine path for temporary files. */
- ret = GetTempPath(sizeof(path_buf), path_buf);
- if (ret > sizeof(path_buf) || (ret == 0)) {
-
- _dosmaperr(GetLastError()); /* map error */
- DBUG_RETURN(-1);
- }
-
- tmpdir = path_buf;
- }
-
- /* Use GetTempFileName to generate a unique filename. */
- if (!GetTempFileName(tmpdir, "ib", 0, filename)) {
-
- _dosmaperr(GetLastError()); /* map error */
- DBUG_RETURN(-1);
- }
-
- DBUG_PRINT("info", ("filename: %s", filename));
-
- /* Open/Create the file. */
- osfh = CreateFile(filename, fileaccess, fileshare, NULL,
- filecreate, fileattrib, NULL);
- if (osfh == INVALID_HANDLE_VALUE) {
-
- /* open/create file failed! */
- _dosmaperr(GetLastError()); /* map error */
- DBUG_RETURN(-1);
- }
-
- do {
- /* Associates a CRT file descriptor with the OS file handle. */
- fd = _open_osfhandle((intptr_t) osfh, 0);
- } while (fd == -1 && errno == EINTR);
+ int fd2 = -1;
+ File fd;
- if (fd == -1) {
- /* Open failed, close the file handle. */
+ DBUG_EXECUTE_IF(
+ "innobase_tmpfile_creation_failure",
+ return(-1);
+ );
- _dosmaperr(GetLastError()); /* map error */
- CloseHandle(osfh); /* no need to check if
- CloseHandle fails */
- }
+ fd = mysql_tmpfile("ib");
- DBUG_RETURN(fd);
-}
-#else
-/*********************************************************************//**
-Creates a temporary file.
-@return temporary file descriptor, or < 0 on error */
-UNIV_INTERN
-int
-innobase_mysql_tmpfile(void)
-/*========================*/
-{
- int fd2 = -1;
- File fd = mysql_tmpfile("ib");
if (fd >= 0) {
/* Copy the file descriptor, so that the additional resources
allocated by create_temp_file() can be freed by invoking
@@ -1767,7 +1696,6 @@ innobase_mysql_tmpfile(void)
}
return(fd2);
}
-#endif /* defined (__WIN__) && defined (MYSQL_DYNAMIC_PLUGIN) */
/*********************************************************************//**
Wrapper around MySQL's copy_and_convert function.
@@ -2635,6 +2563,13 @@ ha_innobase::init_table_handle_for_HANDLER(void)
reset_template();
}
+/****************************************************************//**
+File extensions of an InnoDB single-table tablespace, as a list
+terminated by NullS. The only entry is ".ibd". */
+static const char* ha_innobase_exts[] = {
+ ".ibd",
+ NullS
+};
+
/*********************************************************************//**
Opens an InnoDB database.
@return 0 on success, error code on failure */
@@ -2691,6 +2626,9 @@ innobase_init(
innobase_hton->alter_table_flags = innobase_alter_table_flags;
innobase_hton->kill_query = innobase_kill_query;
+ if (srv_file_per_table)
+ innobase_hton->tablefile_extensions = ha_innobase_exts;
+
ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
#ifndef DBUG_OFF
@@ -3026,6 +2964,38 @@ innobase_change_buffering_inited_ok:
innobase_commit_concurrency_init_default();
+#ifdef HAVE_POSIX_FALLOCATE
+ srv_use_posix_fallocate = 0 && (ibool) innobase_use_fallocate;
+#endif
+ srv_use_atomic_writes = (ibool) innobase_use_atomic_writes;
+ if (innobase_use_atomic_writes) {
+ fprintf(stderr, "InnoDB: using atomic writes.\n");
+
+ /* Force doublewrite buffer off, atomic writes replace it. */
+ if (srv_use_doublewrite_buf) {
+ fprintf(stderr, "InnoDB: Switching off doublewrite buffer "
+ "because of atomic writes.\n");
+ innobase_use_doublewrite = srv_use_doublewrite_buf = FALSE;
+ }
+
+ /* Force O_DIRECT on Unixes (on Windows writes are always unbuffered)*/
+#ifndef _WIN32
+ if(!innobase_file_flush_method ||
+ !strstr(innobase_file_flush_method, "O_DIRECT")) {
+ innobase_file_flush_method =
+ srv_file_flush_method_str = (char*)"O_DIRECT";
+ fprintf(stderr, "InnoDB: using O_DIRECT due to atomic writes.\n");
+ }
+#endif
+#ifdef HAVE_POSIX_FALLOCATE
+ /* Due to a bug in directFS, using atomic writes requires
+ * posix_fallocate() to extend the file, because
+ * pwrite() past the end of the file won't work.
+ */
+ srv_use_posix_fallocate = TRUE;
+#endif
+ }
+
#ifdef HAVE_PSI_INTERFACE
/* Register keys with MySQL performance schema */
int count;
@@ -3924,13 +3894,6 @@ ha_innobase::table_flags() const
}
/****************************************************************//**
-Gives the file extension of an InnoDB single-table tablespace. */
-static const char* ha_innobase_exts[] = {
- ".ibd",
- NullS
-};
-
-/****************************************************************//**
Returns the table type (storage engine name).
@return table type */
UNIV_INTERN
@@ -14619,6 +14582,20 @@ static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
"Disable with --skip-innodb-doublewrite.",
NULL, NULL, TRUE);
+/* Read-only boolean option use_atomic_writes, bound to
+innobase_use_atomic_writes. The help text is assembled from adjacent
+string literals, so every fragment that is followed by further text
+must end in a space. */
+static MYSQL_SYSVAR_BOOL(use_atomic_writes, innobase_use_atomic_writes,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Prevent partial page writes, via atomic writes. "
+ "The option is used to prevent partial writes in case of a crash/poweroff, "
+ "as faster alternative to doublewrite buffer. "
+ "Currently this option works only "
+ "on Linux only with FusionIO device, and directFS filesystem.",
+ NULL, NULL, FALSE);
+
+/* Read-only boolean option use_fallocate, bound to innobase_use_fallocate.
+NOTE(review): the startup code in this file computes
+srv_use_posix_fallocate as "0 && (ibool) innobase_use_fallocate", so this
+option appears to have no effect on its own; srv_use_posix_fallocate is
+only set to TRUE together with atomic writes. Confirm this is intended. */
+static MYSQL_SYSVAR_BOOL(use_fallocate, innobase_use_fallocate,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Preallocate files fast, using operating system functionality. On POSIX systems, posix_fallocate system call is used.",
+ NULL, NULL, TRUE);
+
static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
PLUGIN_VAR_RQCMDARG,
"Number of IOPs the server can do. Tunes the background IO rate",
@@ -15179,11 +15156,23 @@ static MYSQL_SYSVAR_BOOL(print_all_deadlocks, srv_print_all_deadlocks,
"Print all deadlocks to MySQL error log (off by default)",
NULL, NULL, FALSE);
-#ifdef UNIV_DEBUG_never
+#ifdef UNIV_DEBUG
static MYSQL_SYSVAR_UINT(trx_rseg_n_slots_debug, trx_rseg_n_slots_debug,
- PLUGIN_VAR_RQCMDARG,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_NOCMDOPT,
"Debug flags for InnoDB to limit TRX_RSEG_N_SLOTS for trx_rsegf_undo_find_free()",
NULL, NULL, 0, 0, 1024, 0);
+
+static MYSQL_SYSVAR_UINT(limit_optimistic_insert_debug,
+ btr_cur_limit_optimistic_insert_debug, PLUGIN_VAR_RQCMDARG,
+ "Artificially limit the number of records per B-tree page (0=unlimited).",
+ NULL, NULL, 0, 0, UINT_MAX32, 0);
+
+static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug,
+ srv_purge_view_update_only_debug, PLUGIN_VAR_NOCMDOPT,
+ "Pause actual purging any delete-marked records, but merely update the purge view. "
+ "It is to create artificially the situation the purge view have been updated "
+ "but the each purges were not done yet.",
+ NULL, NULL, FALSE);
#endif /* UNIV_DEBUG */
static struct st_mysql_sys_var* innobase_system_variables[]= {
@@ -15206,6 +15195,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(data_file_path),
MYSQL_SYSVAR(data_home_dir),
MYSQL_SYSVAR(doublewrite),
+ MYSQL_SYSVAR(use_atomic_writes),
+ MYSQL_SYSVAR(use_fallocate),
MYSQL_SYSVAR(fast_shutdown),
MYSQL_SYSVAR(file_io_threads),
MYSQL_SYSVAR(read_io_threads),
@@ -15299,8 +15290,10 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(undo_directory),
MYSQL_SYSVAR(undo_tablespaces),
MYSQL_SYSVAR(sync_array_size),
-#ifdef UNIV_DEBUG_never /* disable this flag. --innodb-trx becomes ambiguous */
+#ifdef UNIV_DEBUG
MYSQL_SYSVAR(trx_rseg_n_slots_debug),
+ MYSQL_SYSVAR(limit_optimistic_insert_debug),
+ MYSQL_SYSVAR(trx_purge_view_update_only_debug),
#endif /* UNIV_DEBUG */
NULL
};
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index 1468bc79c04..e1a10ade9ad 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -102,8 +102,6 @@ innobase_col_to_mysql(
ut_ad(flen >= len);
ut_ad(DATA_MBMAXLEN(col->mbminmaxlen)
>= DATA_MBMINLEN(col->mbminmaxlen));
- ut_ad(DATA_MBMAXLEN(col->mbminmaxlen)
- > DATA_MBMINLEN(col->mbminmaxlen) || flen == len);
memcpy(dest, data, len);
break;
@@ -315,7 +313,7 @@ innobase_check_index_keys(
}
}
- my_error(ER_WRONG_KEY_COLUMN, MYF(0),
+ my_error(ER_WRONG_KEY_COLUMN, MYF(0), "InnoDB",
field->field_name);
return(ER_WRONG_KEY_COLUMN);
}
@@ -329,7 +327,7 @@ innobase_check_index_keys(
continue;
}
- my_error(ER_WRONG_KEY_COLUMN, MYF(0),
+ my_error(ER_WRONG_KEY_COLUMN, MYF(0), "InnoDB",
key_part1.field->field_name);
return(ER_WRONG_KEY_COLUMN);
}
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index f437575579e..80c62185fb0 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -822,6 +822,11 @@ srv_printf_innodb_monitor(). */
extern ulint btr_cur_n_sea_old;
#endif /* !UNIV_HOTBACKUP */
+#ifdef UNIV_DEBUG
+/* Flag to limit optimistic insert records */
+extern uint btr_cur_limit_optimistic_insert_debug;
+#endif /* UNIV_DEBUG */
+
#ifndef UNIV_NONINL
#include "btr0cur.ic"
#endif
diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic
index 540417e3062..080866c7465 100644
--- a/storage/innobase/include/btr0cur.ic
+++ b/storage/innobase/include/btr0cur.ic
@@ -27,6 +27,16 @@ Created 10/16/1994 Heikki Tuuri
#include "btr0btr.h"
#ifdef UNIV_DEBUG
+/** Debug-only hook: executes CODE when
+btr_cur_limit_optimistic_insert_debug is nonzero and NREC has reached
+that limit. Expands to nothing when UNIV_DEBUG is not defined.
+NOTE(review): the debug expansion is a bare if statement, not wrapped in
+do { } while (0), so it must not be used directly in front of an else.
+@param NREC record count to compare against the limit
+@param CODE statement to execute when the limit is reached */
+# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)\
+if (btr_cur_limit_optimistic_insert_debug\
+ && (NREC) >= (ulint)btr_cur_limit_optimistic_insert_debug) {\
+ CODE;\
+}
+#else
+# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)
+#endif /* UNIV_DEBUG */
+
+#ifdef UNIV_DEBUG
/*********************************************************//**
Returns the page cursor component of a tree cursor.
@return pointer to page cursor component */
@@ -135,6 +145,9 @@ btr_cur_compress_recommendation(
page = btr_cur_get_page(cursor);
+ LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2,
+ return(FALSE));
+
if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT)
|| ((btr_page_get_next(page, mtr) == FIL_NULL)
&& (btr_page_get_prev(page, mtr) == FIL_NULL))) {
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index 2284f21e3ab..d56f1790ae4 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -205,6 +205,15 @@ struct buf_pool_info_struct{
typedef struct buf_pool_info_struct buf_pool_info_t;
+/** The occupied bytes of lists in all buffer pools. This is the output
+type of buf_get_total_list_size_in_bytes(). */
+struct buf_pools_list_size_struct {
+ ulint LRU_bytes; /*!< LRU size in bytes */
+ ulint unzip_LRU_bytes; /*!< unzip_LRU size in bytes */
+ ulint flush_list_bytes; /*!< flush_list size in bytes */
+};
+
+/** Shorthand type name for struct buf_pools_list_size_struct */
+typedef struct buf_pools_list_size_struct buf_pools_list_size_t;
+
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
Acquire mutex on all buffer pool instances */
@@ -1010,8 +1019,7 @@ UNIV_INLINE
void
buf_page_set_accessed(
/*==================*/
- buf_page_t* bpage, /*!< in/out: control block */
- ulint time_ms) /*!< in: ut_time_ms() */
+ buf_page_t* bpage) /*!< in/out: control block */
__attribute__((nonnull));
/*********************************************************************//**
Gets the buf_block_t handle of a buffered file block if an uncompressed
@@ -1368,6 +1376,14 @@ buf_get_total_list_len(
ulint* free_len, /*!< out: length of all free lists */
ulint* flush_list_len);/*!< out: length of all flush lists */
/********************************************************************//**
+Get total list size in bytes from all buffer pools. */
+UNIV_INTERN
+void
+buf_get_total_list_size_in_bytes(
+/*=============================*/
+ buf_pools_list_size_t* buf_pools_list_size); /*!< out: list sizes
+ in all buffer pools */
+/********************************************************************//**
Get total buffer pool statistics. */
UNIV_INTERN
void
@@ -1547,10 +1563,11 @@ struct buf_page_struct{
to read this for heuristic
purposes without holding any
mutex or latch */
- unsigned access_time:32; /*!< time of first access, or
- 0 if the block was never accessed
- in the buffer pool */
/* @} */
+ unsigned access_time; /*!< time of first access, or
+ 0 if the block was never accessed
+ in the buffer pool. Protected by
+ block mutex */
# if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
ibool file_page_was_freed;
/*!< this is set to TRUE when
@@ -1730,6 +1747,8 @@ struct buf_pool_stat_struct{
young because the first access
was not long enough ago, in
buf_page_peek_if_too_old() */
+ ulint LRU_bytes; /*!< LRU size in bytes */
+ ulint flush_list_bytes;/*!< flush_list size in bytes */
};
/** Statistics of buddy blocks of a given size. */
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
index 88c29ab5603..d0a6df4eb40 100644
--- a/storage/innobase/include/buf0buf.ic
+++ b/storage/innobase/include/buf0buf.ic
@@ -614,18 +614,18 @@ UNIV_INLINE
void
buf_page_set_accessed(
/*==================*/
- buf_page_t* bpage, /*!< in/out: control block */
- ulint time_ms) /*!< in: ut_time_ms() */
+ buf_page_t* bpage) /*!< in/out: control block */
{
#ifdef UNIV_DEBUG
buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
- ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(!buf_pool_mutex_own(buf_pool));
+ ut_ad(mutex_own(buf_page_get_mutex(bpage)));
#endif
ut_a(buf_page_in_file(bpage));
if (!bpage->access_time) {
/* Make this the time of the first access. */
- bpage->access_time = time_ms;
+ bpage->access_time = ut_time_ms();
}
}
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index 527852da758..74c5525c2e5 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -157,7 +157,10 @@ buf_LRU_block_free_non_file_page(
/*=============================*/
buf_block_t* block); /*!< in: block, must not contain a file page */
/******************************************************************//**
-Adds a block to the LRU list. */
+Adds a block to the LRU list. Please make sure that the zip_size is
+already set into the page zip when invoking the function, so that we
+can get correct zip_size from the buffer page when adding a block
+into LRU */
UNIV_INTERN
void
buf_LRU_add_block(
diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic
index a5e94a8edff..d489bef89a8 100644
--- a/storage/innobase/include/data0type.ic
+++ b/storage/innobase/include/data0type.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -556,35 +556,18 @@ dtype_get_fixed_size_low(
} else if (!comp) {
return(len);
} else {
- /* We play it safe here and ask MySQL for
- mbminlen and mbmaxlen. Although
- mbminlen and mbmaxlen are
- initialized if and only if prtype
- is (in one of the 3 functions in this file),
- it could be that none of these functions
- has been called. */
-
+#ifdef UNIV_DEBUG
ulint i_mbminlen, i_mbmaxlen;
innobase_get_cset_width(
dtype_get_charset_coll(prtype),
&i_mbminlen, &i_mbmaxlen);
- if (DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen)
- != mbminmaxlen) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: "
- "mbminlen=%lu, "
- "mbmaxlen=%lu, "
- "type->mbminlen=%lu, "
- "type->mbmaxlen=%lu\n",
- (ulong) i_mbminlen,
- (ulong) i_mbmaxlen,
- (ulong) DATA_MBMINLEN(mbminmaxlen),
- (ulong) DATA_MBMAXLEN(mbminmaxlen));
- }
- if (i_mbminlen == i_mbmaxlen) {
+ ut_ad(DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen)
+ == mbminmaxlen);
+#endif /* UNIV_DEBUG */
+ if (DATA_MBMINLEN(mbminmaxlen)
+ == DATA_MBMAXLEN(mbminmaxlen)) {
return(len);
}
}
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 1e2b8049860..4bd9f9fa51f 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -739,6 +739,21 @@ fil_tablespace_is_being_deleted(
/*============================*/
ulint id); /*!< in: space id */
+/****************************************************************//**
+Generate redo logs for swapping two .ibd files */
+UNIV_INTERN
+void
+fil_mtr_rename_log(
+/*===============*/
+ ulint old_space_id, /*!< in: tablespace id of the old
+ table. */
+ const char* old_name, /*!< in: old table name */
+ ulint new_space_id, /*!< in: tablespace id of the new
+ table */
+ const char* new_name, /*!< in: new table name */
+ const char* tmp_name); /*!< in: temp table name used while
+ swapping */
+
typedef struct fil_space_struct fil_space_t;
#endif /* !UNIV_INNOCHECKSUM */
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
index a1ffe87d5bd..f97a11b9483 100644
--- a/storage/innobase/include/lock0lock.h
+++ b/storage/innobase/include/lock0lock.h
@@ -864,14 +864,22 @@ lock_trx_has_sys_table_locks(
remains set when the waiting lock is granted,
or if the lock is inherited to a neighboring
record */
-#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_MODE_MASK
+#define LOCK_CONV_BY_OTHER 4096 /*!< this bit is set when the lock was created
+ by a transaction other than lock->trx */
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_CONV_BY_OTHER)&LOCK_MODE_MASK
# error
#endif
-#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION)&LOCK_TYPE_MASK
+#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_CONV_BY_OTHER)&LOCK_TYPE_MASK
# error
#endif
/* @} */
+/** Checks if this is a waiting lock created by lock->trx itself, i.e.
+LOCK_WAIT is set and LOCK_CONV_BY_OTHER is not. The argument is
+parenthesized so that a compound expression is masked as a whole.
+@param type_mode lock->type_mode
+@return whether it is a waiting lock belonging to lock->trx */
+#define lock_is_wait_not_by_other(type_mode) \
+ (((type_mode) & (LOCK_CONV_BY_OTHER | LOCK_WAIT)) == LOCK_WAIT)
+
/** Lock operation struct */
typedef struct lock_op_struct lock_op_t;
/** Lock operation struct */
diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h
index c6c70bb5f09..671f8052afa 100644
--- a/storage/innobase/include/rem0rec.h
+++ b/storage/innobase/include/rem0rec.h
@@ -362,24 +362,6 @@ rec_get_offsets_func(
rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__)
/******************************************************//**
-Determine the offset to each field in a leaf-page record
-in ROW_FORMAT=COMPACT. This is a special case of
-rec_init_offsets() and rec_get_offsets_func(). */
-UNIV_INTERN
-void
-rec_init_offsets_comp_ordinary(
-/*===========================*/
- const rec_t* rec, /*!< in: physical record in
- ROW_FORMAT=COMPACT */
- ulint extra, /*!< in: number of bytes to reserve
- between the record header and
- the data payload
- (usually REC_N_NEW_EXTRA_BYTES) */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets);/*!< in/out: array of offsets;
- in: n=rec_offs_n_fields(offsets) */
-
-/******************************************************//**
The following function determines the offsets to each field
in the record. It can reuse a previously allocated array. */
UNIV_INTERN
@@ -648,8 +630,48 @@ rec_copy(
/*=====*/
void* buf, /*!< in: buffer */
const rec_t* rec, /*!< in: physical record */
- const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+ const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
+ __attribute__((nonnull));
#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Determines the size of a data tuple prefix in a temporary file.
+@return total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_temp(
+/*========================*/
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields,/*!< in: number of data fields */
+ ulint* extra) /*!< out: extra size */
+ __attribute__((warn_unused_result, nonnull));
+
+/******************************************************//**
+Determine the offset to each field in temporary file.
+@see rec_convert_dtuple_to_temp() */
+UNIV_INTERN
+void
+rec_init_offsets_temp(
+/*==================*/
+ const rec_t* rec, /*!< in: temporary file record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint* offsets)/*!< in/out: array of offsets;
+ in: n=rec_offs_n_fields(offsets) */
+ __attribute__((nonnull));
+
+/*********************************************************//**
+Builds a temporary file record out of a data tuple.
+@see rec_init_offsets_temp() */
+UNIV_INTERN
+void
+rec_convert_dtuple_to_temp(
+/*=======================*/
+ rec_t* rec, /*!< out: record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields) /*!< in: number of fields */
+ __attribute__((nonnull));
+
/**************************************************************//**
Copies the first n fields of a physical record to a new physical record in
a buffer.
@@ -684,21 +706,6 @@ rec_fold(
__attribute__((pure));
#endif /* !UNIV_HOTBACKUP */
/*********************************************************//**
-Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
-UNIV_INTERN
-void
-rec_convert_dtuple_to_rec_comp(
-/*===========================*/
- rec_t* rec, /*!< in: origin of record */
- ulint extra, /*!< in: number of bytes to
- reserve between the record
- header and the data payload
- (normally REC_N_NEW_EXTRA_BYTES) */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint status, /*!< in: status bits of the record */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields);/*!< in: number of data fields */
-/*********************************************************//**
Builds a physical record out of a data tuple and
stores it into the given buffer.
@return pointer to the origin of physical record */
@@ -731,10 +738,7 @@ UNIV_INTERN
ulint
rec_get_converted_size_comp_prefix(
/*===============================*/
- const dict_index_t* index, /*!< in: record descriptor;
- dict_table_is_comp() is
- assumed to hold, even if
- it does not */
+ const dict_index_t* index, /*!< in: record descriptor */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
ulint* extra); /*!< out: extra size */
diff --git a/storage/innobase/include/row0merge.h b/storage/innobase/include/row0merge.h
index c4e2f5ddf41..95c6d85075c 100644
--- a/storage/innobase/include/row0merge.h
+++ b/storage/innobase/include/row0merge.h
@@ -294,7 +294,7 @@ row_merge_buf_empty(
/*********************************************************************//**
Create a merge file. */
UNIV_INTERN
-void
+int
row_merge_file_create(
/*==================*/
merge_file_t* merge_file); /*!< out: merge file structure */
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 99cff251e3c..65257baa4bb 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -252,6 +252,11 @@ extern ibool srv_use_doublewrite_buf;
extern ulong srv_doublewrite_batch_size;
extern ulong srv_checksum_algorithm;
+extern ibool srv_use_atomic_writes;
+#ifdef HAVE_POSIX_FALLOCATE
+extern ibool srv_use_posix_fallocate;
+#endif
+
extern ulong srv_max_buf_pool_modified_pct;
extern ulong srv_max_purge_lag;
extern ulong srv_max_purge_lag_delay;
@@ -312,6 +317,10 @@ extern ibool srv_print_latch_waits;
extern ulint srv_fatal_semaphore_wait_threshold;
extern ulint srv_dml_needed_delay;
+#ifdef UNIV_DEBUG
+extern my_bool srv_purge_view_update_only_debug;
+#endif /* UNIV_DEBUG */
+
#ifndef HAVE_ATOMIC_BUILTINS
/** Mutex protecting some server global variables. */
extern mutex_t server_mutex;
@@ -379,7 +388,7 @@ extern ulint srv_buf_pool_flushed;
reading of a disk page */
extern ulint srv_buf_pool_reads;
-/* print all user-level transactions deadlocks to mysqld stderr */
+/** print all user-level transactions deadlocks to mysqld stderr */
extern my_bool srv_print_all_deadlocks;
/** Status variables to be passed to MySQL */
@@ -727,7 +736,9 @@ struct export_var_struct{
char innodb_buffer_pool_load_status[512];/*!< Buf pool load status */
ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */
ulint innodb_buffer_pool_pages_data; /*!< Data pages */
+ ulint innodb_buffer_pool_bytes_data; /*!< File bytes used */
ulint innodb_buffer_pool_pages_dirty; /*!< Dirty data pages */
+ ulint innodb_buffer_pool_bytes_dirty; /*!< File bytes modified */
ulint innodb_buffer_pool_pages_misc; /*!< Miscellanous pages */
ulint innodb_buffer_pool_pages_free; /*!< Free pages */
#ifdef UNIV_DEBUG
@@ -771,6 +782,11 @@ struct export_var_struct{
ulint innodb_num_open_files; /*!< fil_n_file_opened */
ulint innodb_truncated_status_writes; /*!< srv_truncated_status_writes */
ulint innodb_available_undo_logs; /*!< srv_available_undo_logs */
+#ifdef UNIV_DEBUG
+ ulint innodb_purge_trx_id_age; /*!< max_trx_id - purged trx_id */
+ ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id
+ - purged view's min trx_id */
+#endif /* UNIV_DEBUG */
};
/** Thread slot in the thread table. */
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 60eb1fede91..422828e76f4 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -42,6 +42,8 @@ Created 1/20/1994 Heikki Tuuri
#define _IB_TO_STR(s) #s
#define IB_TO_STR(s) _IB_TO_STR(s)
+#include <mysql_version.h>
+
#define INNODB_VERSION_MAJOR 1
#define INNODB_VERSION_MINOR 2
#define INNODB_VERSION_BUGFIX MYSQL_VERSION_PATCH
@@ -55,10 +57,7 @@ component, i.e. we show M.N.P as M.N */
#define INNODB_VERSION_SHORT \
(INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR)
-#define INNODB_VERSION_STR \
- IB_TO_STR(INNODB_VERSION_MAJOR) "." \
- IB_TO_STR(INNODB_VERSION_MINOR) "." \
- IB_TO_STR(INNODB_VERSION_BUGFIX)
+#define INNODB_VERSION_STR MYSQL_SERVER_VERSION
#define REFMAN "http://dev.mysql.com/doc/refman/" \
IB_TO_STR(MYSQL_VERSION_MAJOR) "." \
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index 476b305ca70..fff59852704 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -858,13 +858,17 @@ lock_reset_lock_and_trx_wait(
/*=========================*/
lock_t* lock) /*!< in/out: record lock */
{
- ut_ad(lock->trx->lock.wait_lock == lock);
ut_ad(lock_get_wait(lock));
ut_ad(lock_mutex_own());
/* Reset the back pointer in trx to this waiting lock request */
- lock->trx->lock.wait_lock = NULL;
+ if (!(lock->type_mode & LOCK_CONV_BY_OTHER)) {
+ ut_ad(lock->trx->lock.wait_lock == lock);
+ lock->trx->lock.wait_lock = NULL;
+ } else {
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
+ }
lock->type_mode &= ~LOCK_WAIT;
}
@@ -1476,7 +1480,7 @@ Checks if a transaction has a GRANTED explicit lock on rec stronger or equal
to precise_mode.
@return lock or NULL */
UNIV_INLINE
-const lock_t*
+lock_t*
lock_rec_has_expl(
/*==============*/
ulint precise_mode,/*!< in: LOCK_S or LOCK_X
@@ -1489,7 +1493,7 @@ lock_rec_has_expl(
ulint heap_no,/*!< in: heap number of the record */
const trx_t* trx) /*!< in: transaction */
{
- const lock_t* lock;
+ lock_t* lock;
ut_ad(lock_mutex_own());
ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S
@@ -1498,14 +1502,14 @@ lock_rec_has_expl(
for (lock = lock_rec_get_first(block, heap_no);
lock != NULL;
- lock = lock_rec_get_next_const(heap_no, lock)) {
+ lock = lock_rec_get_next(heap_no, lock)) {
if (lock->trx == trx
+ && !lock_is_wait_not_by_other(lock->type_mode)
&& lock_mode_stronger_or_eq(
lock_get_mode(lock),
static_cast<enum lock_mode>(
precise_mode & LOCK_MODE_MASK))
- && !lock_get_wait(lock)
&& (!lock_rec_get_rec_not_gap(lock)
|| (precise_mode & LOCK_REC_NOT_GAP)
|| heap_no == PAGE_HEAP_NO_SUPREMUM)
@@ -1813,7 +1817,7 @@ lock_rec_create(
}
ut_ad(trx_mutex_own(trx));
- if (UNIV_UNLIKELY(type_mode & LOCK_WAIT)) {
+ if (lock_is_wait_not_by_other(type_mode)) {
lock_set_lock_and_trx_wait(lock, trx);
}
@@ -1853,11 +1857,12 @@ lock_rec_enqueue_waiting(
const buf_block_t* block, /*!< in: buffer block containing
the record */
ulint heap_no,/*!< in: heap number of the record */
+ lock_t* lock, /*!< in: lock object; NULL if a new
+ one should be created. */
dict_index_t* index, /*!< in: index of record */
que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
- lock_t* lock;
trx_id_t victim_trx_id;
ut_ad(lock_mutex_own());
@@ -1893,10 +1898,20 @@ lock_rec_enqueue_waiting(
ut_ad(0);
}
- /* Enqueue the lock request that will wait to be granted, note that
- we already own the trx mutex. */
- lock = lock_rec_create(
- type_mode | LOCK_WAIT, block, heap_no, index, trx, TRUE);
+ if (lock == NULL) {
+ /* Enqueue the lock request that will wait
+ to be granted, note that we already own
+ the trx mutex. */
+ lock = lock_rec_create(
+ type_mode | LOCK_WAIT, block, heap_no,
+ index, trx, TRUE);
+ } else {
+ ut_ad(lock->type_mode & LOCK_WAIT);
+ ut_ad(lock->type_mode & LOCK_CONV_BY_OTHER);
+
+ lock->type_mode &= ~LOCK_CONV_BY_OTHER;
+ lock_set_lock_and_trx_wait(lock, trx);
+ }
/* Release the mutex to obey the latching order.
This is safe, because lock_deadlock_check_and_resolve()
@@ -2163,6 +2178,7 @@ lock_rec_lock_slow(
que_thr_t* thr) /*!< in: query thread */
{
trx_t* trx;
+ lock_t* lock;
enum db_err err = DB_SUCCESS;
ut_ad(lock_mutex_own());
@@ -2180,7 +2196,27 @@ lock_rec_lock_slow(
trx_mutex_enter(trx);
- if (lock_rec_has_expl(mode, block, heap_no, trx)) {
+ lock = lock_rec_has_expl(mode, block, heap_no, trx);
+ if (lock) {
+ if (lock->type_mode & LOCK_CONV_BY_OTHER) {
+ /* This lock or lock wait was created by another
+ transaction, not by the transaction (trx) itself.
+ So the transaction (trx) must handle it correctly,
+ depending on whether it has been granted or not. */
+
+ if (lock->type_mode & LOCK_WAIT) {
+ /* This lock request has not been granted yet.
+ We must wait until it is granted. */
+
+ goto enqueue_waiting;
+ } else {
+ /* This lock request has already been granted.
+ Just clear the flag. */
+
+ lock->type_mode &= ~LOCK_CONV_BY_OTHER;
+ }
+ }
+
/* The trx already has a strong enough lock on rec: do
nothing */
@@ -2193,8 +2229,10 @@ lock_rec_lock_slow(
have a lock strong enough already granted on the
record, we have to wait. */
+ ut_ad(lock == NULL);
+enqueue_waiting:
err = lock_rec_enqueue_waiting(
- mode, block, heap_no, index, thr);
+ mode, block, heap_no, lock, index, thr);
} else if (!impl) {
/* Set the requested lock on the record, note that
@@ -2348,7 +2386,8 @@ lock_grant(
TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait
for it */
- if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
+ if (!(lock->type_mode & LOCK_CONV_BY_OTHER)
+ && lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
que_thr_t* thr;
thr = que_thr_end_lock_wait(lock->trx);
@@ -2375,6 +2414,7 @@ lock_rec_cancel(
ut_ad(lock_mutex_own());
ut_ad(lock_get_type_low(lock) == LOCK_REC);
+ ut_ad(!(lock->type_mode & LOCK_CONV_BY_OTHER));
/* Reset the bit (there can be only one set bit) in the lock bitmap */
lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock));
@@ -2541,8 +2581,12 @@ lock_rec_reset_and_release_wait(
lock != NULL;
lock = lock_rec_get_next(heap_no, lock)) {
- if (lock_get_wait(lock)) {
+ if (lock_is_wait_not_by_other(lock->type_mode)) {
lock_rec_cancel(lock);
+ } else if (lock_get_wait(lock)) {
+ /* just reset LOCK_WAIT */
+ lock_rec_reset_nth_bit(lock, heap_no);
+ lock_reset_lock_and_trx_wait(lock);
} else {
lock_rec_reset_nth_bit(lock, heap_no);
}
@@ -4017,6 +4061,7 @@ lock_table_create(
ut_ad(table && trx);
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(trx));
+ ut_ad(!(type_mode & LOCK_CONV_BY_OTHER));
/* Non-locking autocommit read-only transactions should not set
any locks. */
@@ -5850,7 +5895,7 @@ lock_rec_insert_check_and_lock(
err = lock_rec_enqueue_waiting(
LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION,
- block, next_rec_heap_no, index, thr);
+ block, next_rec_heap_no, NULL, index, thr);
trx_mutex_exit(trx);
} else {
@@ -5943,10 +5988,25 @@ lock_rec_convert_impl_to_expl(
if (impl_trx != NULL
&& !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, block,
heap_no, impl_trx)) {
+ ulint type_mode = (LOCK_REC | LOCK_X
+ | LOCK_REC_NOT_GAP);
+
+ /* If the delete-marked record was already locked, we
+ should reserve a waiting lock for impl_trx as its implicit
+ lock, because it cannot be locked at this moment. */
+
+ if (rec_get_deleted_flag(rec, rec_offs_comp(offsets))
+ && lock_rec_other_has_conflicting(
+ static_cast<enum lock_mode>
+ (LOCK_X | LOCK_REC_NOT_GAP), block,
+ heap_no, impl_trx)) {
+
+ type_mode |= (LOCK_WAIT | LOCK_CONV_BY_OTHER);
+ }
lock_rec_add_to_queue(
- LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP,
- block, heap_no, index, impl_trx, FALSE);
+ type_mode, block, heap_no, index,
+ impl_trx, FALSE);
}
lock_mutex_exit();
@@ -6581,6 +6641,7 @@ lock_cancel_waiting_and_release(
ut_ad(lock_mutex_own());
ut_ad(trx_mutex_own(lock->trx));
+ ut_ad(!(lock->type_mode & LOCK_CONV_BY_OTHER));
lock->trx->lock.cancel = TRUE;
diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc
index f914fc2676c..c2b9c152a44 100644
--- a/storage/innobase/log/log0recv.cc
+++ b/storage/innobase/log/log0recv.cc
@@ -958,8 +958,11 @@ recv_parse_or_apply_log_rec_body(
not NULL, then the log record is
applied to the page, and the log
record should be complete then */
- mtr_t* mtr) /*!< in: mtr or NULL; should be non-NULL
+ mtr_t* mtr, /*!< in: mtr or NULL; should be non-NULL
if and only if block is non-NULL */
+ ulint space_id)
+ /*!< in: tablespace id obtained by
+ parsing initial log record */
{
dict_index_t* index = NULL;
page_t* page;
@@ -1231,8 +1234,11 @@ recv_parse_or_apply_log_rec_body(
ut_ad(!page || page_type != FIL_PAGE_TYPE_ALLOCATED);
ptr = mlog_parse_string(ptr, end_ptr, page, page_zip);
break;
- case MLOG_FILE_CREATE:
case MLOG_FILE_RENAME:
+ ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type,
+ space_id, 0);
+ break;
+ case MLOG_FILE_CREATE:
case MLOG_FILE_DELETE:
case MLOG_FILE_CREATE2:
ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, 0, 0);
@@ -1611,7 +1617,8 @@ recv_recover_page_func(
recv_parse_or_apply_log_rec_body(recv->type, buf,
buf + recv->len,
- block, &mtr);
+ block, &mtr,
+ recv_addr->space);
end_lsn = recv->start_lsn + recv->len;
mach_write_to_8(FIL_PAGE_LSN + page, end_lsn);
@@ -2079,7 +2086,7 @@ recv_parse_log_rec(
#endif /* UNIV_LOG_LSN_DEBUG */
new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
- NULL, NULL);
+ NULL, NULL, *space);
if (UNIV_UNLIKELY(new_ptr == NULL)) {
return(0);
diff --git a/storage/innobase/mysql-test/storage_engine/autoinc_secondary.rdiff b/storage/innobase/mysql-test/storage_engine/autoinc_secondary.rdiff
index c24594c5024..00cda7c4435 100644
--- a/storage/innobase/mysql-test/storage_engine/autoinc_secondary.rdiff
+++ b/storage/innobase/mysql-test/storage_engine/autoinc_secondary.rdiff
@@ -8,7 +8,7 @@
-SELECT LAST_INSERT_ID();
-LAST_INSERT_ID()
-1
--SELECT * FROM t1;
+-SELECT a,b FROM t1;
-a b
-a 1
-a 2
diff --git a/storage/innobase/mysql-test/storage_engine/insert_delayed.rdiff b/storage/innobase/mysql-test/storage_engine/insert_delayed.rdiff
index 62895fa928f..9e6cddf03f0 100644
--- a/storage/innobase/mysql-test/storage_engine/insert_delayed.rdiff
+++ b/storage/innobase/mysql-test/storage_engine/insert_delayed.rdiff
@@ -1,5 +1,5 @@
---- suite/storage_engine/insert_delayed.result 2012-07-12 20:04:07.143544998 +0400
-+++ suite/storage_engine/insert_delayed.reject 2012-07-15 17:49:34.551810189 +0400
+--- suite/storage_engine/insert_delayed.result 2013-01-23 01:23:49.461254916 +0400
++++ suite/storage_engine/insert_delayed.reject 2013-01-23 01:47:05.975698364 +0400
@@ -5,7 +5,16 @@
connect con0,localhost,root,,;
SET lock_wait_timeout = 1;
@@ -14,7 +14,7 @@
+# -------------------------------------------
INSERT DELAYED INTO t1 SET a=4, b='d';
+ERROR HY000: DELAYED option not supported for table 't1'
- INSERT DELAYED INTO t1 SELECT 5, 'e';
+ INSERT DELAYED INTO t1 (a,b) SELECT 5, 'e';
ERROR HY000: Lock wait timeout exceeded; try restarting transaction
disconnect con0;
@@ -20,6 +29,4 @@
diff --git a/storage/innobase/mysql-test/storage_engine/parts/repair_table.rdiff b/storage/innobase/mysql-test/storage_engine/parts/repair_table.rdiff
index aab866fde83..7ddc57e0ead 100644
--- a/storage/innobase/mysql-test/storage_engine/parts/repair_table.rdiff
+++ b/storage/innobase/mysql-test/storage_engine/parts/repair_table.rdiff
@@ -1,12 +1,12 @@
---- suite/storage_engine/parts/repair_table.result 2012-07-15 01:22:58.861853325 +0400
-+++ suite/storage_engine/parts/repair_table.reject 2012-07-15 20:07:11.268009209 +0400
+--- suite/storage_engine/parts/repair_table.result 2013-01-23 01:35:44.388267080 +0400
++++ suite/storage_engine/parts/repair_table.reject 2013-01-23 01:44:40.337529283 +0400
@@ -9,27 +9,27 @@
INSERT INTO t2 (a,b) SELECT a, b FROM t1;
ALTER TABLE t1 REPAIR PARTITION p0;
Table Op Msg_type Msg_text
-test.t1 repair status OK
+test.t1 repair note The storage engine for the table doesn't support repair
- INSERT INTO t1 VALUES (3,'c');
+ INSERT INTO t1 (a,b) VALUES (3,'c');
ALTER TABLE t1 REPAIR PARTITION NO_WRITE_TO_BINLOG p0, p1;
Table Op Msg_type Msg_text
-test.t1 repair status OK
@@ -21,7 +21,7 @@
Table Op Msg_type Msg_text
-test.t1 repair status OK
+test.t1 repair note The storage engine for the table doesn't support repair
- INSERT INTO t1 VALUES (10,'j');
+ INSERT INTO t1 (a,b) VALUES (10,'j');
ALTER TABLE t1 REPAIR PARTITION p1 QUICK USE_FRM;
Table Op Msg_type Msg_text
-test.t1 repair status OK
@@ -112,7 +112,7 @@
-test.t1 check error Size of datafile is: 26 Should be: 39
-test.t1 check error Partition p0 returned error
-test.t1 check error Corrupt
--SELECT * FROM t1;
+-SELECT a,b FROM t1;
-a b
-8 h
-10 j
@@ -136,7 +136,7 @@
-test.t1 check warning Found 3 key parts. Should be: 2
-test.t1 check error Partition p0 returned error
-test.t1 check error Corrupt
--SELECT * FROM t1;
+-SELECT a,b FROM t1;
-a b
-8 h
-10 j
@@ -160,7 +160,7 @@
-test.t1 check error Size of datafile is: 39 Should be: 52
-test.t1 check error Partition p1 returned error
-test.t1 check error Corrupt
--SELECT * FROM t1;
+-SELECT a,b FROM t1;
-a b
-8 h
-10 j
@@ -187,7 +187,7 @@
-test.t1 check warning Found 4 key parts. Should be: 3
-test.t1 check error Partition p1 returned error
-test.t1 check error Corrupt
--SELECT * FROM t1;
+-SELECT a,b FROM t1;
-a b
-8 h
-10 j
diff --git a/storage/innobase/mysql-test/storage_engine/repair_table.rdiff b/storage/innobase/mysql-test/storage_engine/repair_table.rdiff
index 9c51fea47ff..ae5b863eae1 100644
--- a/storage/innobase/mysql-test/storage_engine/repair_table.rdiff
+++ b/storage/innobase/mysql-test/storage_engine/repair_table.rdiff
@@ -57,7 +57,7 @@
-test.t1 check Error Incorrect file format 't1'
-test.t1 check error Corrupt
+test.t1 check status OK
- SELECT * FROM t1;
+ SELECT a,b FROM t1;
-ERROR HY000: Incorrect file format 't1'
+a b
+1 a
@@ -107,7 +107,7 @@
-Table Op Msg_type Msg_text
-test.t1 check error Size of datafile is: 39 Should be: 65
-test.t1 check error Corrupt
--SELECT * FROM t1;
+-SELECT a,b FROM t1;
-ERROR HY000: Incorrect key file for table 't1'; try to repair it
-# Statement ended with one of expected results (0,ER_NOT_KEYFILE,144).
-# If you got a difference in error message, just add it to rdiff file
@@ -122,7 +122,7 @@
-test.t1 check warning Table is marked as crashed and last repair failed
-test.t1 check error Size of datafile is: 39 Should be: 65
-test.t1 check error Corrupt
--SELECT * FROM t1;
+-SELECT a,b FROM t1;
-ERROR HY000: Table './test/t1' is marked as crashed and last (automatic?) repair failed
-# Statement ended with one of expected results (0,ER_NOT_KEYFILE,144).
-# If you got a difference in error message, just add it to rdiff file
diff --git a/storage/innobase/mysql-test/storage_engine/type_char_indexes.rdiff b/storage/innobase/mysql-test/storage_engine/type_char_indexes.rdiff
index 7fce0a108e9..53241fe331e 100644
--- a/storage/innobase/mysql-test/storage_engine/type_char_indexes.rdiff
+++ b/storage/innobase/mysql-test/storage_engine/type_char_indexes.rdiff
@@ -2,19 +2,19 @@
+++ suite/storage_engine/type_char_indexes.reject 2012-07-15 17:51:55.810034331 +0400
@@ -98,7 +98,7 @@
SET SESSION optimizer_switch = 'engine_condition_pushdown=on';
- EXPLAIN SELECT * FROM t1 WHERE c > 'a';
+ EXPLAIN SELECT c,c20,v16,v128 FROM t1 WHERE c > 'a';
id select_type table type possible_keys key key_len ref rows Extra
-# # # range c_v c_v # # # Using index condition
+# # # range c_v c_v # # # Using where
- SELECT * FROM t1 WHERE c > 'a';
+ SELECT c,c20,v16,v128 FROM t1 WHERE c > 'a';
c c20 v16 v128
b char3 varchar1a varchar1b
@@ -135,7 +135,7 @@
r3a
- EXPLAIN SELECT * FROM t1 WHERE v16 = 'varchar1a' OR v16 = 'varchar3a' ORDER BY v16;
+ EXPLAIN SELECT c,c20,v16,v128 FROM t1 WHERE v16 = 'varchar1a' OR v16 = 'varchar3a' ORDER BY v16;
id select_type table type possible_keys key key_len ref rows Extra
-# # # range # v16 # # # #
+# # # ALL # NULL # # # #
- SELECT * FROM t1 WHERE v16 = 'varchar1a' OR v16 = 'varchar3a' ORDER BY v16;
+ SELECT c,c20,v16,v128 FROM t1 WHERE v16 = 'varchar1a' OR v16 = 'varchar3a' ORDER BY v16;
c c20 v16 v128
a char1 varchar1a varchar1b
diff --git a/storage/innobase/mysql-test/storage_engine/type_spatial_indexes.rdiff b/storage/innobase/mysql-test/storage_engine/type_spatial_indexes.rdiff
index 9a9566deafb..9061900182e 100644
--- a/storage/innobase/mysql-test/storage_engine/type_spatial_indexes.rdiff
+++ b/storage/innobase/mysql-test/storage_engine/type_spatial_indexes.rdiff
@@ -1,5 +1,5 @@
---- suite/storage_engine/type_spatial_indexes.result 2012-07-12 04:52:40.840023344 +0400
-+++ suite/storage_engine/type_spatial_indexes.reject 2012-07-15 19:27:32.761911079 +0400
+--- suite/storage_engine/type_spatial_indexes.result 2013-01-23 01:25:45.367797786 +0400
++++ suite/storage_engine/type_spatial_indexes.reject 2013-01-23 01:46:17.560307029 +0400
@@ -702,699 +702,15 @@
DROP DATABASE IF EXISTS gis_ogs;
CREATE DATABASE gis_ogs;
@@ -79,43 +79,43 @@
-Field Type Null Key Default Extra
-fid int(11) YES NULL
-g geometry NO NULL
--INSERT INTO gis_point VALUES
+-INSERT INTO gis_point (fid,g) VALUES
-(101, PointFromText('POINT(10 10)')),
-(102, PointFromText('POINT(20 10)')),
-(103, PointFromText('POINT(20 20)')),
-(104, PointFromWKB(AsWKB(PointFromText('POINT(10 20)'))));
--INSERT INTO gis_line VALUES
+-INSERT INTO gis_line (fid,g) VALUES
-(105, LineFromText('LINESTRING(0 0,0 10,10 0)')),
-(106, LineStringFromText('LINESTRING(10 10,20 10,20 20,10 20,10 10)')),
-(107, LineStringFromWKB(AsWKB(LineString(Point(10, 10), Point(40, 10)))));
--INSERT INTO gis_polygon VALUES
+-INSERT INTO gis_polygon (fid,g) VALUES
-(108, PolygonFromText('POLYGON((10 10,20 10,20 20,10 20,10 10))')),
-(109, PolyFromText('POLYGON((0 0,50 0,50 50,0 50,0 0), (10 10,20 10,20 20,10 20,10 10))')),
-(110, PolyFromWKB(AsWKB(Polygon(LineString(Point(0, 0), Point(30, 0), Point(30, 30), Point(0, 0))))));
--INSERT INTO gis_multi_point VALUES
+-INSERT INTO gis_multi_point (fid,g) VALUES
-(111, MultiPointFromText('MULTIPOINT(0 0,10 10,10 20,20 20)')),
-(112, MPointFromText('MULTIPOINT(1 1,11 11,11 21,21 21)')),
-(113, MPointFromWKB(AsWKB(MultiPoint(Point(3, 6), Point(4, 10)))));
--INSERT INTO gis_multi_line VALUES
+-INSERT INTO gis_multi_line (fid,g) VALUES
-(114, MultiLineStringFromText('MULTILINESTRING((10 48,10 21,10 0),(16 0,16 23,16 48))')),
-(115, MLineFromText('MULTILINESTRING((10 48,10 21,10 0))')),
-(116, MLineFromWKB(AsWKB(MultiLineString(LineString(Point(1, 2), Point(3, 5)), LineString(Point(2, 5), Point(5, 8), Point(21, 7))))));
--INSERT INTO gis_multi_polygon VALUES
+-INSERT INTO gis_multi_polygon (fid,g) VALUES
-(117, MultiPolygonFromText('MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))')),
-(118, MPolyFromText('MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),(52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))')),
-(119, MPolyFromWKB(AsWKB(MultiPolygon(Polygon(LineString(Point(0, 3), Point(3, 3), Point(3, 0), Point(0, 3)))))));
--INSERT INTO gis_geometrycollection VALUES
+-INSERT INTO gis_geometrycollection (fid,g) VALUES
-(120, GeomCollFromText('GEOMETRYCOLLECTION(POINT(0 0), LINESTRING(0 0,10 10))')),
-(121, GeometryFromWKB(AsWKB(GeometryCollection(Point(44, 6), LineString(Point(3, 6), Point(7, 9)))))),
-(122, GeomFromText('GeometryCollection()')),
-(123, GeomFromText('GeometryCollection EMPTY'));
--INSERT into gis_geometry SELECT * FROM gis_point;
--INSERT into gis_geometry SELECT * FROM gis_line;
--INSERT into gis_geometry SELECT * FROM gis_polygon;
--INSERT into gis_geometry SELECT * FROM gis_multi_point;
--INSERT into gis_geometry SELECT * FROM gis_multi_line;
--INSERT into gis_geometry SELECT * FROM gis_multi_polygon;
--INSERT into gis_geometry SELECT * FROM gis_geometrycollection;
+-INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_point;
+-INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_line;
+-INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_polygon;
+-INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_multi_point;
+-INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_multi_line;
+-INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_multi_polygon;
+-INSERT into gis_geometry (fid,g) SELECT fid,g FROM gis_geometrycollection;
-SELECT fid, AsText(g) FROM gis_point;
-fid AsText(g)
-101 POINT(10 10)
@@ -433,7 +433,7 @@
-DROP TABLE gis_point, gis_line, gis_polygon, gis_multi_point, gis_multi_line, gis_multi_polygon, gis_geometrycollection, gis_geometry;
-USE gis_ogs;
-# Lakes
--INSERT INTO lakes VALUES (
+-INSERT INTO lakes (fid,name,shore) VALUES (
-101, 'BLUE LAKE',
-PolyFromText(
-'POLYGON(
@@ -442,68 +442,68 @@
- )',
-101));
-# Road Segments
--INSERT INTO road_segments VALUES(102, 'Route 5', NULL, 2,
+-INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(102, 'Route 5', NULL, 2,
-LineFromText(
-'LINESTRING( 0 18, 10 21, 16 23, 28 26, 44 31 )' ,101));
--INSERT INTO road_segments VALUES(103, 'Route 5', 'Main Street', 4,
+-INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(103, 'Route 5', 'Main Street', 4,
-LineFromText(
-'LINESTRING( 44 31, 56 34, 70 38 )' ,101));
--INSERT INTO road_segments VALUES(104, 'Route 5', NULL, 2,
+-INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(104, 'Route 5', NULL, 2,
-LineFromText(
-'LINESTRING( 70 38, 72 48 )' ,101));
--INSERT INTO road_segments VALUES(105, 'Main Street', NULL, 4,
+-INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(105, 'Main Street', NULL, 4,
-LineFromText(
-'LINESTRING( 70 38, 84 42 )' ,101));
--INSERT INTO road_segments VALUES(106, 'Dirt Road by Green Forest', NULL,
+-INSERT INTO road_segments (fid,name,aliases,num_lanes,centerline) VALUES(106, 'Dirt Road by Green Forest', NULL,
-1,
-LineFromText(
-'LINESTRING( 28 26, 28 0 )',101));
-# DividedRoutes
--INSERT INTO divided_routes VALUES(119, 'Route 75', 4,
+-INSERT INTO divided_routes (fid,name,num_lanes,centerlines) VALUES(119, 'Route 75', 4,
-MLineFromText(
-'MULTILINESTRING((10 48,10 21,10 0),
- (16 0,16 23,16 48))', 101));
-# Forests
--INSERT INTO forests VALUES(109, 'Green Forest',
+-INSERT INTO forests (fid,name,boundary) VALUES(109, 'Green Forest',
-MPolyFromText(
-'MULTIPOLYGON(((28 26,28 0,84 0,84 42,28 26),
- (52 18,66 23,73 9,48 6,52 18)),((59 18,67 18,67 13,59 13,59 18)))',
-101));
-# Bridges
--INSERT INTO bridges VALUES(110, 'Cam Bridge', PointFromText(
+-INSERT INTO bridges (fid,name,position) VALUES(110, 'Cam Bridge', PointFromText(
-'POINT( 44 31 )', 101));
-# Streams
--INSERT INTO streams VALUES(111, 'Cam Stream',
+-INSERT INTO streams (fid,name,centerline) VALUES(111, 'Cam Stream',
-LineFromText(
-'LINESTRING( 38 48, 44 41, 41 36, 44 31, 52 18 )', 101));
--INSERT INTO streams VALUES(112, NULL,
+-INSERT INTO streams (fid,name,centerline) VALUES(112, NULL,
-LineFromText(
-'LINESTRING( 76 0, 78 4, 73 9 )', 101));
-# Buildings
--INSERT INTO buildings VALUES(113, '123 Main Street',
+-INSERT INTO buildings (fid,name,position,footprint) VALUES(113, '123 Main Street',
-PointFromText(
-'POINT( 52 30 )', 101),
-PolyFromText(
-'POLYGON( ( 50 31, 54 31, 54 29, 50 29, 50 31) )', 101));
--INSERT INTO buildings VALUES(114, '215 Main Street',
+-INSERT INTO buildings (fid,name,position,footprint) VALUES(114, '215 Main Street',
-PointFromText(
-'POINT( 64 33 )', 101),
-PolyFromText(
-'POLYGON( ( 66 34, 62 34, 62 32, 66 32, 66 34) )', 101));
-# Ponds
--INSERT INTO ponds VALUES(120, NULL, 'Stock Pond',
+-INSERT INTO ponds (fid,name,type,shores) VALUES(120, NULL, 'Stock Pond',
-MPolyFromText(
-'MULTIPOLYGON( ( ( 24 44, 22 42, 24 40, 24 44) ),
- ( ( 26 44, 26 40, 28 42, 26 44) ) )', 101));
-# Named Places
--INSERT INTO named_places VALUES(117, 'Ashton',
+-INSERT INTO named_places (fid,name,boundary) VALUES(117, 'Ashton',
-PolyFromText(
-'POLYGON( ( 62 48, 84 48, 84 30, 56 30, 56 34, 62 48) )', 101));
--INSERT INTO named_places VALUES(118, 'Goose Island',
+-INSERT INTO named_places (fid,name,boundary) VALUES(118, 'Goose Island',
-PolyFromText(
-'POLYGON( ( 67 13, 67 18, 59 18, 59 13, 67 13) )', 101));
-# Map Neatlines
--INSERT INTO map_neatlines VALUES(115,
+-INSERT INTO map_neatlines (fid,neatline) VALUES(115,
-PolyFromText(
-'POLYGON( ( 0 0, 0 48, 84 48, 84 0, 0 0 ) )', 101));
-SELECT Dimension(shore)
diff --git a/storage/innobase/mysql-test/storage_engine/vcol.rdiff b/storage/innobase/mysql-test/storage_engine/vcol.rdiff
index 2226062834e..23b26a52228 100644
--- a/storage/innobase/mysql-test/storage_engine/vcol.rdiff
+++ b/storage/innobase/mysql-test/storage_engine/vcol.rdiff
@@ -12,7 +12,7 @@
-Warnings:
-Warning 1906 The value specified for computed column 'b' in table 't1' ignored
-Warning 1906 The value specified for computed column 'b' in table 't1' ignored
--SELECT * FROM t1;
+-SELECT a,b FROM t1;
-a b
-1 2
-2 3
@@ -29,7 +29,7 @@
-Warnings:
-Warning 1906 The value specified for computed column 'b' in table 't1' ignored
-Warning 1906 The value specified for computed column 'b' in table 't1' ignored
--SELECT * FROM t1;
+-SELECT a,b FROM t1;
-a b
-1 2
-2 3
@@ -46,7 +46,7 @@
-Warnings:
-Warning 1906 The value specified for computed column 'b' in table 't1' ignored
-Warning 1906 The value specified for computed column 'b' in table 't1' ignored
--SELECT * FROM t1;
+-SELECT a,b FROM t1;
-a b
-1 2
-2 3
@@ -63,7 +63,7 @@
-Warnings:
-Warning 1906 The value specified for computed column 'b' in table 't1' ignored
-Warning 1906 The value specified for computed column 'b' in table 't1' ignored
--SELECT * FROM t1;
+-SELECT a,b FROM t1;
-a b
-1 2
-2 3
diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc
index 62cde1cf728..57ff14b9f95 100644
--- a/storage/innobase/os/os0file.cc
+++ b/storage/innobase/os/os0file.cc
@@ -1402,6 +1402,43 @@ os_file_set_nocache(
#endif
}
+
+#ifdef __linux__
+#include <sys/ioctl.h>
+#ifndef DFS_IOCTL_ATOMIC_WRITE_SET
+#define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, uint)
+#endif
+/* Requests atomic writes for an open file via DFS_IOCTL_ATOMIC_WRITE_SET.
+@param file	handle passed to ioctl()
+@param name	file name, used only in the error message
+@return the ioctl() result unchanged; nonzero is reported as a failure */
+static int os_file_set_atomic_writes(os_file_t file, const char *name)
+{
+	int atomic_option = 1;
+	int ret = ioctl(file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic_option);
+	if (ret) {
+		fprintf(stderr,
+			"InnoDB : can't use atomic write on %s, errno %d\n",
+			name, errno);
+	}
+	return ret;
+}
+#else
+static int os_file_set_atomic_writes(os_file_t file, const char *name)
+{
+ fprintf(stderr,
+ "InnoDB : can't use atomic writes on %s - not implemented on this platform."
+ "innodb_use_atomic_writes needs to be 0.\n",
+ name);
+#ifdef _WIN32
+ SetLastError(ERROR_INVALID_FUNCTION);
+#else
+ errno = EINVAL;
+#endif
+ return -1;
+}
+#endif
+
/****************************************************************//**
NOTE! Use the corresponding macro os_file_create(), not directly
this function!
@@ -1552,6 +1589,13 @@ try_again:
*success = TRUE;
}
+ if (srv_use_atomic_writes && type == OS_DATA_FILE &&
+ os_file_set_atomic_writes(file, name)) {
+ CloseHandle(file);
+ *success = FALSE;
+ file = INVALID_HANDLE_VALUE;
+ }
+
return(file);
#else /* __WIN__ */
os_file_t file;
@@ -1668,6 +1712,12 @@ try_again:
file = -1;
}
#endif /* USE_FILE_LOCK */
+ if (srv_use_atomic_writes && type == OS_DATA_FILE
+ && os_file_set_atomic_writes(file, name)) {
+ close(file);
+ *success = FALSE;
+ file = -1;
+ }
return(file);
#endif /* __WIN__ */
@@ -1965,6 +2015,28 @@ os_file_set_size(
current_size = 0;
+#ifdef HAVE_POSIX_FALLOCATE
+	if (srv_use_posix_fallocate) {
+		/* posix_fallocate() returns the error number; it never returns -1 or sets errno. */
+		int err = posix_fallocate(file, current_size, size);
+		if (err != 0) {
+			fprintf(stderr,
+				"InnoDB: Error: preallocating data for"
+				" file %s failed at\n"
+				"InnoDB: offset 0 size %lld. Operating system"
+				" error number %d.\n"
+				"InnoDB: Check that the disk is not full"
+				" or a disk quota exceeded.\n"
+				"InnoDB: Some operating system error numbers"
+				" are described at\n"
+				"InnoDB: " REFMAN "operating-system-error-codes.html\n",
+				name, (longlong)size, err);
+			return (FALSE);
+		}
+		return (TRUE);
+	}
+#endif
+
/* Write up to 1 megabyte at a time. */
buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE))
* UNIV_PAGE_SIZE;
diff --git a/storage/innobase/rem/rem0rec.cc b/storage/innobase/rem/rem0rec.cc
index 5a864f122a3..d56eb59e0bb 100644
--- a/storage/innobase/rem/rem0rec.cc
+++ b/storage/innobase/rem/rem0rec.cc
@@ -168,7 +168,6 @@ rec_get_n_extern_new(
{
const byte* nulls;
const byte* lens;
- dict_field_t* field;
ulint null_mask;
ulint n_extern;
ulint i;
@@ -189,10 +188,13 @@ rec_get_n_extern_new(
/* read the lengths of fields 0..n */
do {
- ulint len;
+ const dict_field_t* field
+ = dict_index_get_nth_field(index, i);
+ const dict_col_t* col
+ = dict_field_get_col(field);
+ ulint len;
- field = dict_index_get_nth_field(index, i);
- if (!(dict_field_get_col(field)->prtype & DATA_NOT_NULL)) {
+ if (!(col->prtype & DATA_NOT_NULL)) {
/* nullable field => read the null flag */
if (UNIV_UNLIKELY(!(byte) null_mask)) {
@@ -210,8 +212,6 @@ rec_get_n_extern_new(
if (UNIV_UNLIKELY(!field->fixed_len)) {
/* Variable-length field: read the length */
- const dict_col_t* col
- = dict_field_get_col(field);
len = *lens--;
/* If the maximum length of the field is up
to 255 bytes, the actual length is always
@@ -240,16 +240,15 @@ rec_get_n_extern_new(
Determine the offset to each field in a leaf-page record
in ROW_FORMAT=COMPACT. This is a special case of
rec_init_offsets() and rec_get_offsets_func(). */
-UNIV_INTERN
+UNIV_INLINE __attribute__((nonnull))
void
rec_init_offsets_comp_ordinary(
/*===========================*/
const rec_t* rec, /*!< in: physical record in
ROW_FORMAT=COMPACT */
- ulint extra, /*!< in: number of bytes to reserve
- between the record header and
- the data payload
- (usually REC_N_NEW_EXTRA_BYTES) */
+ ibool temp, /*!< in: whether to use the
+ format for temporary files in
+ index creation */
const dict_index_t* index, /*!< in: record descriptor */
ulint* offsets)/*!< in/out: array of offsets;
in: n=rec_offs_n_fields(offsets) */
@@ -257,27 +256,38 @@ rec_init_offsets_comp_ordinary(
ulint i = 0;
ulint offs = 0;
ulint any_ext = 0;
- const byte* nulls = rec - (extra + 1);
+ const byte* nulls = temp
+ ? rec - 1
+ : rec - (1 + REC_N_NEW_EXTRA_BYTES);
const byte* lens = nulls
- UT_BITS_IN_BYTES(index->n_nullable);
- dict_field_t* field;
ulint null_mask = 1;
#ifdef UNIV_DEBUG
- /* We cannot invoke rec_offs_make_valid() here, because it can hold
- that extra != REC_N_NEW_EXTRA_BYTES. Similarly, rec_offs_validate()
- will fail in that case, because it invokes rec_get_status(). */
+ /* We cannot invoke rec_offs_make_valid() here if temp=TRUE.
+ Similarly, rec_offs_validate() will fail in that case, because
+ it invokes rec_get_status(). */
offsets[2] = (ulint) rec;
offsets[3] = (ulint) index;
#endif /* UNIV_DEBUG */
+ ut_ad(temp || dict_table_is_comp(index->table));
+
+ if (temp && dict_table_is_comp(index->table)) {
+ /* No need to adjust fixed_len to 0. We only need to
+ adjust it for ROW_FORMAT=REDUNDANT. */
+ temp = FALSE;
+ }
+
/* read the lengths of fields 0..n */
do {
- ulint len;
+ const dict_field_t* field
+ = dict_index_get_nth_field(index, i);
+ const dict_col_t* col
+ = dict_field_get_col(field);
+ ulint len;
- field = dict_index_get_nth_field(index, i);
- if (!(dict_field_get_col(field)->prtype
- & DATA_NOT_NULL)) {
+ if (!(col->prtype & DATA_NOT_NULL)) {
/* nullable field => read the null flag */
if (UNIV_UNLIKELY(!(byte) null_mask)) {
@@ -297,10 +307,9 @@ rec_init_offsets_comp_ordinary(
null_mask <<= 1;
}
- if (UNIV_UNLIKELY(!field->fixed_len)) {
+ if (!field->fixed_len
+ || (temp && !dict_col_get_fixed_size(col, temp))) {
/* Variable-length field: read the length */
- const dict_col_t* col
- = dict_field_get_col(field);
len = *lens--;
/* If the maximum length of the field is up
to 255 bytes, the actual length is always
@@ -394,9 +403,8 @@ rec_init_offsets(
= dict_index_get_n_unique_in_tree(index);
break;
case REC_STATUS_ORDINARY:
- rec_init_offsets_comp_ordinary(rec,
- REC_N_NEW_EXTRA_BYTES,
- index, offsets);
+ rec_init_offsets_comp_ordinary(
+ rec, FALSE, index, offsets);
return;
}
@@ -774,17 +782,19 @@ rec_get_nth_field_offs_old(
/**********************************************************//**
Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
@return total size */
-UNIV_INTERN
+UNIV_INLINE __attribute__((warn_unused_result, nonnull(1,2)))
ulint
-rec_get_converted_size_comp_prefix(
-/*===============================*/
+rec_get_converted_size_comp_prefix_low(
+/*===================================*/
const dict_index_t* index, /*!< in: record descriptor;
dict_table_is_comp() is
assumed to hold, even if
it does not */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
- ulint* extra) /*!< out: extra size */
+ ulint* extra, /*!< out: extra size */
+ ibool temp) /*!< in: whether this is a
+ temporary file record */
{
ulint extra_size;
ulint data_size;
@@ -793,15 +803,25 @@ rec_get_converted_size_comp_prefix(
ut_ad(fields);
ut_ad(n_fields > 0);
ut_ad(n_fields <= dict_index_get_n_fields(index));
+ ut_ad(!temp || extra);
- extra_size = REC_N_NEW_EXTRA_BYTES
+ extra_size = temp
+ ? UT_BITS_IN_BYTES(index->n_nullable)
+ : REC_N_NEW_EXTRA_BYTES
+ UT_BITS_IN_BYTES(index->n_nullable);
data_size = 0;
+ if (temp && dict_table_is_comp(index->table)) {
+ /* No need to adjust fixed_len to 0. We only need to
+ adjust it for ROW_FORMAT=REDUNDANT. */
+ temp = FALSE;
+ }
+
/* read the lengths of fields 0..n */
for (i = 0; i < n_fields; i++) {
const dict_field_t* field;
ulint len;
+ ulint fixed_len;
const dict_col_t* col;
field = dict_index_get_nth_field(index, i);
@@ -820,6 +840,11 @@ rec_get_converted_size_comp_prefix(
ut_ad(len <= col->len || col->mtype == DATA_BLOB
|| (col->len == 0 && col->mtype == DATA_VARCHAR));
+ fixed_len = field->fixed_len;
+ if (temp && fixed_len
+ && !dict_col_get_fixed_size(col, temp)) {
+ fixed_len = 0;
+ }
/* If the maximum length of a variable-length field
is up to 255 bytes, the actual length is always stored
in one byte. If the maximum length is more than 255
@@ -827,11 +852,20 @@ rec_get_converted_size_comp_prefix(
0..127. The length will be encoded in two bytes when
it is 128 or more, or when the field is stored externally. */
- if (field->fixed_len) {
- ut_ad(len == field->fixed_len);
+ if (fixed_len) {
+#ifdef UNIV_DEBUG
+ ulint mbminlen = DATA_MBMINLEN(col->mbminmaxlen);
+ ulint mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen);
+
+ ut_ad(len <= fixed_len);
+
+ ut_ad(!mbmaxlen || len >= mbminlen
+ * (fixed_len / mbmaxlen));
+
/* dict_index_add_col() should guarantee this */
ut_ad(!field->prefix_len
- || field->fixed_len == field->prefix_len);
+ || fixed_len == field->prefix_len);
+#endif /* UNIV_DEBUG */
} else if (dfield_is_ext(&fields[i])) {
ut_ad(col->len >= 256 || col->mtype == DATA_BLOB);
extra_size += 2;
@@ -848,7 +882,7 @@ rec_get_converted_size_comp_prefix(
data_size += len;
}
- if (UNIV_LIKELY_NULL(extra)) {
+ if (extra) {
*extra = extra_size;
}
@@ -856,6 +890,23 @@ rec_get_converted_size_comp_prefix(
}
/**********************************************************//**
+Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT.
+@return total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_comp_prefix(
+/*===============================*/
+ const dict_index_t* index, /*!< in: record descriptor; dict_table_is_comp(index->table) is asserted */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields,/*!< in: number of data fields */
+ ulint* extra) /*!< out: extra size; the callee skips the store when this is NULL */
+{
+ ut_ad(dict_table_is_comp(index->table));
+ return(rec_get_converted_size_comp_prefix_low(
+ index, fields, n_fields, extra, FALSE)); /* FALSE: not a temporary file record */
+}
+
+/**********************************************************//**
Determines the size of a data tuple in ROW_FORMAT=COMPACT.
@return total size */
UNIV_INTERN
@@ -899,8 +950,8 @@ rec_get_converted_size_comp(
return(ULINT_UNDEFINED);
}
- return(size + rec_get_converted_size_comp_prefix(index, fields,
- n_fields, extra));
+ return(size + rec_get_converted_size_comp_prefix_low(
+ index, fields, n_fields, extra, FALSE));
}
/***********************************************************//**
@@ -1077,19 +1128,18 @@ rec_convert_dtuple_to_rec_old(
/*********************************************************//**
Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
-UNIV_INTERN
+UNIV_INLINE __attribute__((nonnull))
void
rec_convert_dtuple_to_rec_comp(
/*===========================*/
rec_t* rec, /*!< in: origin of record */
- ulint extra, /*!< in: number of bytes to
- reserve between the record
- header and the data payload
- (normally REC_N_NEW_EXTRA_BYTES) */
const dict_index_t* index, /*!< in: record descriptor */
- ulint status, /*!< in: status bits of the record */
const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields)/*!< in: number of data fields */
+ ulint n_fields,/*!< in: number of data fields */
+ ulint status, /*!< in: status bits of the record */
+ ibool temp) /*!< in: whether to use the
+ format for temporary files in
+ index creation */
{
const dfield_t* field;
const dtype_t* type;
@@ -1101,31 +1151,44 @@ rec_convert_dtuple_to_rec_comp(
ulint n_node_ptr_field;
ulint fixed_len;
ulint null_mask = 1;
- ut_ad(extra == 0 || dict_table_is_comp(index->table));
- ut_ad(extra == 0 || extra == REC_N_NEW_EXTRA_BYTES);
+ ut_ad(temp || dict_table_is_comp(index->table));
ut_ad(n_fields > 0);
- switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
- case REC_STATUS_ORDINARY:
+ if (temp) {
+ ut_ad(status == REC_STATUS_ORDINARY);
ut_ad(n_fields <= dict_index_get_n_fields(index));
n_node_ptr_field = ULINT_UNDEFINED;
- break;
- case REC_STATUS_NODE_PTR:
- ut_ad(n_fields == dict_index_get_n_unique_in_tree(index) + 1);
- n_node_ptr_field = n_fields - 1;
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- ut_ad(n_fields == 1);
- n_node_ptr_field = ULINT_UNDEFINED;
- break;
- default:
- ut_error;
- return;
+ nulls = rec - 1;
+ if (dict_table_is_comp(index->table)) {
+ /* No need to adjust fixed_len to 0. We only
+ need to adjust it for ROW_FORMAT=REDUNDANT. */
+ temp = FALSE;
+ }
+ } else {
+ nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
+
+ switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
+ case REC_STATUS_ORDINARY:
+ ut_ad(n_fields <= dict_index_get_n_fields(index));
+ n_node_ptr_field = ULINT_UNDEFINED;
+ break;
+ case REC_STATUS_NODE_PTR:
+ ut_ad(n_fields
+ == dict_index_get_n_unique_in_tree(index) + 1);
+ n_node_ptr_field = n_fields - 1;
+ break;
+ case REC_STATUS_INFIMUM:
+ case REC_STATUS_SUPREMUM:
+ ut_ad(n_fields == 1);
+ n_node_ptr_field = ULINT_UNDEFINED;
+ break;
+ default:
+ ut_error;
+ return;
+ }
}
end = rec;
- nulls = rec - (extra + 1);
lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
/* clear the SQL-null flags */
memset(lens + 1, 0, nulls - lens);
@@ -1171,6 +1234,10 @@ rec_convert_dtuple_to_rec_comp(
ifield = dict_index_get_nth_field(index, i);
fixed_len = ifield->fixed_len;
+ if (temp && fixed_len
+ && !dict_col_get_fixed_size(ifield->col, temp)) {
+ fixed_len = 0;
+ }
/* If the maximum length of a variable-length field
is up to 255 bytes, the actual length is always stored
in one byte. If the maximum length is more than 255
@@ -1178,8 +1245,17 @@ rec_convert_dtuple_to_rec_comp(
0..127. The length will be encoded in two bytes when
it is 128 or more, or when the field is stored externally. */
if (fixed_len) {
- ut_ad(len == fixed_len);
+#ifdef UNIV_DEBUG
+ ulint mbminlen = DATA_MBMINLEN(
+ ifield->col->mbminmaxlen);
+ ulint mbmaxlen = DATA_MBMAXLEN(
+ ifield->col->mbminmaxlen);
+
+ ut_ad(len <= fixed_len);
+ ut_ad(!mbmaxlen || len >= mbminlen
+ * (fixed_len / mbmaxlen));
ut_ad(!dfield_is_ext(field));
+#endif /* UNIV_DEBUG */
} else if (dfield_is_ext(field)) {
ut_ad(ifield->col->len >= 256
|| ifield->col->mtype == DATA_BLOB);
@@ -1233,8 +1309,7 @@ rec_convert_dtuple_to_rec_new(
rec = buf + extra_size;
rec_convert_dtuple_to_rec_comp(
- rec, REC_N_NEW_EXTRA_BYTES, index, status,
- dtuple->fields, dtuple->n_fields);
+ rec, index, dtuple->fields, dtuple->n_fields, status, FALSE);
/* Set the info bits of the record */
rec_set_info_and_status_bits(rec, dtuple_get_info_bits(dtuple));
@@ -1296,6 +1371,54 @@ rec_convert_dtuple_to_rec(
return(rec);
}
+#ifndef UNIV_HOTBACKUP
+/**********************************************************//**
+Determines the size of a data tuple prefix as a temporary file record.
+@return total size */
+UNIV_INTERN
+ulint
+rec_get_converted_size_temp(
+/*========================*/
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields,/*!< in: number of data fields */
+ ulint* extra) /*!< out: extra size; must be non-NULL, as the callee asserts for temp=TRUE */
+{
+ return(rec_get_converted_size_comp_prefix_low(
+ index, fields, n_fields, extra, TRUE)); /* TRUE: temporary file record */
+}
+
+/******************************************************//**
+Determine the offset to each field in a temporary file record.
+@see rec_convert_dtuple_to_temp() */
+UNIV_INTERN
+void
+rec_init_offsets_temp(
+/*==================*/
+ const rec_t* rec, /*!< in: temporary file record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ ulint* offsets)/*!< in/out: array of offsets;
+ in: n=rec_offs_n_fields(offsets) */
+{
+ rec_init_offsets_comp_ordinary(rec, TRUE, index, offsets); /* TRUE: temporary file format */
+}
+
+/*********************************************************//**
+Builds a temporary file record out of a data tuple.
+@see rec_init_offsets_temp() */
+UNIV_INTERN
+void
+rec_convert_dtuple_to_temp(
+/*=======================*/
+ rec_t* rec, /*!< out: record */
+ const dict_index_t* index, /*!< in: record descriptor */
+ const dfield_t* fields, /*!< in: array of data fields */
+ ulint n_fields) /*!< in: number of fields */
+{
+ rec_convert_dtuple_to_rec_comp(rec, index, fields, n_fields,
+ REC_STATUS_ORDINARY, TRUE); /* status, temp; the callee asserts REC_STATUS_ORDINARY when temp is set */
+}
+
/**************************************************************//**
Copies the first n fields of a physical record to a data tuple. The fields
are copied to the memory heap. */
@@ -1506,6 +1629,7 @@ rec_copy_prefix_to_buf(
return(*buf + (rec - (lens + 1)));
}
+#endif /* UNIV_HOTBACKUP */
/***************************************************************//**
Validates the consistency of an old-style physical record.
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index e79518e24de..e8d15fb539c 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -2394,7 +2394,10 @@ row_ins_index_entry(
err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry,
n_ext, thr);
if (err != DB_FAIL) {
-
+ if (index == dict_table_get_first_index(index->table)
+ && thr_get_trx(thr)->mysql_thd != 0) {
+ DEBUG_SYNC_C("row_ins_clust_index_entry_leaf_after");
+ }
return(err);
}
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index cf662cb1f88..244aa0a69f1 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -294,6 +294,7 @@ row_merge_buf_add(
ulint len;
const dict_col_t* col;
ulint col_no;
+ ulint fixed_len;
const dfield_t* row_field;
ibool col_adjusted;
@@ -435,9 +436,30 @@ row_merge_buf_add(
ut_ad(len <= col->len || col->mtype == DATA_BLOB);
- if (ifield->fixed_len) {
- ut_ad(len == ifield->fixed_len);
+ fixed_len = ifield->fixed_len;
+ if (fixed_len && !dict_table_is_comp(index->table)
+ && DATA_MBMINLEN(col->mbminmaxlen)
+ != DATA_MBMAXLEN(col->mbminmaxlen)) {
+ /* CHAR in ROW_FORMAT=REDUNDANT is always
+ fixed-length, but in the temporary file it is
+ variable-length for variable-length character
+ sets. */
+ fixed_len = 0;
+ }
+
+ if (fixed_len) {
+#ifdef UNIV_DEBUG
+ ulint mbminlen = DATA_MBMINLEN(col->mbminmaxlen);
+ ulint mbmaxlen = DATA_MBMAXLEN(col->mbminmaxlen);
+
+ /* len should be between the sizes calculated based on
+ mbmaxlen and mbminlen */
+ ut_ad(len <= fixed_len);
+ ut_ad(!mbmaxlen || len >= mbminlen
+ * (fixed_len / mbmaxlen));
+
ut_ad(!dfield_is_ext(field));
+#endif /* UNIV_DEBUG */
} else if (dfield_is_ext(field)) {
extra_size += 2;
} else if (len < 128
@@ -464,12 +486,11 @@ row_merge_buf_add(
ulint size;
ulint extra;
- size = rec_get_converted_size_comp(index,
- REC_STATUS_ORDINARY,
- entry, n_fields, &extra);
+ size = rec_get_converted_size_temp(
+ index, entry, n_fields, &extra);
- ut_ad(data_size + extra_size + REC_N_NEW_EXTRA_BYTES == size);
- ut_ad(extra_size + REC_N_NEW_EXTRA_BYTES == extra);
+ ut_ad(data_size + extra_size == size);
+ ut_ad(extra_size == extra);
}
#endif /* UNIV_DEBUG */
@@ -660,14 +681,9 @@ row_merge_buf_write(
ulint extra_size;
const dfield_t* entry = buf->tuples[i];
- size = rec_get_converted_size_comp(index,
- REC_STATUS_ORDINARY,
- entry, n_fields,
- &extra_size);
+ size = rec_get_converted_size_temp(
+ index, entry, n_fields, &extra_size);
ut_ad(size >= extra_size);
- ut_ad(extra_size >= REC_N_NEW_EXTRA_BYTES);
- extra_size -= REC_N_NEW_EXTRA_BYTES;
- size -= REC_N_NEW_EXTRA_BYTES;
/* Encode extra_size + 1 */
if (extra_size + 1 < 0x80) {
@@ -680,9 +696,8 @@ row_merge_buf_write(
ut_ad(b + size < &block[srv_sort_buf_size]);
- rec_convert_dtuple_to_rec_comp(b + extra_size, 0, index,
- REC_STATUS_ORDINARY,
- entry, n_fields);
+ rec_convert_dtuple_to_temp(b + extra_size, index,
+ entry, n_fields);
b += size;
@@ -790,6 +805,8 @@ row_merge_read(
os_offset_t ofs = ((os_offset_t) offset) * srv_sort_buf_size;
ibool success;
+ DBUG_EXECUTE_IF("row_merge_read_failure", return(FALSE););
+
#ifdef UNIV_DEBUG
if (row_merge_print_block_read) {
fprintf(stderr, "row_merge_read fd=%d ofs=%lu\n",
@@ -839,6 +856,8 @@ row_merge_write(
ret = os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf, ofs, buf_len);
+ DBUG_EXECUTE_IF("row_merge_write_failure", return(FALSE););
+
#ifdef UNIV_DEBUG
if (row_merge_print_block_write) {
fprintf(stderr, "row_merge_write fd=%d ofs=%lu\n",
@@ -951,7 +970,7 @@ err_exit:
*mrec = *buf + extra_size;
- rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);
+ rec_init_offsets_temp(*mrec, index, offsets);
data_size = rec_offs_data_size(offsets);
@@ -970,7 +989,7 @@ err_exit:
*mrec = b + extra_size;
- rec_init_offsets_comp_ordinary(*mrec, 0, index, offsets);
+ rec_init_offsets_temp(*mrec, index, offsets);
data_size = rec_offs_data_size(offsets);
ut_ad(extra_size + data_size < sizeof *buf);
@@ -2449,7 +2468,7 @@ row_merge_drop_temp_indexes(void)
/*********************************************************************//**
Creates temporary merge files, and if UNIV_PFS_IO defined, register
the file descriptor with Performance Schema.
-@return File descriptor */
+@return file descriptor, or -1 on failure */
UNIV_INLINE
int
row_merge_file_create_low(void)
@@ -2471,13 +2490,19 @@ row_merge_file_create_low(void)
#ifdef UNIV_PFS_IO
register_pfs_file_open_end(locker, fd);
#endif
+ if (fd < 0) {
+ fprintf(stderr,
+ "InnoDB: Error: Cannot create temporary merge file\n");
+ return(-1);
+ }
return(fd);
}
/*********************************************************************//**
-Create a merge file. */
+Create a merge file.
+@return file descriptor, or -1 on failure */
UNIV_INTERN
-void
+int
row_merge_file_create(
/*==================*/
merge_file_t* merge_file) /*!< out: merge file structure */
@@ -2488,6 +2513,7 @@ row_merge_file_create(
}
merge_file->offset = 0;
merge_file->n_rec = 0;
+ return(merge_file->fd);
}
/*********************************************************************//**
@@ -2761,6 +2787,28 @@ row_merge_rename_tables(
goto err_exit;
}
+ /* Generate the redo logs for file operations */
+ fil_mtr_rename_log(old_table->space, old_name,
+ new_table->space, new_table->name, tmp_name);
+
+ /* What if the redo logs are flushed to disk here? This is
+ tested with the following crash point */
+ DBUG_EXECUTE_IF("bug14669848_precommit", log_buffer_flush_to_disk();
+ DBUG_SUICIDE(););
+
+ /* File operations cannot be rolled back. So, before proceeding
+ with file operations, commit the dictionary changes. */
+ trx_commit_for_mysql(trx);
+
+ /* If server crashes here, the dictionary in InnoDB and MySQL
+ will differ. The .ibd files and the .frm files must be swapped
+ manually by the administrator. No loss of data. */
+ DBUG_EXECUTE_IF("bug14669848", DBUG_SUICIDE(););
+
+ /* Ensure that the redo logs are flushed to disk. The config
+ innodb_flush_log_at_trx_commit must not affect this. */
+ log_buffer_flush_to_disk();
+
/* The following calls will also rename the .ibd data files if
the tables are stored in a single-table tablespace */
@@ -2935,7 +2983,7 @@ row_merge_build_indexes(
ulint i;
ulint j;
ulint error;
- int tmpfd;
+ int tmpfd = -1;
dict_index_t* fts_sort_idx = NULL;
fts_psort_t* psort_info = NULL;
fts_psort_t* merge_info = NULL;
@@ -2959,9 +3007,21 @@ row_merge_build_indexes(
block = static_cast<row_merge_block_t*>(
os_mem_alloc_large(&block_size));
+ /* Initialize all the merge file descriptors, so that we
+ don't call row_merge_file_destroy() on an uninitialized
+ merge file descriptor */
+
for (i = 0; i < n_indexes; i++) {
+ merge_files[i].fd = -1;
+ }
- row_merge_file_create(&merge_files[i]);
+ for (i = 0; i < n_indexes; i++) {
+
+ if (row_merge_file_create(&merge_files[i]) < 0)
+ {
+ error = DB_OUT_OF_MEMORY;
+ goto func_exit;
+ }
if (indexes[i]->type & DICT_FTS) {
ibool opt_doc_id_size = FALSE;
@@ -2982,6 +3042,12 @@ row_merge_build_indexes(
tmpfd = row_merge_file_create_low();
+ if (tmpfd < 0)
+ {
+ error = DB_OUT_OF_MEMORY;
+ goto func_exit;
+ }
+
/* Reset the MySQL row buffer that is used when reporting
duplicate keys. */
innobase_rec_reset(table);
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index f1811a664c2..7a07833fa16 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -4532,6 +4532,13 @@ end:
trx->error_state = DB_SUCCESS;
trx_rollback_to_savepoint(trx, NULL);
trx->error_state = DB_SUCCESS;
+ } else {
+ if (old_is_tmp && !new_is_tmp) {
+ /* After ALTER TABLE the table statistics
+ need to be rebuilt. They will be rebuilt
+ when the table is loaded again. */
+ table->stat_initialized = FALSE;
+ }
}
}
diff --git a/storage/innobase/row/row0umod.cc b/storage/innobase/row/row0umod.cc
index 4869909f5a6..42034c5b80d 100644
--- a/storage/innobase/row/row0umod.cc
+++ b/storage/innobase/row/row0umod.cc
@@ -518,7 +518,7 @@ row_undo_mod_upd_del_sec(
ulint err = DB_SUCCESS;
ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
-
+ ut_ad(!node->undo_row);
heap = mem_heap_create(1024);
while (node->index != NULL) {
@@ -576,6 +576,8 @@ row_undo_mod_del_mark_sec(
mem_heap_t* heap;
ulint err = DB_SUCCESS;
+ ut_ad(!node->undo_row);
+
heap = mem_heap_create(1024);
while (node->index != NULL) {
diff --git a/storage/innobase/row/row0undo.cc b/storage/innobase/row/row0undo.cc
index 757d3544ba4..a73f858599d 100644
--- a/storage/innobase/row/row0undo.cc
+++ b/storage/innobase/row/row0undo.cc
@@ -217,7 +217,7 @@ row_undo_search_clust_to_pcur(
node->row = row_build(ROW_COPY_DATA, clust_index, rec,
offsets, NULL, ext, node->heap);
- if (node->update) {
+ if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
node->undo_row = dtuple_copy(node->row, node->heap);
row_upd_replace(node->undo_row, &node->undo_ext,
clust_index, node->update, node->heap);
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index e64cc006f02..30e0698eab9 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -39,6 +39,8 @@ Created 10/8/1995 Heikki Tuuri
*******************************************************/
/* Dummy comment */
+#include "m_string.h" /* for my_sys.h */
+#include "my_sys.h" /* DEBUG_SYNC_C */
#include "srv0srv.h"
#include "ut0mem.h"
@@ -354,6 +356,11 @@ UNIV_INTERN unsigned long long srv_stats_persistent_sample_pages = 20;
UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
+UNIV_INTERN ibool srv_use_atomic_writes = FALSE;
+#ifdef HAVE_POSIX_FALLOCATE
+UNIV_INTERN ibool srv_use_posix_fallocate = TRUE;
+#endif
+
/** doublewrite buffer is 1MB is size i.e.: it can hold 128 16K pages.
The following parameter is the size of the buffer that is used for
batch flushing i.e.: LRU flushing and flush_list flushing. The rest
@@ -1308,13 +1315,15 @@ void
srv_export_innodb_status(void)
/*==========================*/
{
- buf_pool_stat_t stat;
- ulint LRU_len;
- ulint free_len;
- ulint flush_list_len;
+ buf_pool_stat_t stat;
+ buf_pools_list_size_t buf_pools_list_size;
+ ulint LRU_len;
+ ulint free_len;
+ ulint flush_list_len;
buf_get_total_stat(&stat);
buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
+ buf_get_total_list_size_in_bytes(&buf_pools_list_size);
mutex_enter(&srv_innodb_monitor_mutex);
@@ -1343,7 +1352,12 @@ srv_export_innodb_status(void)
export_vars.innodb_buffer_pool_read_ahead_evicted
= stat.n_ra_pages_evicted;
export_vars.innodb_buffer_pool_pages_data = LRU_len;
+ export_vars.innodb_buffer_pool_bytes_data =
+ buf_pools_list_size.LRU_bytes
+ + buf_pools_list_size.unzip_LRU_bytes;
export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
+ export_vars.innodb_buffer_pool_bytes_dirty =
+ buf_pools_list_size.flush_list_bytes;
export_vars.innodb_buffer_pool_pages_free = free_len;
#ifdef UNIV_DEBUG
export_vars.innodb_buffer_pool_pages_latched
diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc
index af64d011db2..c492edf89b6 100644
--- a/storage/innobase/sync/sync0sync.cc
+++ b/storage/innobase/sync/sync0sync.cc
@@ -309,9 +309,9 @@ mutex_create_func(
/* NOTE! The very first mutexes are not put to the mutex list */
- if ((mutex == &mutex_list_mutex)
+ if (mutex == &mutex_list_mutex
#ifdef UNIV_SYNC_DEBUG
- || (mutex == &sync_thread_mutex)
+ || mutex == &sync_thread_mutex
#endif /* UNIV_SYNC_DEBUG */
) {
diff --git a/storage/innobase/trx/trx0purge.cc b/storage/innobase/trx/trx0purge.cc
index 62c140879aa..d050e7461e7 100644
--- a/storage/innobase/trx/trx0purge.cc
+++ b/storage/innobase/trx/trx0purge.cc
@@ -69,6 +69,10 @@ UNIV_INTERN mysql_pfs_key_t trx_purge_latch_key;
UNIV_INTERN mysql_pfs_key_t purge_sys_bh_mutex_key;
#endif /* UNIV_PFS_MUTEX */
+#ifdef UNIV_DEBUG
+UNIV_INTERN my_bool srv_purge_view_update_only_debug;
+#endif /* UNIV_DEBUG */
+
/********************************************************************//**
Fetches the next undo log record from the history list to purge. It must be
released with the corresponding release function.
@@ -1215,6 +1219,12 @@ trx_purge(
rw_lock_x_unlock(&purge_sys->latch);
+#ifdef UNIV_DEBUG
+ if (srv_purge_view_update_only_debug) {
+ return(0);
+ }
+#endif
+
/* Fetch the UNDO recs that need to be purged. */
n_pages_handled = trx_purge_attach_undo_recs(
n_purge_threads, purge_sys, &purge_sys->limit, batch_size);
diff --git a/storage/innobase/trx/trx0rec.cc b/storage/innobase/trx/trx0rec.cc
index b87eac9362e..203139f23fd 100644
--- a/storage/innobase/trx/trx0rec.cc
+++ b/storage/innobase/trx/trx0rec.cc
@@ -1588,6 +1588,25 @@ trx_undo_prev_version_build(
if (row_upd_changes_field_size_or_external(index, offsets, update)) {
ulint n_ext;
+ /* We should confirm the existence of disowned external data,
+ if the previous version record is delete marked. If the trx_id
+ of the previous record is seen by purge view, we should treat
+ it as missing history, because the disowned external data
+ might be purged already.
+
+ The inherited external data (BLOBs) can be freed (purged)
+ after trx_id was committed, provided that no view was started
+ before trx_id. If the purge view can see the committed
+ delete-marked record by trx_id, no transactions need to access
+ the BLOB. */
+
+ if ((update->info_bits & REC_INFO_DELETED_FLAG)
+ && read_view_sees_trx_id(purge_sys->view, trx_id)) {
+ /* treat as a fresh insert, not to
+ cause assertion error at the caller. */
+ return(DB_SUCCESS);
+ }
+
/* We have to set the appropriate extern storage bits in the
old version of the record: the extern bits in rec for those
fields that update does NOT update, as well as the bits for