diff options
author | Sergei Golubchik <sergii@pisem.net> | 2014-05-06 10:21:34 +0200 |
---|---|---|
committer | Sergei Golubchik <sergii@pisem.net> | 2014-05-06 10:21:34 +0200 |
commit | b968363aac2bf75d014482f8405e6a9fed73ccd6 (patch) | |
tree | 7f2aa9432e293ffe4dfcc772c06991d0b604ca24 /storage | |
parent | e2e5d07b2807706fb9187f00c049474a01ab15da (diff) | |
parent | 05fd3e63665597d3bcdf1094bb111df014087936 (diff) | |
download | mariadb-git-b968363aac2bf75d014482f8405e6a9fed73ccd6.tar.gz |
MDEV-6184 10.0.11 merge
XtraDB 5.6.16-64.2
Diffstat (limited to 'storage')
70 files changed, 3073 insertions, 1195 deletions
diff --git a/storage/xtradb/api/api0api.cc b/storage/xtradb/api/api0api.cc index d2f1a468f25..c5299156d7a 100644 --- a/storage/xtradb/api/api0api.cc +++ b/storage/xtradb/api/api0api.cc @@ -3870,6 +3870,7 @@ ib_table_truncate( ib_err_t trunc_err; ib_trx_t ib_trx = NULL; ib_crsr_t ib_crsr = NULL; + ib_ulint_t memcached_sync = 0; ib_trx = ib_trx_begin(IB_TRX_SERIALIZABLE, true, false); @@ -3885,6 +3886,13 @@ ib_table_truncate( err = DB_TABLE_NOT_FOUND; } + /* Remember the memcached_sync_count and set it to 0, so the + truncate can be executed. */ + if (table != NULL && err == DB_SUCCESS) { + memcached_sync = table->memcached_sync_count; + table->memcached_sync_count = 0; + } + dict_mutex_exit_for_mysql(); if (err == DB_SUCCESS) { @@ -3910,6 +3918,15 @@ ib_table_truncate( ut_a(err == DB_SUCCESS); } + /* Set the memcached_sync_count back. */ + if (table != NULL && memcached_sync != 0) { + dict_mutex_enter_for_mysql(); + + table->memcached_sync_count = memcached_sync; + + dict_mutex_exit_for_mysql(); + } + return(trunc_err); } @@ -3972,3 +3989,51 @@ ib_cfg_get_cfg() return(cfg_status); } + +/*****************************************************************//** +Increase/decrease the memcached sync count of table to sync memcached +DML with SQL DDLs. +@return DB_SUCCESS or error number */ +UNIV_INTERN +ib_err_t +ib_cursor_set_memcached_sync( +/*=========================*/ + ib_crsr_t ib_crsr, /*!< in: cursor */ + ib_bool_t flag) /*!< in: true for increase */ +{ + const ib_cursor_t* cursor = (const ib_cursor_t*) ib_crsr; + row_prebuilt_t* prebuilt = cursor->prebuilt; + dict_table_t* table = prebuilt->table; + ib_err_t err = DB_SUCCESS; + + if (table != NULL) { + /* If memcached_sync_count is -1, means table is + doing DDL, we just return error. */ + if (table->memcached_sync_count == DICT_TABLE_IN_DDL) { + return(DB_ERROR); + } + + if (flag) { +#ifdef HAVE_ATOMIC_BUILTINS + os_atomic_increment_lint(&table->memcached_sync_count, 1); +#else + dict_mutex_enter_for_mysql(); + ++table->memcached_sync_count; + dict_mutex_exit_for_mysql(); +#endif + } else { +#ifdef HAVE_ATOMIC_BUILTINS + os_atomic_decrement_lint(&table->memcached_sync_count, 1); +#else + dict_mutex_enter_for_mysql(); + --table->memcached_sync_count; + dict_mutex_exit_for_mysql(); +#endif + ut_a(table->memcached_sync_count >= 0); + } + } else { + err = DB_TABLE_NOT_FOUND; + } + + return(err); +} diff --git a/storage/xtradb/btr/btr0cur.cc b/storage/xtradb/btr/btr0cur.cc index a180649fa1e..fb12aac18b1 100644 --- a/storage/xtradb/btr/btr0cur.cc +++ b/storage/xtradb/btr/btr0cur.cc @@ -2236,8 +2236,7 @@ btr_cur_optimistic_update( contain trx id and roll ptr fields */ ulint cmpl_info,/*!< in: compiler info on secondary index updates */ - que_thr_t* thr, /*!< in: query thread, or NULL if - appropriate flags are set */ + que_thr_t* thr, /*!< in: query thread */ trx_id_t trx_id, /*!< in: transaction id */ mtr_t* mtr) /*!< in/out: mini-transaction; if this is a secondary index, the caller must @@ -2537,8 +2536,7 @@ btr_cur_pessimistic_update( the values in update vector have no effect */ ulint cmpl_info,/*!< in: compiler info on secondary index updates */ - que_thr_t* thr, /*!< in: query thread, or NULL if - appropriate flags are set */ + que_thr_t* thr, /*!< in: query thread */ trx_id_t trx_id, /*!< in: transaction id */ mtr_t* mtr) /*!< in/out: mini-transaction; must be committed before latching any further pages */ diff --git a/storage/xtradb/buf/buf0buddy.cc b/storage/xtradb/buf/buf0buddy.cc index 3f8f339a81a..442ee80235f 100644 --- a/storage/xtradb/buf/buf0buddy.cc +++ b/storage/xtradb/buf/buf0buddy.cc @@ -545,10 +545,8 @@ buf_buddy_relocate( { buf_page_t* bpage; const ulint size = BUF_BUDDY_LOW << i; - ib_mutex_t* mutex; ulint space; ulint offset; - prio_rw_lock_t* hash_lock; ut_ad(mutex_own(&buf_pool->zip_free_mutex)); ut_ad(!mutex_own(&buf_pool->zip_mutex)); @@ -570,8 +568,13 @@ buf_buddy_relocate( ut_ad(space != BUF_BUDDY_STAMP_FREE); mutex_exit(&buf_pool->zip_free_mutex); - /* Lock page hash to prevent a relocation for the target page */ - bpage = buf_page_hash_get_s_locked(buf_pool, space, offset, &hash_lock); + + ulint fold = buf_page_address_fold(space, offset); + prio_rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold); + + rw_lock_x_lock(hash_lock); + + bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); if (!bpage || bpage->zip.data != src) { /* The block has probably been freshly @@ -579,9 +582,8 @@ buf_buddy_relocate( added to buf_pool->page_hash yet. Obviously, it cannot be relocated. */ - if (bpage) { - rw_lock_s_unlock(hash_lock); - } + rw_lock_x_unlock(hash_lock); + mutex_enter(&buf_pool->zip_free_mutex); return(false); } @@ -592,7 +594,8 @@ buf_buddy_relocate( For the sake of simplicity, give up. */ ut_ad(page_zip_get_size(&bpage->zip) < size); - rw_lock_s_unlock(hash_lock); + rw_lock_x_unlock(hash_lock); + mutex_enter(&buf_pool->zip_free_mutex); return(false); } @@ -601,31 +604,44 @@ buf_buddy_relocate( contain uninitialized data. */ UNIV_MEM_ASSERT_W(src, size); - mutex = buf_page_get_mutex(bpage); - - mutex_enter(mutex); + ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - rw_lock_s_unlock(hash_lock); + mutex_enter(block_mutex); mutex_enter(&buf_pool->zip_free_mutex); if (buf_page_can_relocate(bpage)) { /* Relocate the compressed page. */ - ullint usec = ut_time_us(NULL); + ullint usec = ut_time_us(NULL); + ut_a(bpage->zip.data == src); - memcpy(dst, src, size); - bpage->zip.data = (page_zip_t*) dst; - mutex_exit(mutex); + + /* Note: This is potentially expensive, we need a better + solution here. We go with correctness for now. */ + ::memcpy(dst, src, size); + + bpage->zip.data = reinterpret_cast<page_zip_t*>(dst); + + rw_lock_x_unlock(hash_lock); + + mutex_exit(block_mutex); + buf_buddy_mem_invalid( reinterpret_cast<buf_buddy_free_t*>(src), i); buf_buddy_stat_t* buddy_stat = &buf_pool->buddy_stat[i]; - buddy_stat->relocated++; + + ++buddy_stat->relocated; + buddy_stat->relocated_usec += ut_time_us(NULL) - usec; + return(true); } - mutex_exit(mutex); + rw_lock_x_unlock(hash_lock); + + mutex_exit(block_mutex); + return(false); } diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc index 2ec25491f4c..69dcc4ce9cb 100644 --- a/storage/xtradb/buf/buf0buf.cc +++ b/storage/xtradb/buf/buf0buf.cc @@ -576,8 +576,11 @@ buf_page_is_corrupted( if (checksum_field1 == 0 && checksum_field2 == 0 && mach_read_from_4(read_buf + FIL_PAGE_LSN) == 0) { /* make sure that the page is really empty */ - ut_d(for (ulint i = 0; i < UNIV_PAGE_SIZE; i++) { - ut_a(read_buf[i] == 0); }); + for (ulint i = 0; i < UNIV_PAGE_SIZE; i++) { + if (read_buf[i] != 0) { + return(TRUE); + } + } return(FALSE); } @@ -1650,16 +1653,19 @@ buf_pool_watch_set( bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); - if (UNIV_LIKELY_NULL(bpage)) { + if (bpage != NULL) { page_found: if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) { /* The page was loaded meanwhile. */ return(bpage); } + /* Add to an existing watch. */ - mutex_enter(&buf_pool->zip_mutex); - bpage->buf_fix_count++; - mutex_exit(&buf_pool->zip_mutex); +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_increment_uint32(&bpage->buf_fix_count, 1); +#else + ++bpage->buf_fix_count; +#endif /* PAGE_ATOMIC_REF_COUNT */ return(NULL); } @@ -1782,31 +1788,32 @@ buf_pool_watch_unset( buf_page_t* bpage; buf_pool_t* buf_pool = buf_pool_get(space, offset); ulint fold = buf_page_address_fold(space, offset); - prio_rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, - fold); + prio_rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold); rw_lock_x_lock(hash_lock); - bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); - /* The page must exist because buf_pool_watch_set() - increments buf_fix_count. */ - ut_a(bpage); + /* The page must exist because buf_pool_watch_set() increments + buf_fix_count. */ - if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) { - ib_mutex_t* mutex = buf_page_get_mutex(bpage); + bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); - mutex_enter(mutex); - ut_a(bpage->buf_fix_count > 0); - bpage->buf_fix_count--; - mutex_exit(mutex); + if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) { + buf_block_unfix(reinterpret_cast<buf_block_t*>(bpage)); } else { - ut_a(bpage->buf_fix_count > 0); - mutex_enter(&buf_pool->zip_mutex); - if (UNIV_LIKELY(!--bpage->buf_fix_count)) { + ut_ad(bpage->buf_fix_count > 0); + +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_decrement_uint32(&bpage->buf_fix_count, 1); +#else + --bpage->buf_fix_count; +#endif /* PAGE_ATOMIC_REF_COUNT */ + + if (bpage->buf_fix_count == 0) { + mutex_enter(&buf_pool->zip_mutex); buf_pool_watch_remove(buf_pool, fold, bpage); + mutex_exit(&buf_pool->zip_mutex); } - mutex_exit(&buf_pool->zip_mutex); } rw_lock_x_unlock(hash_lock); @@ -1833,10 +1840,10 @@ buf_pool_watch_occurred( rw_lock_s_lock(hash_lock); - bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); /* The page must exist because buf_pool_watch_set() increments buf_fix_count. */ - ut_a(bpage); + bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); + ret = !buf_pool_watch_is_sentinel(buf_pool, bpage); rw_lock_s_unlock(hash_lock); @@ -2093,27 +2100,32 @@ err_exit: case BUF_BLOCK_READY_FOR_USE: case BUF_BLOCK_MEMORY: case BUF_BLOCK_REMOVE_HASH: - break; + ut_error; + case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_ZIP_DIRTY: block_mutex = &buf_pool->zip_mutex; mutex_enter(block_mutex); - bpage->buf_fix_count++; +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_increment_uint32(&bpage->buf_fix_count, 1); +#else + ++bpage->buf_fix_count; +#endif /* PAGE_ATOMIC_REF_COUNT */ goto got_block; case BUF_BLOCK_FILE_PAGE: /* Discard the uncompressed page frame if possible. */ if (!discard_attempted) { rw_lock_s_unlock(hash_lock); - buf_block_try_discard_uncompressed(space, - offset); + buf_block_try_discard_uncompressed(space, offset); discard_attempted = TRUE; goto lookup; } block_mutex = &((buf_block_t*) bpage)->mutex; + mutex_enter(block_mutex); - buf_block_buf_fix_inc((buf_block_t*) bpage, - __FILE__, __LINE__); + + buf_block_buf_fix_inc((buf_block_t*) bpage, __FILE__, __LINE__); goto got_block; } @@ -2126,7 +2138,7 @@ got_block: rw_lock_s_unlock(hash_lock); #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(!bpage->file_page_was_freed); -#endif +#endif /* defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG */ buf_page_set_accessed(bpage); @@ -2451,7 +2463,7 @@ buf_block_is_uncompressed( const buf_block_t* block) /*!< in: pointer to block, not dereferenced */ { - if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) { + if ((((ulint) block) % sizeof *block) != 0) { /* The pointer should be aligned. */ return(FALSE); } @@ -2481,6 +2493,70 @@ buf_debug_execute_is_force_flush() } #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ +/** +Wait for the block to be read in. +@param block The block to check +@param trx Transaction to account the I/Os to */ +static +void +buf_wait_for_read(buf_block_t* block, trx_t* trx) +{ + /* Note: For the PAGE_ATOMIC_REF_COUNT case: + + We are using the block->lock to check for IO state (and a dirty read). + We set the IO_READ state under the protection of the hash_lock + (and block->mutex). This is safe because another thread can only + access the block (and check for IO state) after the block has been + added to the page hashtable. */ + + if (buf_block_get_io_fix_unlocked(block) == BUF_IO_READ) { + + ib_uint64_t start_time; + ulint sec; + ulint ms; + + /* Wait until the read operation completes */ + + ib_mutex_t* mutex = buf_page_get_mutex(&block->page); + + if (UNIV_UNLIKELY(trx && trx->take_stats)) + { + ut_usectime(&sec, &ms); + start_time = (ib_uint64_t)sec * 1000000 + ms; + } else { + start_time = 0; + } + + for (;;) { + buf_io_fix io_fix; + + mutex_enter(mutex); + + io_fix = buf_block_get_io_fix(block); + + mutex_exit(mutex); + + if (io_fix == BUF_IO_READ) { + /* Wait by temporaly s-latch */ + rw_lock_s_lock(&block->lock); + rw_lock_s_unlock(&block->lock); + } else { + break; + } + } + + if (UNIV_UNLIKELY(start_time != 0)) + { + ut_usectime(&sec, &ms); + ib_uint64_t finish_time + = (ib_uint64_t)sec * 1000000 + ms; + trx->io_reads_wait_timer + += (ulint)(finish_time - start_time); + } + + } +} + /********************************************************************//** This is the general function used to get access to a database page. @return pointer to the block or NULL */ @@ -2505,15 +2581,11 @@ buf_page_get_gen( ulint fold; unsigned access_time; ulint fix_type; - ibool must_read; prio_rw_lock_t* hash_lock; - ib_mutex_t* block_mutex; ulint retries = 0; trx_t* trx = NULL; - ulint sec; - ulint ms; - ib_uint64_t start_time; - ib_uint64_t finish_time; + buf_block_t* fix_block; + ib_mutex_t* fix_mutex = NULL; buf_pool_t* buf_pool = buf_pool_get(space, offset); ut_ad(mtr); @@ -2553,7 +2625,9 @@ loop: block = guess; rw_lock_s_lock(hash_lock); - if (block) { + + if (block != NULL) { + /* If the guess is a compressed page descriptor that has been allocated by buf_page_alloc_descriptor(), it may have been freed by buf_relocate(). */ @@ -2591,10 +2665,10 @@ loop: if (UNIV_LIKELY_NULL(block)) { /* We can release hash_lock after we - acquire block_mutex to make sure that - no state change takes place. */ - block_mutex = buf_page_get_mutex(&block->page); - mutex_enter(block_mutex); + increment the fix count to make + sure that no state change takes place. */ + fix_block = block; + buf_block_fix(fix_block); /* Now safe to release page_hash mutex */ rw_lock_x_unlock(hash_lock); @@ -2649,48 +2723,60 @@ loop: ut_a(++buf_dbg_counter % 5771 || buf_validate()); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ goto loop; + } else { + fix_block = block; } - - /* We can release hash_lock after we acquire block_mutex to - make sure that no state change takes place. */ - block_mutex = buf_page_get_mutex(&block->page); - mutex_enter(block_mutex); + buf_block_fix(fix_block); /* Now safe to release page_hash mutex */ rw_lock_s_unlock(hash_lock); got_block: + + fix_mutex = buf_page_get_mutex(&fix_block->page); + ut_ad(page_zip_get_size(&block->page.zip) == zip_size); - ut_ad(mutex_own(block_mutex)); - must_read = buf_block_get_io_fix(block) == BUF_IO_READ; + if (mode == BUF_GET_IF_IN_POOL || mode == BUF_PEEK_IF_IN_POOL) { - if (must_read && (mode == BUF_GET_IF_IN_POOL - || mode == BUF_PEEK_IF_IN_POOL)) { + bool must_read; - /* The page is being read to buffer pool, - but we cannot wait around for the read to - complete. */ -null_exit: - mutex_exit(block_mutex); + { + buf_page_t* fix_page = &fix_block->page; - return(NULL); + mutex_enter(fix_mutex); + + buf_io_fix io_fix = buf_page_get_io_fix(fix_page); + + must_read = (io_fix == BUF_IO_READ); + + mutex_exit(fix_mutex); + } + + if (must_read) { + /* The page is being read to buffer pool, + but we cannot wait around for the read to + complete. */ + buf_block_unfix(fix_block); + + return(NULL); + } } - if (UNIV_UNLIKELY(block->page.is_corrupt && + if (UNIV_UNLIKELY(fix_block->page.is_corrupt && srv_pass_corrupt_table <= 1)) { - mutex_exit(block_mutex); + buf_block_unfix(fix_block); return(NULL); } - switch (buf_block_get_state(block)) { + switch(buf_block_get_state(fix_block)) { buf_page_t* bpage; case BUF_BLOCK_FILE_PAGE: - ut_ad(block_mutex != &buf_pool->zip_mutex); + ut_ad(fix_mutex != &buf_pool->zip_mutex); break; case BUF_BLOCK_ZIP_PAGE: @@ -2700,19 +2786,24 @@ null_exit: adaptive hash index. There cannot be an adaptive hash index for a compressed-only page, so do not bother decompressing the page. */ - goto null_exit; + buf_block_unfix(fix_block); + + return(NULL); } bpage = &block->page; - ut_ad(block_mutex == &buf_pool->zip_mutex); + ut_ad(fix_mutex == &buf_pool->zip_mutex); + + /* Note: We have already buffer fixed this block. */ + if (bpage->buf_fix_count > 1 + || buf_page_get_io_fix_unlocked(bpage) != BUF_IO_NONE) { - if (bpage->buf_fix_count - || buf_page_get_io_fix(bpage) != BUF_IO_NONE) { /* This condition often occurs when the buffer is not buffer-fixed, but I/O-fixed by buf_page_init_for_read(). */ - mutex_exit(&buf_pool->zip_mutex); -wait_until_unfixed: + + buf_block_unfix(fix_block); + /* The block is buffer-fixed or I/O-fixed. Try again later. */ os_thread_sleep(WAIT_FOR_READ); @@ -2723,24 +2814,34 @@ wait_until_unfixed: /* Buffer-fix the block so that it cannot be evicted or relocated while we are attempting to allocate an uncompressed page. */ - bpage->buf_fix_count++; /* Allocate an uncompressed page. */ - mutex_exit(&buf_pool->zip_mutex); + block = buf_LRU_get_free_block(buf_pool); - ut_a(block); mutex_enter(&buf_pool->LRU_list_mutex); rw_lock_x_lock(hash_lock); + /* Buffer-fixing prevents the page_hash from changing. */ ut_ad(bpage == buf_page_hash_get_low( buf_pool, space, offset, fold)); - mutex_enter(&block->mutex); + buf_block_mutex_enter(block); + mutex_enter(&buf_pool->zip_mutex); - if (--bpage->buf_fix_count + ut_ad(fix_block->page.buf_fix_count > 0); + +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_decrement_uint32(&fix_block->page.buf_fix_count, 1); +#else + --fix_block->page.buf_fix_count; +#endif /* PAGE_ATOMIC_REF_COUNT */ + + fix_block = block; + + if (bpage->buf_fix_count > 0 || buf_page_get_io_fix(bpage) != BUF_IO_NONE) { mutex_exit(&buf_pool->zip_mutex); @@ -2753,23 +2854,31 @@ wait_until_unfixed: buf_LRU_block_free_non_file_page(block); mutex_exit(&buf_pool->LRU_list_mutex); rw_lock_x_unlock(hash_lock); - mutex_exit(&block->mutex); + buf_block_mutex_exit(block); - goto wait_until_unfixed; + /* Try again */ + goto loop; } /* Move the compressed page from bpage to block, and uncompress it. */ + /* Note: this is the uncompressed block and it is not + accessible by other threads yet because it is not in + any list or hash table */ buf_relocate(bpage, &block->page); + buf_block_init_low(block); + + /* Set after relocate(). */ + block->page.buf_fix_count = 1; + block->lock_hash_val = lock_rec_hash(space, offset); UNIV_MEM_DESC(&block->page.zip.data, - page_zip_get_size(&block->page.zip)); + page_zip_get_size(&block->page.zip)); - if (buf_page_get_state(&block->page) - == BUF_BLOCK_ZIP_PAGE) { + if (buf_page_get_state(&block->page) == BUF_BLOCK_ZIP_PAGE) { #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG UT_LIST_REMOVE(list, buf_pool->zip_clean, &block->page); @@ -2777,8 +2886,7 @@ wait_until_unfixed: ut_ad(!block->page.in_flush_list); } else { /* Relocate buf_pool->flush_list. */ - buf_flush_relocate_on_flush_list(bpage, - &block->page); + buf_flush_relocate_on_flush_list(bpage, &block->page); } /* Buffer-fix, I/O-fix, and X-latch the block @@ -2791,7 +2899,6 @@ wait_until_unfixed: mutex_exit(&buf_pool->LRU_list_mutex); - block->page.buf_fix_count = 1; buf_block_set_io_fix(block, BUF_IO_READ); rw_lock_x_lock_inline(&block->lock, 0, file, line); @@ -2802,7 +2909,9 @@ wait_until_unfixed: os_atomic_increment_ulint(&buf_pool->n_pend_unzip, 1); access_time = buf_page_is_accessed(&block->page); - mutex_exit(&block->mutex); + + buf_block_mutex_exit(block); + mutex_exit(&buf_pool->zip_mutex); buf_page_free_descriptor(bpage); @@ -2813,9 +2922,12 @@ wait_until_unfixed: /* Page checksum verification is already done when the page is read from disk. Hence page checksum verification is not necessary when decompressing the page. */ - ut_a(buf_zip_decompress(block, FALSE)); + { + bool success = buf_zip_decompress(block, FALSE); + ut_a(success); + } - if (UNIV_LIKELY(!recv_no_ibuf_operations)) { + if (!recv_no_ibuf_operations) { if (access_time) { #ifdef UNIV_IBUF_COUNT_DEBUG ut_a(ibuf_count_get(space, offset) == 0); @@ -2827,10 +2939,14 @@ wait_until_unfixed: } /* Unfix and unlatch the block. */ - mutex_enter(&block->mutex); - block->page.buf_fix_count--; - buf_block_set_io_fix(block, BUF_IO_NONE); + buf_block_mutex_enter(fix_block); + + buf_block_set_io_fix(fix_block, BUF_IO_NONE); + + buf_block_mutex_exit(fix_block); + os_atomic_decrement_ulint(&buf_pool->n_pend_unzip, 1); + rw_lock_x_unlock(&block->lock); break; @@ -2844,39 +2960,45 @@ wait_until_unfixed: break; } + ut_ad(block == fix_block); + ut_ad(fix_block->page.buf_fix_count > 0); + #ifdef UNIV_SYNC_DEBUG ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)); ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED)); #endif /* UNIV_SYNC_DEBUG */ - ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + ut_ad(buf_block_get_state(fix_block) == BUF_BLOCK_FILE_PAGE); #if UNIV_WORD_SIZE == 4 /* On 32-bit systems, there is no padding in buf_page_t. On other systems, Valgrind could complain about uninitialized pad bytes. */ - UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page); + UNIV_MEM_ASSERT_RW(&fix_block->page, sizeof(fix_block->page)); #endif #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH) && (ibuf_debug || buf_debug_execute_is_force_flush())) { + /* Try to evict the block from the buffer pool, to use the insert buffer (change buffer) as much as possible. */ - /* To obey the latching order, release the - block->mutex before acquiring buf_pool->LRU_list_mutex. Protect - the block from changes by temporarily buffer-fixing it - for the time we are not holding block->mutex. */ - - buf_block_buf_fix_inc(block, file, line); - mutex_exit(&block->mutex); mutex_enter(&buf_pool->LRU_list_mutex); - mutex_enter(&block->mutex); - buf_block_buf_fix_dec(block); - if (buf_LRU_free_page(&block->page, true)) { - mutex_exit(&block->mutex); + buf_block_unfix(fix_block); + + /* Now we are only holding the buf_pool->LRU_list_mutex, + not block->mutex or hash_lock. Blocks cannot be + relocated or enter or exit the buf_pool while we + are holding the buf_pool->LRU_list_mutex. */ + + fix_mutex = buf_page_get_mutex(&fix_block->page); + mutex_enter(fix_mutex); + + if (buf_LRU_free_page(&fix_block->page, true)) { + + mutex_exit(fix_mutex); rw_lock_x_lock(hash_lock); if (mode == BUF_GET_IF_IN_POOL_OR_WATCH) { @@ -2892,7 +3014,7 @@ wait_until_unfixed: rw_lock_x_unlock(hash_lock); - if (UNIV_LIKELY_NULL(block)) { + if (block != NULL) { /* Either the page has been read in or a watch was set on that in the window where we released the buf_pool::mutex @@ -2906,111 +3028,108 @@ wait_until_unfixed: "innodb_change_buffering_debug evict %u %u\n", (unsigned) space, (unsigned) offset); return(NULL); - } else { - - mutex_exit(&buf_pool->LRU_list_mutex); } - if (buf_flush_page_try(buf_pool, block)) { + if (buf_flush_page_try(buf_pool, fix_block)) { fprintf(stderr, "innodb_change_buffering_debug flush %u %u\n", (unsigned) space, (unsigned) offset); - guess = block; + guess = fix_block; goto loop; } + mutex_exit(&buf_pool->LRU_list_mutex); + + buf_block_mutex_exit(fix_block); + + buf_block_fix(fix_block); + /* Failed to evict the page; change it directly */ } #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - buf_block_buf_fix_inc(block, file, line); + ut_ad(fix_block->page.buf_fix_count > 0); + +#ifdef UNIV_SYNC_DEBUG + /* We have already buffer fixed the page, and we are committed to + returning this page to the caller. Register for debugging. */ + { + ibool ret; + ret = rw_lock_s_lock_nowait(&fix_block->debug_latch, file, line); + ut_a(ret); + } +#endif /* UNIV_SYNC_DEBUG */ + #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(mode == BUF_GET_POSSIBLY_FREED - || !block->page.file_page_was_freed); + || !fix_block->page.file_page_was_freed); #endif /* Check if this is the first access to the page */ - access_time = buf_page_is_accessed(&block->page); + access_time = buf_page_is_accessed(&fix_block->page); - buf_page_set_accessed(&block->page); + /* This is a heuristic and we don't care about ordering issues. */ + if (access_time == 0) { + buf_block_mutex_enter(fix_block); - mutex_exit(&block->mutex); + buf_page_set_accessed(&fix_block->page); + + buf_block_mutex_exit(fix_block); + } if (mode != BUF_PEEK_IF_IN_POOL) { - buf_page_make_young_if_needed(&block->page); + buf_page_make_young_if_needed(&fix_block->page); } #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(++buf_dbg_counter % 5771 || buf_validate()); - ut_a(block->page.buf_fix_count > 0); - ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + ut_a(fix_block->page.buf_fix_count > 0); + ut_a(buf_block_get_state(fix_block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ +#ifdef PAGE_ATOMIC_REF_COUNT + /* We have to wait here because the IO_READ state was set + under the protection of the hash_lock and the block->mutex + but not the block->lock. */ + buf_wait_for_read(fix_block, trx); +#endif /* PAGE_ATOMIC_REF_COUNT */ + switch (rw_latch) { case RW_NO_LATCH: - if (must_read) { - /* Let us wait until the read operation - completes */ - if (UNIV_UNLIKELY(trx && trx->take_stats)) - { - ut_usectime(&sec, &ms); - start_time = (ib_uint64_t)sec * 1000000 + ms; - } else { - start_time = 0; - } - for (;;) { - enum buf_io_fix io_fix; - - mutex_enter(&block->mutex); - io_fix = buf_block_get_io_fix(block); - mutex_exit(&block->mutex); - - if (io_fix == BUF_IO_READ) { - /* wait by temporaly s-latch */ - rw_lock_s_lock(&(block->lock)); - rw_lock_s_unlock(&(block->lock)); - } else { - break; - } - } - if (UNIV_UNLIKELY(start_time != 0)) - { - ut_usectime(&sec, &ms); - finish_time = (ib_uint64_t)sec * 1000000 + ms; - trx->io_reads_wait_timer += (ulint)(finish_time - start_time); - } - } +#ifndef PAGE_ATOMIC_REF_COUNT + buf_wait_for_read(fix_block, trx); +#endif /* !PAGE_ATOMIC_REF_COUNT */ fix_type = MTR_MEMO_BUF_FIX; break; case RW_S_LATCH: - rw_lock_s_lock_inline(&(block->lock), 0, file, line); + rw_lock_s_lock_inline(&fix_block->lock, 0, file, line); fix_type = MTR_MEMO_PAGE_S_FIX; break; default: ut_ad(rw_latch == RW_X_LATCH); - rw_lock_x_lock_inline(&(block->lock), 0, file, line); + rw_lock_x_lock_inline(&fix_block->lock, 0, file, line); fix_type = MTR_MEMO_PAGE_X_FIX; break; } - mtr_memo_push(mtr, block, fix_type); + mtr_memo_push(mtr, fix_block, fix_type); if (mode != BUF_PEEK_IF_IN_POOL && !access_time) { /* In the case of a first access, try to apply linear read-ahead */ - buf_read_ahead_linear(space, zip_size, offset, - ibuf_inside(mtr), trx); + buf_read_ahead_linear( + space, zip_size, offset, ibuf_inside(mtr), trx); } #ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(buf_block_get_space(block), - buf_block_get_page_no(block)) == 0); + ut_a(ibuf_count_get(buf_block_get_space(fix_block), + buf_block_get_page_no(fix_block)) == 0); #endif #ifdef UNIV_SYNC_DEBUG ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)); @@ -3021,7 +3140,7 @@ wait_until_unfixed: _increment_page_get_statistics(block, trx); } - return(block); + return(fix_block); } /********************************************************************//** @@ -3085,9 +3204,7 @@ buf_page_optimistic_get( } if (UNIV_UNLIKELY(!success)) { - mutex_enter(&block->mutex); buf_block_buf_fix_dec(block); - mutex_exit(&block->mutex); return(FALSE); } @@ -3101,9 +3218,7 @@ buf_page_optimistic_get( rw_lock_x_unlock(&(block->lock)); } - mutex_enter(&block->mutex); buf_block_buf_fix_dec(block); - mutex_exit(&block->mutex); return(FALSE); } @@ -3215,9 +3330,7 @@ buf_page_get_known_nowait( } if (!success) { - mutex_enter(&block->mutex); buf_block_buf_fix_dec(block); - mutex_exit(&block->mutex); return(FALSE); } @@ -3325,9 +3438,7 @@ buf_page_try_get_func( } if (!success) { - mutex_enter(&block->mutex); buf_block_buf_fix_dec(block); - mutex_exit(&block->mutex); return(NULL); } @@ -3425,16 +3536,23 @@ buf_page_init( hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold); - if (UNIV_LIKELY(!hash_page)) { + if (hash_page == NULL) { + /* Block not found in the hash table */ } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) { - /* Preserve the reference count. */ mutex_enter(&buf_pool->zip_mutex); - ulint buf_fix_count = hash_page->buf_fix_count; + ib_uint32_t buf_fix_count = hash_page->buf_fix_count; ut_a(buf_fix_count > 0); - block->page.buf_fix_count += buf_fix_count; + +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_increment_uint32( + &block->page.buf_fix_count, buf_fix_count); +#else + block->page.buf_fix_count += ulint(buf_fix_count); +#endif /* PAGE_ATOMIC_REF_COUNT */ + buf_pool_watch_remove(buf_pool, fold, hash_page); mutex_exit(&buf_pool->zip_mutex); @@ -3459,8 +3577,9 @@ buf_page_init( ut_ad(!block->page.in_zip_hash); ut_ad(!block->page.in_page_hash); ut_d(block->page.in_page_hash = TRUE); - HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, - fold, &block->page); + + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, &block->page); + if (zip_size) { page_zip_set_size(&block->page.zip, zip_size); } @@ -3497,7 +3616,7 @@ buf_page_init_for_read( prio_rw_lock_t* hash_lock; mtr_t mtr; ulint fold; - ibool lru = FALSE; + ibool lru; void* data; buf_pool_t* buf_pool = buf_pool_get(space, offset); @@ -3572,12 +3691,18 @@ err_exit: ut_ad(buf_pool_from_bpage(bpage) == buf_pool); buf_page_init(buf_pool, space, offset, fold, zip_size, block); - rw_lock_x_unlock(hash_lock); + +#ifdef PAGE_ATOMIC_REF_COUNT + /* Note: We set the io state without the protection of + the block->lock. This is because other threads cannot + access this block unless it is in the hash table. */ + + buf_page_set_io_fix(bpage, BUF_IO_READ); +#endif /* PAGE_ATOMIC_REF_COUNT */ /* The block must be put to the LRU list, to the old blocks */ buf_LRU_add_block(bpage, TRUE/* to old blocks */); mutex_exit(&buf_pool->LRU_list_mutex); - lru = TRUE; /* We set a pass-type x-lock on the frame because then the same thread which called for the read operation @@ -3589,7 +3714,12 @@ err_exit: io-handler thread. */ rw_lock_x_lock_gen(&block->lock, BUF_IO_READ); + +#ifndef PAGE_ATOMIC_REF_COUNT buf_page_set_io_fix(bpage, BUF_IO_READ); +#endif /* !PAGE_ATOMIC_REF_COUNT */ + + rw_lock_x_unlock(hash_lock); if (zip_size) { /* buf_pool->LRU_list_mutex may be released and @@ -3628,28 +3758,24 @@ err_exit: rw_lock_x_lock(hash_lock); - /* If buf_buddy_alloc() allocated storage from the LRU list, - it released and reacquired buf_pool->LRU_list_mutex. Thus, we - must check the page_hash again, as it may have been + /* We must check the page_hash again, as it may have been modified. */ - if (UNIV_UNLIKELY(lru)) { - watch_page = buf_page_hash_get_low( + watch_page = buf_page_hash_get_low( buf_pool, space, offset, fold); - if (UNIV_UNLIKELY(watch_page + if (UNIV_UNLIKELY(watch_page && !buf_pool_watch_is_sentinel(buf_pool, watch_page))) { - /* The block was added by some other thread. */ - mutex_exit(&buf_pool->LRU_list_mutex); - rw_lock_x_unlock(hash_lock); - watch_page = NULL; - buf_buddy_free(buf_pool, data, zip_size); + /* The block was added by some other thread. */ + mutex_exit(&buf_pool->LRU_list_mutex); + rw_lock_x_unlock(hash_lock); + watch_page = NULL; + buf_buddy_free(buf_pool, data, zip_size); - bpage = NULL; - goto func_exit; - } + bpage = NULL; + goto func_exit; } bpage = buf_page_alloc_descriptor(); @@ -3681,13 +3807,24 @@ err_exit: ut_d(bpage->in_page_hash = TRUE); - if (UNIV_LIKELY_NULL(watch_page)) { + if (watch_page != NULL) { /* Preserve the reference count. */ - ulint buf_fix_count = watch_page->buf_fix_count; + ib_uint32_t buf_fix_count; + + buf_fix_count = watch_page->buf_fix_count; + ut_a(buf_fix_count > 0); + ut_ad(buf_own_zip_mutex_for_page(bpage)); + +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_increment_uint32( + &bpage->buf_fix_count, buf_fix_count); +#else bpage->buf_fix_count += buf_fix_count; +#endif /* PAGE_ATOMIC_REF_COUNT */ + ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page)); buf_pool_watch_remove(buf_pool, fold, watch_page); } @@ -3783,8 +3920,7 @@ buf_page_create( buf_block_free(free_block); - return(buf_page_get_with_no_latch(space, zip_size, - offset, mtr)); + return(buf_page_get_with_no_latch(space, zip_size, offset, mtr)); } /* If we get here, the page was not in buf_pool: init it there */ diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc index 506a5b177ba..c1bc0ee4c6e 100644 --- a/storage/xtradb/buf/buf0dblwr.cc +++ b/storage/xtradb/buf/buf0dblwr.cc @@ -351,13 +351,12 @@ At a database startup initializes the doublewrite buffer memory structure if we already have a doublewrite buffer created in the data files. If we are upgrading to an InnoDB version which supports multiple tablespaces, then this function performs the necessary update operations. If we are in a crash -recovery, this function uses a possible doublewrite buffer to restore -half-written pages in the data files. */ +recovery, this function loads the pages from double write buffer into memory. */ UNIV_INTERN void -buf_dblwr_init_or_restore_pages( -/*============================*/ - ibool restore_corrupt_pages) /*!< in: TRUE=restore pages */ +buf_dblwr_init_or_load_pages( +/*==========================*/ + bool load_corrupt_pages) { byte* buf; byte* read_buf; @@ -368,8 +367,8 @@ buf_dblwr_init_or_restore_pages( ibool reset_space_ids = FALSE; byte* doublewrite; ulint space_id; - ulint page_no; ulint i; + recv_dblwr_t& recv_dblwr = recv_sys->dblwr; /* We do the file i/o past the buffer pool */ @@ -431,13 +430,12 @@ buf_dblwr_init_or_restore_pages( for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) { ulint source_page_no; - page_no = mach_read_from_4(page + FIL_PAGE_OFFSET); if (reset_space_ids) { space_id = 0; mach_write_to_4(page - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0); + + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); /* We do not need to calculate new checksums for the pages because the field .._SPACE_ID does not affect them. Write the page back to where we read it from. */ @@ -449,19 +447,50 @@ buf_dblwr_init_or_restore_pages( + i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; } - fil_io(OS_FILE_WRITE, true, 0, 0, source_page_no, 0, + fil_io(OS_FILE_WRITE, true, space_id, 0, source_page_no, 0, UNIV_PAGE_SIZE, page, NULL); - } else { - space_id = mach_read_from_4( - page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + } else if (load_corrupt_pages) { + + recv_dblwr.add(page); } - if (!restore_corrupt_pages) { - /* The database was shut down gracefully: no need to - restore pages */ + page += UNIV_PAGE_SIZE; + } + + fil_flush_file_spaces(FIL_TABLESPACE); + +leave_func: + ut_free(unaligned_read_buf); +} + +/****************************************************************//** +Process the double write buffer pages. */ +void +buf_dblwr_process() +/*===============*/ +{ + ulint space_id; + ulint page_no; + ulint page_no_dblwr = 0; + byte* page; + byte* read_buf; + byte* unaligned_read_buf; + recv_dblwr_t& recv_dblwr = recv_sys->dblwr; + + unaligned_read_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); + + read_buf = static_cast<byte*>( + ut_align(unaligned_read_buf, UNIV_PAGE_SIZE)); + + for (std::list<byte*>::iterator i = recv_dblwr.pages.begin(); + i != recv_dblwr.pages.end(); ++i, ++page_no_dblwr ) { - } else if (!fil_tablespace_exists_in_mem(space_id)) { + page = *i; + page_no = mach_read_from_4(page + FIL_PAGE_OFFSET); + space_id = mach_read_from_4(page + FIL_PAGE_SPACE_ID); + + if (!fil_tablespace_exists_in_mem(space_id)) { /* Maybe we have dropped the single-table tablespace and this page once belonged to it: do nothing */ @@ -472,19 +501,8 @@ buf_dblwr_init_or_restore_pages( "within space bounds; space id %lu " "page number %lu, page %lu in " "doublewrite buf.", - (ulong) space_id, (ulong) page_no, (ulong) i); - - } else if (space_id == TRX_SYS_SPACE - && ((page_no >= block1 - && page_no - < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) - || (page_no >= block2 - && page_no - < (block2 - + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)))) { - - /* It is an unwritten doublewrite buffer page: - do nothing */ + (ulong) space_id, (ulong) page_no, + page_no_dblwr); } else { ulint zip_size = fil_space_get_zip_size(space_id); @@ -551,14 +569,11 @@ buf_dblwr_init_or_restore_pages( " the doublewrite buffer."); } } - - page += UNIV_PAGE_SIZE; } fil_flush_file_spaces(FIL_TABLESPACE); - -leave_func: ut_free(unaligned_read_buf); + recv_dblwr.pages.clear(); } /****************************************************************//** @@ -776,6 +791,7 @@ buf_dblwr_write_block_to_datafile( fil_io(flags, sync, buf_block_get_space(block), 0, buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE, (void*) block->frame, (void*) block); + } /********************************************************************//** diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index a7f55eb9c79..9e92cf321a7 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -59,8 +59,12 @@ need to protect it by a mutex. It is only ever read by the thread doing the shutdown */ UNIV_INTERN ibool buf_page_cleaner_is_active = FALSE; +/** Flag indicating if the lru_manager is in active state. */ +UNIV_INTERN bool buf_lru_manager_is_active = false; + #ifdef UNIV_PFS_THREAD UNIV_INTERN mysql_pfs_key_t buf_page_cleaner_thread_key; +UNIV_INTERN mysql_pfs_key_t buf_lru_manager_thread_key; #endif /* UNIV_PFS_THREAD */ /** If LRU list of a buf_pool is less than this size then LRU eviction @@ -503,15 +507,15 @@ buf_flush_ready_for_replace( #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); -#endif +#endif /* UNIV_DEBUG */ ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_ad(bpage->in_LRU_list); - if (UNIV_LIKELY(buf_page_in_file(bpage))) { + if (buf_page_in_file(bpage)) { return(bpage->oldest_modification == 0 - && buf_page_get_io_fix(bpage) == BUF_IO_NONE - && bpage->buf_fix_count == 0); + && bpage->buf_fix_count == 0 + && buf_page_get_io_fix(bpage) == BUF_IO_NONE); } ut_print_timestamp(stderr); @@ -552,13 +556,8 @@ buf_flush_ready_for_flush( case BUF_FLUSH_LIST: case BUF_FLUSH_LRU: case BUF_FLUSH_SINGLE_PAGE: - /* Because any thread may call single page flush, even - when owning locks on pages, to avoid deadlocks, we must - make sure that the that it is not buffer fixed. - The same holds true for LRU flush because a user thread - may end up waiting for an LRU flush to end while - holding locks on other pages. */ - return(bpage->buf_fix_count == 0); + return(true); + case BUF_FLUSH_N_TYPES: break; } @@ -982,9 +981,12 @@ Writes a flushable page asynchronously from the buffer pool to a file. NOTE: in simulated aio we must call os_aio_simulated_wake_handler_threads after we have posted a batch of writes! NOTE: buf_page_get_mutex(bpage) must be held upon entering this -function, and it will be released by this function. */ +function, and it will be released by this function if it returns true. +LRU_list_mutex must be held iff performing a single page flush and will be +released by the function if it returns true. +@return TRUE if the page was flushed */ UNIV_INTERN -void +bool buf_flush_page( /*===========*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ @@ -992,111 +994,98 @@ buf_flush_page( buf_flush_t flush_type, /*!< in: type of flush */ bool sync) /*!< in: true if sync IO request */ { - ib_mutex_t* block_mutex; - ibool is_uncompressed; - ut_ad(flush_type < BUF_FLUSH_N_TYPES); - ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); + /* Hold the LRU list mutex iff called for a single page LRU + flush. A single page LRU flush is already non-performant, and holding + the LRU list mutex allows us to avoid having to store the previous LRU + list page or to restart the LRU scan in + buf_flush_single_page_from_LRU(). */ + ut_ad(flush_type == BUF_FLUSH_SINGLE_PAGE || + !mutex_own(&buf_pool->LRU_list_mutex)); + ut_ad(flush_type != BUF_FLUSH_SINGLE_PAGE || + mutex_own(&buf_pool->LRU_list_mutex)); ut_ad(buf_page_in_file(bpage)); ut_ad(!sync || flush_type == BUF_FLUSH_SINGLE_PAGE); - block_mutex = buf_page_get_mutex(bpage); + ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); + ut_ad(mutex_own(block_mutex)); ut_ad(buf_flush_ready_for_flush(bpage, flush_type)); - mutex_enter(&buf_pool->flush_state_mutex); - - buf_page_set_io_fix(bpage, BUF_IO_WRITE); + bool is_uncompressed; - buf_page_set_flush_type(bpage, flush_type); - - if (buf_pool->n_flush[flush_type] == 0) { + is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); + ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex)); - os_event_reset(buf_pool->no_flush[flush_type]); - } + ibool flush; + rw_lock_t* rw_lock; + bool no_fix_count = bpage->buf_fix_count == 0; - buf_pool->n_flush[flush_type]++; + if (!is_uncompressed) { + flush = TRUE; + rw_lock = NULL; - mutex_exit(&buf_pool->flush_state_mutex); + } else if (!(no_fix_count || flush_type == BUF_FLUSH_LIST)) { + /* This is a heuristic, to avoid expensive S attempts. */ + flush = FALSE; + } else { - is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); - ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex)); + rw_lock = &reinterpret_cast<buf_block_t*>(bpage)->lock; - switch (flush_type) { - ibool is_s_latched; - case BUF_FLUSH_LIST: - /* If the simulated aio thread is not running, we must - not wait for any latch, as we may end up in a deadlock: - if buf_fix_count == 0, then we know we need not wait */ - - is_s_latched = (bpage->buf_fix_count == 0); - if (is_s_latched && is_uncompressed) { - rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock, - BUF_IO_WRITE); + if (flush_type != BUF_FLUSH_LIST) { + flush = rw_lock_s_lock_gen_nowait( + rw_lock, BUF_IO_WRITE); + } else { + /* Will S lock later */ + flush = TRUE; } + } - mutex_exit(block_mutex); + if (flush) { - /* Even though bpage is not protected by any mutex at - this point, it is safe to access bpage, because it is - io_fixed and oldest_modification != 0. Thus, it - cannot be relocated in the buffer pool or removed from - flush_list or LRU_list. */ + /* We are committed to flushing by the time we get here */ - if (!is_s_latched) { - buf_dblwr_flush_buffered_writes(); + mutex_enter(&buf_pool->flush_state_mutex); - if (is_uncompressed) { - rw_lock_s_lock_gen(&((buf_block_t*) bpage) - ->lock, BUF_IO_WRITE); - } - } + buf_page_set_io_fix(bpage, BUF_IO_WRITE); - break; + buf_page_set_flush_type(bpage, flush_type); - case BUF_FLUSH_LRU: - case BUF_FLUSH_SINGLE_PAGE: - /* VERY IMPORTANT: - Because any thread may call single page flush, even when - owning locks on pages, to avoid deadlocks, we must make - sure that the s-lock is acquired on the page without - waiting: this is accomplished because - buf_flush_ready_for_flush() must hold, and that requires - the page not to be bufferfixed. - The same holds true for LRU flush because a user thread - may end up waiting for an LRU flush to end while - holding locks on other pages. */ - - if (is_uncompressed) { - rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock, - BUF_IO_WRITE); + if (buf_pool->n_flush[flush_type] == 0) { + + os_event_reset(buf_pool->no_flush[flush_type]); } - /* Note that the s-latch is acquired before releasing the - buf_page_get_mutex() mutex: this ensures that the latch is - acquired immediately. */ + ++buf_pool->n_flush[flush_type]; + + mutex_exit(&buf_pool->flush_state_mutex); mutex_exit(block_mutex); - break; - default: - ut_error; - } + if (flush_type == BUF_FLUSH_SINGLE_PAGE) + mutex_exit(&buf_pool->LRU_list_mutex); + + if (flush_type == BUF_FLUSH_LIST + && is_uncompressed + && !rw_lock_s_lock_gen_nowait(rw_lock, BUF_IO_WRITE)) { + /* avoiding deadlock possibility involves doublewrite + buffer, should flush it, because it might hold the + another block->lock. */ + buf_dblwr_flush_buffered_writes(); - /* Even though bpage is not protected by any mutex at this - point, it is safe to access bpage, because it is io_fixed and - oldest_modification != 0. Thus, it cannot be relocated in the - buffer pool or removed from flush_list or LRU_list. */ + rw_lock_s_lock_gen(rw_lock, BUF_IO_WRITE); + } -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Flushing %u space %u page %u\n", - flush_type, bpage->space, bpage->offset); - } -#endif /* UNIV_DEBUG */ - buf_flush_write_block_low(bpage, flush_type, sync); + /* Even though bpage is not protected by any mutex at this + point, it is safe to access bpage, because it is io_fixed and + oldest_modification != 0. Thus, it cannot be relocated in the + buffer pool or removed from flush_list or LRU_list. */ + + buf_flush_write_block_low(bpage, flush_type, sync); + } + + return(flush); } # if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG @@ -1115,15 +1104,16 @@ buf_flush_page_try( { ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); ut_ad(mutex_own(&block->mutex)); + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); if (!buf_flush_ready_for_flush(&block->page, BUF_FLUSH_SINGLE_PAGE)) { return(FALSE); } - /* The following call will release the buffer pool and - block mutex. */ - buf_flush_page(buf_pool, &block->page, BUF_FLUSH_SINGLE_PAGE, true); - return(TRUE); + /* The following call will release the LRU list and + block mutex if successful. */ + return(buf_flush_page( + buf_pool, &block->page, BUF_FLUSH_SINGLE_PAGE, true)); } # endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ /***********************************************************//** @@ -1199,7 +1189,6 @@ buf_flush_try_neighbors( ulint i; ulint low; ulint high; - ulint count = 0; buf_pool_t* buf_pool = buf_pool_get(space, offset); ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); @@ -1257,9 +1246,10 @@ buf_flush_try_neighbors( high = fil_space_get_size(space); } + ulint count = 0; + for (i = low; i < high; i++) { - buf_page_t* bpage; prio_rw_lock_t* hash_lock; ib_mutex_t* block_mutex; @@ -1281,10 +1271,10 @@ buf_flush_try_neighbors( buf_pool = buf_pool_get(space, i); /* We only want to flush pages from this buffer pool. */ - bpage = buf_page_hash_get_s_locked(buf_pool, space, i, - &hash_lock); + buf_page_t* bpage = buf_page_hash_get_s_locked(buf_pool, + space, i, &hash_lock); - if (!bpage) { + if (bpage == NULL) { continue; } @@ -1305,19 +1295,12 @@ buf_flush_try_neighbors( || buf_page_is_old(bpage)) { if (buf_flush_ready_for_flush(bpage, flush_type) - && (i == offset || !bpage->buf_fix_count)) { - /* We only try to flush those - neighbors != offset where the buf fix - count is zero, as we then know that we - probably can latch the page without a - semaphore wait. Semaphore waits are - expensive because we must flush the - doublewrite buffer before we start - waiting. */ - - buf_flush_page(buf_pool, bpage, flush_type, false); - ut_ad(!mutex_own(block_mutex)); - count++; + && (i == offset || bpage->buf_fix_count == 0) + && buf_flush_page( + buf_pool, bpage, flush_type, false)) { + + ++count; + continue; } } @@ -1358,8 +1341,8 @@ buf_flush_page_and_try_neighbors( ulint* count) /*!< in/out: number of pages flushed */ { + ibool flushed; ib_mutex_t* block_mutex = NULL; - ibool flushed = FALSE; #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); #endif /* UNIV_DEBUG */ @@ -1374,21 +1357,10 @@ buf_flush_page_and_try_neighbors( mutex_enter(block_mutex); } - if (UNIV_UNLIKELY(buf_page_get_state(bpage) - == BUF_BLOCK_REMOVE_HASH)) { - - /* In case we don't hold the LRU list mutex, we may see a page - that is about to be relocated on the flush list. Do not - attempt to flush it. */ - ut_ad(flush_type == BUF_FLUSH_LIST); - return (flushed); - } - - ut_a(buf_page_in_file(bpage)); + ut_a(buf_page_in_file(bpage) + || buf_page_get_state(bpage) == BUF_BLOCK_REMOVE_HASH); if (buf_flush_ready_for_flush(bpage, flush_type)) { - ulint space; - ulint offset; buf_pool_t* buf_pool; buf_pool = buf_pool_from_bpage(bpage); @@ -1399,8 +1371,10 @@ buf_flush_page_and_try_neighbors( /* These fields are protected by the buf_page_get_mutex() mutex. */ - space = buf_page_get_space(bpage); - offset = buf_page_get_page_no(bpage); + /* Read the fields directly in order to avoid asserting on + BUF_BLOCK_REMOVE_HASH pages. */ + ulint space = bpage->space; + ulint offset = bpage->offset; if (flush_type == BUF_FLUSH_LRU) { mutex_exit(block_mutex); @@ -1409,11 +1383,8 @@ buf_flush_page_and_try_neighbors( } /* Try to flush also all the neighbors */ - *count += buf_flush_try_neighbors(space, - offset, - flush_type, - *count, - n_to_flush); + *count += buf_flush_try_neighbors( + space, offset, flush_type, *count, n_to_flush); if (flush_type == BUF_FLUSH_LRU) { mutex_enter(&buf_pool->LRU_list_mutex); @@ -1421,8 +1392,12 @@ buf_flush_page_and_try_neighbors( buf_flush_list_mutex_enter(buf_pool); } flushed = TRUE; + } else if (flush_type == BUF_FLUSH_LRU) { mutex_exit(block_mutex); + flushed = FALSE; + } else { + flushed = FALSE; } ut_ad((flush_type == BUF_FLUSH_LRU @@ -1574,6 +1549,7 @@ buf_flush_LRU_list_batch( of the flushed pages then the scan becomes O(n*n). */ if (evict) { + if (buf_LRU_free_page(bpage, true)) { mutex_exit(block_mutex); @@ -1588,19 +1564,42 @@ buf_flush_LRU_list_batch( } } else if (UNIV_LIKELY(!failed_acquire)) { + ulint space; + ulint offset; + buf_page_t* prev_bpage; + + prev_bpage = UT_LIST_GET_PREV(LRU, bpage); + + /* Save the previous bpage */ + + if (prev_bpage != NULL) { + space = prev_bpage->space; + offset = prev_bpage->offset; + } else { + space = ULINT_UNDEFINED; + offset = ULINT_UNDEFINED; + } + if (buf_flush_page_and_try_neighbors( bpage, BUF_FLUSH_LRU, max, &n->flushed)) { - lru_position = 0; - /* LRU list mutex was released. - Restart the scan. */ - bpage = UT_LIST_GET_LAST(buf_pool->LRU); - } else { + reposition the iterator. Note: the + prev block could have been repositioned + too but that should be rare. */ - bpage = UT_LIST_GET_PREV(LRU, bpage); + if (prev_bpage != NULL) { + + ut_ad(space != ULINT_UNDEFINED); + ut_ad(offset != ULINT_UNDEFINED); + + prev_bpage = buf_page_hash_get( + buf_pool, space, offset); + } } + + bpage = prev_bpage; } free_len = UT_LIST_GET_LEN(buf_pool->free); @@ -1912,7 +1911,7 @@ buf_flush_wait_batch_end( } } else { thd_wait_begin(NULL, THD_WAIT_DISKIO); - os_event_wait(buf_pool->no_flush[type]); + os_event_wait(buf_pool->no_flush[type]); thd_wait_end(NULL); } } @@ -2101,9 +2100,7 @@ buf_flush_single_page_from_LRU( { ulint scanned; buf_page_t* bpage; - ib_mutex_t* block_mutex; - ibool freed; - bool evict_zip; + ibool flushed = FALSE; mutex_enter(&buf_pool->LRU_list_mutex); @@ -2111,18 +2108,30 @@ buf_flush_single_page_from_LRU( bpage != NULL; bpage = UT_LIST_GET_PREV(LRU, bpage), ++scanned) { - block_mutex = buf_page_get_mutex(bpage); + ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); + mutex_enter(block_mutex); - if (buf_flush_ready_for_flush(bpage, - BUF_FLUSH_SINGLE_PAGE)) { - /* buf_flush_page() will release the block - mutex */ - break; + + if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)) { + + /* The following call will release the LRU list + and block mutex. */ + + flushed = buf_flush_page(buf_pool, bpage, + BUF_FLUSH_SINGLE_PAGE, true); + + if (flushed) { + /* buf_flush_page() will release the + block mutex */ + break; + } } + mutex_exit(block_mutex); } - mutex_exit(&buf_pool->LRU_list_mutex); + if (!flushed) + mutex_exit(&buf_pool->LRU_list_mutex); MONITOR_INC_VALUE_CUMULATIVE( MONITOR_LRU_SINGLE_FLUSH_SCANNED, @@ -2130,13 +2139,13 @@ buf_flush_single_page_from_LRU( MONITOR_LRU_SINGLE_FLUSH_SCANNED_PER_CALL, scanned); - if (!bpage) { + if (bpage == NULL) { /* Can't find a single flushable page. */ return(FALSE); } - /* The following call will release the buf_page_get_mutex() mutex. */ - buf_flush_page(buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, true); + + ibool freed = FALSE; /* At this point the page has been written to the disk. As we are not holding LRU list or buf_page_get_mutex() mutex therefore @@ -2151,30 +2160,30 @@ buf_flush_single_page_from_LRU( bpage != NULL; bpage = UT_LIST_GET_PREV(LRU, bpage)) { - ibool ready; + ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - block_mutex = buf_page_get_mutex(bpage); mutex_enter(block_mutex); - ready = buf_flush_ready_for_replace(bpage); + + ibool ready = buf_flush_ready_for_replace(bpage); + if (ready) { + bool evict_zip; + + evict_zip = !buf_LRU_evict_from_unzip_LRU(buf_pool);; + + freed = buf_LRU_free_page(bpage, evict_zip); + + mutex_exit(block_mutex); + break; } - mutex_exit(block_mutex); - } + mutex_exit(block_mutex); - if (!bpage) { - /* Can't find a single replaceable page. */ - mutex_exit(&buf_pool->LRU_list_mutex); - return(FALSE); } - evict_zip = !buf_LRU_evict_from_unzip_LRU(buf_pool);; - - freed = buf_LRU_free_page(bpage, evict_zip); if (!freed) mutex_exit(&buf_pool->LRU_list_mutex); - mutex_exit(block_mutex); return(freed); } @@ -2626,7 +2635,7 @@ page_cleaner_adapt_flush_sleep_time(void) /******************************************************************//** page_cleaner thread tasked with flushing dirty pages from the buffer -pools. As of now we'll have only one instance of this thread. +pool flush lists. As of now we'll have only one instance of this thread. @return a dummy parameter */ extern "C" UNIV_INTERN os_thread_ret_t @@ -2639,7 +2648,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( ulint next_loop_time = ut_time_ms() + 1000; ulint n_flushed = 0; ulint last_activity = srv_get_activity_count(); - ulint lru_sleep_time = srv_cleaner_max_lru_time; + ulint last_activity_time = ut_time_ms(); ut_ad(!srv_read_only_mode); @@ -2660,8 +2669,8 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( while (srv_shutdown_state == SRV_SHUTDOWN_NONE) { - ulint flush_sleep_time; ulint page_cleaner_sleep_time; + ibool server_active; srv_current_thread_priority = srv_cleaner_thread_priority; @@ -2674,20 +2683,20 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( page_cleaner_sleep_if_needed(next_loop_time); } - page_cleaner_adapt_lru_sleep_time(&lru_sleep_time); - - flush_sleep_time = page_cleaner_adapt_flush_sleep_time(); - - page_cleaner_sleep_time = ut_min(lru_sleep_time, - flush_sleep_time); + page_cleaner_sleep_time + = page_cleaner_adapt_flush_sleep_time(); next_loop_time = ut_time_ms() + page_cleaner_sleep_time; - /* Flush pages from end of LRU if required */ - n_flushed = buf_flush_LRU_tail(); + server_active = srv_check_activity(last_activity); + if (server_active + || ut_time_ms() - last_activity_time < 1000) { - if (srv_check_activity(last_activity)) { - last_activity = srv_get_activity_count(); + if (server_active) { + + last_activity = srv_get_activity_count(); + last_activity_time = ut_time_ms(); + } /* Flush pages from flush_list if required */ n_flushed += page_cleaner_flush_pages_if_needed(); @@ -2778,6 +2787,74 @@ thread_exit: OS_THREAD_DUMMY_RETURN; } +/******************************************************************//** +lru_manager thread tasked with performing LRU flushes and evictions to refill +the buffer pool free lists. As of now we'll have only one instance of this +thread. +@return a dummy parameter */ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(buf_flush_lru_manager_thread)( +/*==========================================*/ + void* arg __attribute__((unused))) + /*!< in: a dummy parameter required by + os_thread_create */ +{ + ulint next_loop_time = ut_time_ms() + 1000; + ulint lru_sleep_time = srv_cleaner_max_lru_time; + +#ifdef UNIV_PFS_THREAD + pfs_register_thread(buf_lru_manager_thread_key); +#endif /* UNIV_PFS_THREAD */ + + srv_lru_manager_tid = os_thread_get_tid(); + + os_thread_set_priority(srv_lru_manager_tid, + srv_sched_priority_cleaner); + +#ifdef UNIV_DEBUG_THREAD_CREATION + fprintf(stderr, "InnoDB: lru_manager thread running, id %lu\n", + os_thread_pf(os_thread_get_curr_id())); +#endif /* UNIV_DEBUG_THREAD_CREATION */ + + buf_lru_manager_is_active = true; + + /* On server shutdown, the LRU manager thread runs through cleanup + phase to provide free pages for the master and purge threads. */ + while (srv_shutdown_state == SRV_SHUTDOWN_NONE + || srv_shutdown_state == SRV_SHUTDOWN_CLEANUP) { + + ulint n_flushed_lru; + + srv_current_thread_priority = srv_cleaner_thread_priority; + + page_cleaner_sleep_if_needed(next_loop_time); + + page_cleaner_adapt_lru_sleep_time(&lru_sleep_time); + + next_loop_time = ut_time_ms() + lru_sleep_time; + + n_flushed_lru = buf_flush_LRU_tail(); + + if (n_flushed_lru) { + + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE, + MONITOR_FLUSH_BACKGROUND_COUNT, + MONITOR_FLUSH_BACKGROUND_PAGES, + n_flushed_lru); + } + } + + buf_lru_manager_is_active = false; + + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. */ + os_thread_exit(NULL); + + OS_THREAD_DUMMY_RETURN; +} + #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG /** Functor to validate the flush list. */ diff --git a/storage/xtradb/buf/buf0lru.cc b/storage/xtradb/buf/buf0lru.cc index 8a6d042f4c7..d3e0eda0257 100644 --- a/storage/xtradb/buf/buf0lru.cc +++ b/storage/xtradb/buf/buf0lru.cc @@ -503,17 +503,15 @@ buf_flush_or_remove_page( yet; maybe the system is currently reading it in, or flushing the modifications to the file */ return(false); - } - bool processed = false; - buf_flush_list_mutex_exit(buf_pool); /* We don't have to worry about bpage becoming a dangling pointer by a compressed page flush list relocation because buf_page_get_gen() won't be called for pages from this tablespace. */ + bool processed; mutex_enter(block_mutex); @@ -529,6 +527,7 @@ buf_flush_or_remove_page( mutex_exit(block_mutex); *must_restart = TRUE; + processed = false; } else if (!flush) { @@ -538,29 +537,29 @@ buf_flush_or_remove_page( processed = true; - } else if (buf_flush_ready_for_flush(bpage, - BUF_FLUSH_SINGLE_PAGE)) { + } else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)) { - mutex_exit(&buf_pool->LRU_list_mutex); + if (buf_flush_page( + buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, false)) { - /* The following call will release the buf_page_get_mutex() - mutex. */ - buf_flush_page(buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, false); - ut_ad(!mutex_own(block_mutex)); + /* Wake possible simulated aio thread to actually + post the writes to the operating system */ + os_aio_simulated_wake_handler_threads(); - /* Wake possible simulated aio thread to actually - post the writes to the operating system */ - os_aio_simulated_wake_handler_threads(); + mutex_enter(&buf_pool->LRU_list_mutex); - mutex_enter(&buf_pool->LRU_list_mutex); + processed = true; + + } else { + mutex_exit(block_mutex); + + processed = false; + } - processed = true; } else { - /* Not ready for flush. It can't be IO fixed because we - checked for that at the start of the function. It must - be buffer fixed. */ - ut_ad(bpage->buf_fix_count > 0); mutex_exit(block_mutex); + + processed = false; } buf_flush_list_mutex_enter(buf_pool); @@ -1365,8 +1364,9 @@ loop: } if (srv_empty_free_list_algorithm == SRV_EMPTY_FREE_LIST_BACKOFF - && buf_page_cleaner_is_active - && srv_shutdown_state == SRV_SHUTDOWN_NONE) { + && buf_lru_manager_is_active + && (srv_shutdown_state == SRV_SHUTDOWN_NONE + || srv_shutdown_state == SRV_SHUTDOWN_CLEANUP)) { /* Backoff to minimize the free list mutex contention while the free list is empty */ @@ -1408,12 +1408,13 @@ loop: goto loop; } else { - /* The cleaner is not running or Oracle MySQL 5.6 algorithm was - requested, will perform a single page flush */ + /* The LRU manager is not running or Oracle MySQL 5.6 algorithm + was requested, will perform a single page flush */ ut_ad((srv_empty_free_list_algorithm == SRV_EMPTY_FREE_LIST_LEGACY) - || !buf_page_cleaner_is_active - || (srv_shutdown_state != SRV_SHUTDOWN_NONE)); + || !buf_lru_manager_is_active + || (srv_shutdown_state != SRV_SHUTDOWN_NONE + && srv_shutdown_state != SRV_SHUTDOWN_CLEANUP)); } mutex_enter(&buf_pool->flush_state_mutex); @@ -1829,8 +1830,6 @@ buf_LRU_add_block_low( { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool); - ut_ad(bpage); ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); ut_a(buf_page_in_file(bpage)); @@ -1980,7 +1979,7 @@ buf_LRU_free_page( if (!buf_page_can_relocate(bpage)) { - /* Do not free buffer-fixed or I/O-fixed blocks. */ + /* Do not free buffer fixed or I/O-fixed blocks. */ return(false); } @@ -1995,12 +1994,10 @@ buf_LRU_free_page( if (bpage->oldest_modification) { return(false); } - } else if ((bpage->oldest_modification) - && (buf_page_get_state(bpage) - != BUF_BLOCK_FILE_PAGE)) { + } else if (bpage->oldest_modification > 0 + && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { - ut_ad(buf_page_get_state(bpage) - == BUF_BLOCK_ZIP_DIRTY); + ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY); return(false); @@ -2088,10 +2085,8 @@ not_freed: rw_lock_x_lock(hash_lock); mutex_enter(block_mutex); - ut_a(!buf_page_hash_get_low(buf_pool, - bpage->space, - bpage->offset, - fold)); + ut_a(!buf_page_hash_get_low( + buf_pool, b->space, b->offset, fold)); b->state = b->oldest_modification ? BUF_BLOCK_ZIP_DIRTY @@ -2489,6 +2484,11 @@ buf_LRU_block_remove_hashed( UNIV_PAGE_SIZE); buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH); + if (buf_pool->flush_rbt == NULL) { + bpage->space = ULINT32_UNDEFINED; + bpage->offset = ULINT32_UNDEFINED; + } + /* Question: If we release bpage and hash mutex here then what protects us against: 1) Some other thread buffer fixing this page diff --git a/storage/xtradb/dict/dict0boot.cc b/storage/xtradb/dict/dict0boot.cc index 94a3af2852b..b57a8873bd5 100644 --- a/storage/xtradb/dict/dict0boot.cc +++ b/storage/xtradb/dict/dict0boot.cc @@ -302,7 +302,8 @@ dict_boot(void) /* Insert into the dictionary cache the descriptions of the basic system tables */ /*-------------------------*/ - table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0, 0); + table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0, 0, + false); dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0); dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0); @@ -356,7 +357,8 @@ dict_boot(void) ut_a(error == DB_SUCCESS); /*-------------------------*/ - table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0, 0); + table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0, 0, + false); dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0); dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4); @@ -389,7 +391,8 @@ dict_boot(void) ut_a(error == DB_SUCCESS); /*-------------------------*/ - table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0, 0); + table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0, 0, + false); dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0); dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0); @@ -422,7 +425,8 @@ dict_boot(void) ut_a(error == DB_SUCCESS); /*-------------------------*/ - table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0, 0); + table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0, 0, + false); dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0); dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4); diff --git a/storage/xtradb/dict/dict0crea.cc b/storage/xtradb/dict/dict0crea.cc index eba5417dc76..ff892749d4f 100644 --- a/storage/xtradb/dict/dict0crea.cc +++ b/storage/xtradb/dict/dict0crea.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -270,6 +270,12 @@ dict_build_table_def_step( thr_get_trx(thr)->table_id = table->id; + /* Always set this bit for all new created tables */ + DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME); + DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", + DICT_TF2_FLAG_UNSET(table, + DICT_TF2_FTS_AUX_HEX_NAME);); + if (use_tablespace) { /* This table will not use the system tablespace. Get a new space id. */ diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc index 0aaec42cd2f..5cc013b7d6b 100644 --- a/storage/xtradb/dict/dict0dict.cc +++ b/storage/xtradb/dict/dict0dict.cc @@ -121,19 +121,6 @@ UNIV_INTERN mysql_pfs_key_t dict_foreign_err_mutex_key; /** Identifies generated InnoDB foreign key names */ static char dict_ibfk[] = "_ibfk_"; -/** array of rw locks protecting -dict_table_t::stat_initialized -dict_table_t::stat_n_rows (*) -dict_table_t::stat_clustered_index_size -dict_table_t::stat_sum_of_other_index_sizes -dict_table_t::stat_modified_counter (*) -dict_table_t::indexes*::stat_n_diff_key_vals[] -dict_table_t::indexes*::stat_index_size -dict_table_t::indexes*::stat_n_leaf_pages -(*) those are not always protected for performance reasons */ -#define DICT_TABLE_STATS_LATCHES_SIZE 64 -static rw_lock_t dict_table_stats_latches[DICT_TABLE_STATS_LATCHES_SIZE]; - /*******************************************************************//** Tries to find column names for the index and sets the col field of the index. @@ -332,32 +319,31 @@ dict_mutex_exit_for_mysql(void) mutex_exit(&(dict_sys->mutex)); } -/** Get the latch that protects the stats of a given table */ -#define GET_TABLE_STATS_LATCH(table) \ - (&dict_table_stats_latches[ut_fold_ull((ib_uint64_t) table) \ - % DICT_TABLE_STATS_LATCHES_SIZE]) - /**********************************************************************//** -Lock the appropriate latch to protect a given table's statistics. -table->id is used to pick the corresponding latch from a global array of -latches. */ +Lock the appropriate latch to protect a given table's statistics. */ UNIV_INTERN void dict_table_stats_lock( /*==================*/ - const dict_table_t* table, /*!< in: table */ - ulint latch_mode) /*!< in: RW_S_LATCH or - RW_X_LATCH */ + dict_table_t* table, /*!< in: table */ + ulint latch_mode) /*!< in: RW_S_LATCH or RW_X_LATCH */ { ut_ad(table != NULL); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + if (table->stats_latch == NULL) { + /* This is a dummy table object that is private in the current + thread and is not shared between multiple threads, thus we + skip any locking. */ + return; + } + switch (latch_mode) { case RW_S_LATCH: - rw_lock_s_lock(GET_TABLE_STATS_LATCH(table)); + rw_lock_s_lock(table->stats_latch); break; case RW_X_LATCH: - rw_lock_x_lock(GET_TABLE_STATS_LATCH(table)); + rw_lock_x_lock(table->stats_latch); break; case RW_NO_LATCH: /* fall through */ @@ -372,19 +358,26 @@ UNIV_INTERN void dict_table_stats_unlock( /*====================*/ - const dict_table_t* table, /*!< in: table */ - ulint latch_mode) /*!< in: RW_S_LATCH or + dict_table_t* table, /*!< in: table */ + ulint latch_mode) /*!< in: RW_S_LATCH or RW_X_LATCH */ { ut_ad(table != NULL); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + if (table->stats_latch == NULL) { + /* This is a dummy table object that is private in the current + thread and is not shared between multiple threads, thus we + skip any locking. */ + return; + } + switch (latch_mode) { case RW_S_LATCH: - rw_lock_s_unlock(GET_TABLE_STATS_LATCH(table)); + rw_lock_s_unlock(table->stats_latch); break; case RW_X_LATCH: - rw_lock_x_unlock(GET_TABLE_STATS_LATCH(table)); + rw_lock_x_unlock(table->stats_latch); break; case RW_NO_LATCH: /* fall through */ @@ -880,8 +873,6 @@ void dict_init(void) /*===========*/ { - int i; - dict_sys = static_cast<dict_sys_t*>(mem_zalloc(sizeof(*dict_sys))); mutex_create(dict_sys_mutex_key, &dict_sys->mutex, SYNC_DICT); @@ -902,11 +893,6 @@ dict_init(void) mutex_create(dict_foreign_err_mutex_key, &dict_foreign_err_mutex, SYNC_NO_ORDER_CHECK); } - - for (i = 0; i < DICT_TABLE_STATS_LATCHES_SIZE; i++) { - rw_lock_create(dict_table_stats_latch_key, - &dict_table_stats_latches[i], SYNC_INDEX_TREE); - } } /**********************************************************************//** @@ -5770,7 +5756,8 @@ dict_ind_init(void) dict_table_t* table; /* create dummy table and index for REDUNDANT infimum and supremum */ - table = dict_mem_table_create("SYS_DUMMY1", DICT_HDR_SPACE, 1, 0, 0); + table = dict_mem_table_create("SYS_DUMMY1", DICT_HDR_SPACE, 1, 0, 0, + true); dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR, DATA_ENGLISH | DATA_NOT_NULL, 8); @@ -5783,7 +5770,7 @@ dict_ind_init(void) /* create dummy table and index for COMPACT infimum and supremum */ table = dict_mem_table_create("SYS_DUMMY2", DICT_HDR_SPACE, 1, - DICT_TF_COMPACT, 0); + DICT_TF_COMPACT, 0, true); dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR, DATA_ENGLISH | DATA_NOT_NULL, 8); dict_ind_compact = dict_mem_index_create("SYS_DUMMY2", "SYS_DUMMY2", @@ -6010,6 +5997,17 @@ dict_table_check_for_dup_indexes( } #endif /* UNIV_DEBUG */ +/** Auxiliary macro used inside dict_table_schema_check(). */ +#define CREATE_TYPES_NAMES() \ + dtype_sql_name((unsigned) req_schema->columns[i].mtype, \ + (unsigned) req_schema->columns[i].prtype_mask, \ + (unsigned) req_schema->columns[i].len, \ + req_type, sizeof(req_type)); \ + dtype_sql_name(table->cols[j].mtype, \ + table->cols[j].prtype, \ + table->cols[j].len, \ + actual_type, sizeof(actual_type)) + /*********************************************************************//** Checks whether a table exists and whether it has the given structure. The table must have the same number of columns with the same names and @@ -6029,6 +6027,8 @@ dict_table_schema_check( size_t errstr_sz) /*!< in: errstr size */ { char buf[MAX_FULL_NAME_LEN]; + char req_type[64]; + char actual_type[64]; dict_table_t* table; ulint i; @@ -6080,9 +6080,6 @@ dict_table_schema_check( for (i = 0; i < req_schema->n_cols; i++) { ulint j; - char req_type[64]; - char actual_type[64]; - /* check if i'th column is the same in both arrays */ if (innobase_strcasecmp(req_schema->columns[i].name, dict_table_get_col_name(table, i)) == 0) { @@ -6124,19 +6121,11 @@ dict_table_schema_check( /* we found a column with the same name on j'th position, compare column types and flags */ - dtype_sql_name(req_schema->columns[i].mtype, - req_schema->columns[i].prtype_mask, - req_schema->columns[i].len, - req_type, sizeof(req_type)); - - dtype_sql_name(table->cols[j].mtype, - table->cols[j].prtype, - table->cols[j].len, - actual_type, sizeof(actual_type)); - /* check length for exact match */ if (req_schema->columns[i].len != table->cols[j].len) { + CREATE_TYPES_NAMES(); + ut_snprintf(errstr, errstr_sz, "Column %s in table %s is %s " "but should be %s (length mismatch).", @@ -6151,6 +6140,8 @@ dict_table_schema_check( /* check mtype for exact match */ if (req_schema->columns[i].mtype != table->cols[j].mtype) { + CREATE_TYPES_NAMES(); + ut_snprintf(errstr, errstr_sz, "Column %s in table %s is %s " "but should be %s (type mismatch).", @@ -6168,6 +6159,8 @@ dict_table_schema_check( & req_schema->columns[i].prtype_mask) != req_schema->columns[i].prtype_mask) { + CREATE_TYPES_NAMES(); + ut_snprintf(errstr, errstr_sz, "Column %s in table %s is %s " "but should be %s (flags mismatch).", @@ -6326,10 +6319,6 @@ dict_close(void) mem_free(dict_sys); dict_sys = NULL; - - for (i = 0; i < DICT_TABLE_STATS_LATCHES_SIZE; i++) { - rw_lock_free(&dict_table_stats_latches[i]); - } } #ifdef UNIV_DEBUG diff --git a/storage/xtradb/dict/dict0load.cc b/storage/xtradb/dict/dict0load.cc index 5c97b5aba7c..9add88c0ea5 100644 --- a/storage/xtradb/dict/dict0load.cc +++ b/storage/xtradb/dict/dict0load.cc @@ -1092,10 +1092,34 @@ loop: case DICT_CHECK_ALL_LOADED: /* All tablespaces should have been found in fil_load_single_table_tablespaces(). */ - - fil_space_for_table_exists_in_mem( + if (fil_space_for_table_exists_in_mem( space_id, name, TRUE, !(is_temp || discarded), - false, NULL, 0); + false, NULL, 0) + && !(is_temp || discarded)) { + /* If user changes the path of .ibd files in + *.isl files before doing crash recovery , + then this leads to inconsistency in + SYS_DATAFILES system table because the + tables are loaded from the updated path + but the SYS_DATAFILES still points to the + old path.Therefore after crash recovery + update SYS_DATAFILES with the updated path.*/ + ut_ad(space_id); + ut_ad(recv_needed_recovery); + char *dict_path = dict_get_first_path(space_id, + name); + char *remote_path = fil_read_link_file(name); + if(dict_path && remote_path) { + if(strcmp(dict_path,remote_path)) { + dict_update_filepath(space_id, + remote_path); + } + } + if(dict_path) + mem_free(dict_path); + if(remote_path) + mem_free(remote_path); + } break; case DICT_CHECK_SOME_LOADED: @@ -2151,7 +2175,8 @@ err_len: /* See if the tablespace is available. */ *table = dict_mem_table_create( - name, space, n_cols & ~DICT_N_COLS_COMPACT, flags, flags2); + name, space, n_cols & ~DICT_N_COLS_COMPACT, flags, flags2, + false); field = rec_get_nth_field_old(rec, DICT_FLD__SYS_TABLES__ID, &len); ut_ad(len == 8); /* this was checked earlier */ diff --git a/storage/xtradb/dict/dict0mem.cc b/storage/xtradb/dict/dict0mem.cc index f69e6cc47ae..7ce42fa8efc 100644 --- a/storage/xtradb/dict/dict0mem.cc +++ b/storage/xtradb/dict/dict0mem.cc @@ -65,7 +65,10 @@ dict_mem_table_create( the table is placed */ ulint n_cols, /*!< in: number of columns */ ulint flags, /*!< in: table flags */ - ulint flags2) /*!< in: table flags2 */ + ulint flags2, /*!< in: table flags2 */ + bool nonshared)/*!< in: whether the table object is a dummy + one that does not need the initialization of + locking-related fields. */ { dict_table_t* table; mem_heap_t* heap; @@ -95,12 +98,27 @@ dict_mem_table_create( ut_d(table->magic_n = DICT_TABLE_MAGIC_N); + if (!nonshared) { + table->stats_latch = new rw_lock_t; + rw_lock_create(dict_table_stats_latch_key, table->stats_latch, + SYNC_INDEX_TREE); + } else { + table->stats_latch = NULL; + } + #ifndef UNIV_HOTBACKUP - table->autoinc_lock = static_cast<ib_lock_t*>( - mem_heap_alloc(heap, lock_get_size())); - mutex_create(autoinc_mutex_key, - &table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX); + if (!nonshared) { + + table->autoinc_lock = static_cast<ib_lock_t*>( + mem_heap_alloc(heap, lock_get_size())); + + mutex_create(autoinc_mutex_key, + &table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX); + } else { + + table->autoinc_lock = NULL; + } table->autoinc = 0; @@ -150,8 +168,18 @@ dict_mem_table_free( } } #ifndef UNIV_HOTBACKUP - mutex_free(&(table->autoinc_mutex)); + if (table->stats_latch) { + + mutex_free(&(table->autoinc_mutex)); + } #endif /* UNIV_HOTBACKUP */ + + if (table->stats_latch) { + + rw_lock_free(table->stats_latch); + delete table->stats_latch; + } + ut_free(table->name); mem_heap_free(table->heap); } diff --git a/storage/xtradb/dict/dict0stats.cc b/storage/xtradb/dict/dict0stats.cc index 8bf02f9785c..68c02a301cd 100644 --- a/storage/xtradb/dict/dict0stats.cc +++ b/storage/xtradb/dict/dict0stats.cc @@ -400,6 +400,11 @@ dict_stats_table_clone_create( t->corrupted = table->corrupted; + /* This private object "t" is not shared with other threads, so + we do not need the stats_latch. The lock/unlock routines will do + nothing if stats_latch is NULL. */ + t->stats_latch = NULL; + UT_LIST_INIT(t->indexes); for (index = dict_table_get_first_index(table); @@ -731,7 +736,7 @@ static dict_table_t* dict_stats_snapshot_create( /*=======================*/ - const dict_table_t* table) /*!< in: table whose stats to copy */ + dict_table_t* table) /*!< in: table whose stats to copy */ { mutex_enter(&dict_sys->mutex); @@ -2131,8 +2136,16 @@ dict_stats_save_index_stat( ret = dict_stats_exec_sql( pinfo, - "PROCEDURE INDEX_STATS_SAVE_INSERT () IS\n" + "PROCEDURE INDEX_STATS_SAVE () IS\n" "BEGIN\n" + + "DELETE FROM \"" INDEX_STATS_NAME "\"\n" + "WHERE\n" + "database_name = :database_name AND\n" + "table_name = :table_name AND\n" + "index_name = :index_name AND\n" + "stat_name = :stat_name;\n" + "INSERT INTO \"" INDEX_STATS_NAME "\"\n" "VALUES\n" "(\n" @@ -2147,47 +2160,6 @@ dict_stats_save_index_stat( ");\n" "END;"); - if (ret == DB_DUPLICATE_KEY) { - - pinfo = pars_info_create(); - pars_info_add_str_literal(pinfo, "database_name", db_utf8); - pars_info_add_str_literal(pinfo, "table_name", table_utf8); - UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name)); - pars_info_add_str_literal(pinfo, "index_name", index->name); - UNIV_MEM_ASSERT_RW_ABORT(&last_update, 4); - pars_info_add_int4_literal(pinfo, "last_update", last_update); - UNIV_MEM_ASSERT_RW_ABORT(stat_name, strlen(stat_name)); - pars_info_add_str_literal(pinfo, "stat_name", stat_name); - UNIV_MEM_ASSERT_RW_ABORT(&stat_value, 8); - pars_info_add_ull_literal(pinfo, "stat_value", stat_value); - if (sample_size != NULL) { - UNIV_MEM_ASSERT_RW_ABORT(sample_size, 8); - pars_info_add_ull_literal(pinfo, "sample_size", *sample_size); - } else { - pars_info_add_literal(pinfo, "sample_size", NULL, - UNIV_SQL_NULL, DATA_FIXBINARY, 0); - } - UNIV_MEM_ASSERT_RW_ABORT(stat_description, strlen(stat_description)); - pars_info_add_str_literal(pinfo, "stat_description", - stat_description); - - ret = dict_stats_exec_sql( - pinfo, - "PROCEDURE INDEX_STATS_SAVE_UPDATE () IS\n" - "BEGIN\n" - "UPDATE \"" INDEX_STATS_NAME "\" SET\n" - "last_update = :last_update,\n" - "stat_value = :stat_value,\n" - "sample_size = :sample_size,\n" - "stat_description = :stat_description\n" - "WHERE\n" - "database_name = :database_name AND\n" - "table_name = :table_name AND\n" - "index_name = :index_name AND\n" - "stat_name = :stat_name;\n" - "END;"); - } - if (ret != DB_SUCCESS) { char buf_table[MAX_FULL_NAME_LEN]; char buf_index[MAX_FULL_NAME_LEN]; @@ -2205,14 +2177,18 @@ dict_stats_save_index_stat( return(ret); } -/*********************************************************************//** -Save the table's statistics into the persistent statistics storage. +/** Save the table's statistics into the persistent statistics storage. +@param[in] table_orig table whose stats to save +@param[in] only_for_index if this is non-NULL, then stats for indexes +that are not equal to it will not be saved, if NULL, then all +indexes' stats are saved @return DB_SUCCESS or error code */ static dberr_t dict_stats_save( /*============*/ - dict_table_t* table_orig) /*!< in: table */ + dict_table_t* table_orig, + const index_id_t* only_for_index) { pars_info_t* pinfo; lint now; @@ -2234,26 +2210,27 @@ dict_stats_save( lint */ now = (lint) ut_time(); -#define PREPARE_PINFO_FOR_TABLE_SAVE(p, t, n) \ - do { \ - pars_info_add_str_literal((p), "database_name", db_utf8); \ - pars_info_add_str_literal((p), "table_name", table_utf8); \ - pars_info_add_int4_literal((p), "last_update", (n)); \ - pars_info_add_ull_literal((p), "n_rows", (t)->stat_n_rows); \ - pars_info_add_ull_literal((p), "clustered_index_size", \ - (t)->stat_clustered_index_size); \ - pars_info_add_ull_literal((p), "sum_of_other_index_sizes", \ - (t)->stat_sum_of_other_index_sizes); \ - } while(false); - pinfo = pars_info_create(); - PREPARE_PINFO_FOR_TABLE_SAVE(pinfo, table, now); + pars_info_add_str_literal(pinfo, "database_name", db_utf8); + pars_info_add_str_literal(pinfo, "table_name", table_utf8); + pars_info_add_int4_literal(pinfo, "last_update", now); + pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows); + pars_info_add_ull_literal(pinfo, "clustered_index_size", + table->stat_clustered_index_size); + pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes", + table->stat_sum_of_other_index_sizes); ret = dict_stats_exec_sql( pinfo, - "PROCEDURE TABLE_STATS_SAVE_INSERT () IS\n" + "PROCEDURE TABLE_STATS_SAVE () IS\n" "BEGIN\n" + + "DELETE FROM \"" TABLE_STATS_NAME "\"\n" + "WHERE\n" + "database_name = :database_name AND\n" + "table_name = :table_name;\n" + "INSERT INTO \"" TABLE_STATS_NAME "\"\n" "VALUES\n" "(\n" @@ -2266,27 +2243,6 @@ dict_stats_save( ");\n" "END;"); - if (ret == DB_DUPLICATE_KEY) { - pinfo = pars_info_create(); - - PREPARE_PINFO_FOR_TABLE_SAVE(pinfo, table, now); - - ret = dict_stats_exec_sql( - pinfo, - "PROCEDURE TABLE_STATS_SAVE_UPDATE () IS\n" - "BEGIN\n" - "UPDATE \"" TABLE_STATS_NAME "\" SET\n" - "last_update = :last_update,\n" - "n_rows = :n_rows,\n" - "clustered_index_size = :clustered_index_size,\n" - "sum_of_other_index_sizes = " - " :sum_of_other_index_sizes\n" - "WHERE\n" - "database_name = :database_name AND\n" - "table_name = :table_name;\n" - "END;"); - } - if (ret != DB_SUCCESS) { char buf[MAX_FULL_NAME_LEN]; ut_print_timestamp(stderr); @@ -2304,6 +2260,10 @@ dict_stats_save( index != NULL; index = dict_table_get_next_index(index)) { + if (only_for_index != NULL && index->id != *only_for_index) { + continue; + } + if (dict_stats_should_ignore_index(index)) { continue; } @@ -2860,7 +2820,7 @@ dict_stats_update_for_index( dict_table_stats_lock(index->table, RW_X_LATCH); dict_stats_analyze_index(index); dict_table_stats_unlock(index->table, RW_X_LATCH); - dict_stats_save(index->table); + dict_stats_save(index->table, &index->id); DBUG_VOID_RETURN; } /* else */ @@ -2955,7 +2915,7 @@ dict_stats_update( return(err); } - err = dict_stats_save(table); + err = dict_stats_save(table, NULL); return(err); } @@ -2988,7 +2948,7 @@ dict_stats_update( if (dict_stats_persistent_storage_check(false)) { - return(dict_stats_save(table)); + return(dict_stats_save(table, NULL)); } return(DB_STATS_DO_NOT_EXIST); @@ -3834,7 +3794,7 @@ test_dict_stats_save() index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE; index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE; - ret = dict_stats_save(&table); + ret = dict_stats_save(&table, NULL); ut_a(ret == DB_SUCCESS); diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index 5e797f2583c..ee3c3943ab8 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -2409,27 +2409,21 @@ fil_op_log_parse_or_replay( break; case MLOG_FILE_RENAME: - /* We do the rename based on space id, not old file name; - this should guarantee that after the log replay each .ibd file - has the correct name for the latest log sequence number; the - proof is left as an exercise :) */ - - if (fil_tablespace_exists_in_mem(space_id)) { + /* In order to replay the rename, the following must hold: + * The new name is not already used. + * A tablespace is open in memory with the old name. + * The space ID for that tablepace matches this log entry. + This will prevent unintended renames during recovery. */ + + if (fil_get_space_id_for_table(new_name) == ULINT_UNDEFINED + && space_id == fil_get_space_id_for_table(name)) { /* Create the database directory for the new name, if it does not exist yet */ fil_create_directory_for_tablename(new_name); - /* Rename the table if there is not yet a tablespace - with the same name */ - - if (fil_get_space_id_for_table(new_name) - == ULINT_UNDEFINED) { - /* We do not care about the old name, that - is why we pass NULL as the first argument. */ - if (!fil_rename_tablespace(NULL, space_id, - new_name, NULL)) { - ut_error; - } + if (!fil_rename_tablespace(name, space_id, + new_name, NULL)) { + ut_error; } } @@ -4035,6 +4029,176 @@ fil_make_ibbackup_old_name( } #endif /* UNIV_HOTBACKUP */ + +/*******************************************************************//** +Determine the space id of the given file descriptor by reading a few +pages from the beginning of the .ibd file. +@return true if space id was successfully identified, or false. */ +static +bool +fil_user_tablespace_find_space_id( +/*==============================*/ + fsp_open_info* fsp) /* in/out: contains file descriptor, which is + used as input. contains space_id, which is + the output */ +{ + bool st; + os_offset_t file_size; + + file_size = os_file_get_size(fsp->file); + + if (file_size == (os_offset_t) -1) { + ib_logf(IB_LOG_LEVEL_ERROR, "Could not get file size: %s", + fsp->filepath); + return(false); + } + + /* Assuming a page size, read the space_id from each page and store it + in a map. Find out which space_id is agreed on by majority of the + pages. Choose that space_id. */ + for (ulint page_size = UNIV_ZIP_SIZE_MIN; + page_size <= UNIV_PAGE_SIZE_MAX; page_size <<= 1) { + + /* map[space_id] = count of pages */ + std::map<ulint, ulint> verify; + + ulint page_count = 64; + ulint valid_pages = 0; + + /* Adjust the number of pages to analyze based on file size */ + while ((page_count * page_size) > file_size) { + --page_count; + } + + ib_logf(IB_LOG_LEVEL_INFO, "Page size:%lu Pages to analyze:" + "%lu", page_size, page_count); + + byte* buf = static_cast<byte*>(ut_malloc(2*page_size)); + byte* page = static_cast<byte*>(ut_align(buf, page_size)); + + for (ulint j = 0; j < page_count; ++j) { + + st = os_file_read(fsp->file, page, (j* page_size), page_size); + + if (!st) { + ib_logf(IB_LOG_LEVEL_INFO, + "READ FAIL: page_no:%lu", j); + continue; + } + + bool uncompressed_ok = false; + + /* For uncompressed pages, the page size must be equal + to UNIV_PAGE_SIZE. */ + if (page_size == UNIV_PAGE_SIZE) { + uncompressed_ok = !buf_page_is_corrupted( + false, page, 0); + } + + bool compressed_ok = !buf_page_is_corrupted( + false, page, page_size); + + if (uncompressed_ok || compressed_ok) { + + ulint space_id = mach_read_from_4(page + + FIL_PAGE_SPACE_ID); + + if (space_id > 0) { + ib_logf(IB_LOG_LEVEL_INFO, + "VALID: space:%lu " + "page_no:%lu page_size:%lu", + space_id, j, page_size); + verify[space_id]++; + ++valid_pages; + } + } + } + + ut_free(buf); + + ib_logf(IB_LOG_LEVEL_INFO, "Page size: %lu, Possible space_id " + "count:" UINT64PF, page_size, + static_cast<ib_uint64_t>(verify.size())); + + const ulint pages_corrupted = 3; + for (ulint missed = 0; missed <= pages_corrupted; ++missed) { + + for (std::map<ulint, ulint>::iterator + m = verify.begin(); m != verify.end(); ++m ) { + + ib_logf(IB_LOG_LEVEL_INFO, "space_id:%lu, " + "Number of pages matched: %lu/%lu " + "(%lu)", m->first, m->second, + valid_pages, page_size); + + if (m->second == (valid_pages - missed)) { + + ib_logf(IB_LOG_LEVEL_INFO, + "Chosen space:%lu\n", m->first); + + fsp->id = m->first; + return(true); + } + } + + } + } + + return(false); +} + +/*******************************************************************//** +Finds the page 0 of the given space id from the double write buffer, and +copies it to the corresponding .ibd file. +@return true if copy was successful, or false. */ +static +bool +fil_user_tablespace_restore_page0( +/*==============================*/ + fsp_open_info* fsp) /* in: contains space id and .ibd file + information */ +{ + bool err; + ulint flags; + ulint zip_size; + ulint page_no; + ulint page_size; + ulint buflen; + byte* page; + + ib_logf(IB_LOG_LEVEL_INFO, "Restoring first page of tablespace %lu", + fsp->id); + + if (fsp->id == 0) { + err = false; + goto out; + } + + // find if double write buffer has page0 of given space id + page = recv_sys->dblwr.find_first_page(fsp->id); + + if (!page) { + err = false; + goto out; + } + + flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page); + zip_size = fsp_flags_get_zip_size(flags); + page_no = page_get_page_no(page); + page_size = fsp_flags_get_page_size(flags); + + ut_ad(page_no == 0); + + buflen = zip_size ? zip_size: page_size; + + ib_logf(IB_LOG_LEVEL_INFO, "Writing %lu bytes into file: %s", + buflen, fsp->filepath); + + err = os_file_write(fsp->filepath, fsp->file, page, 0, buflen); +out: + return(err); +} + /********************************************************************//** Opens an .ibd file and adds the associated single-table tablespace to the InnoDB fil0fil.cc data structures. @@ -4046,6 +4210,10 @@ fil_validate_single_table_tablespace( const char* tablename, /*!< in: database/tablename */ fsp_open_info* fsp) /*!< in/out: tablespace info */ { + bool restore_attempted = false; + +check_first_page: + fsp->success = TRUE; if (const char* check_msg = fil_read_first_page( fsp->file, FALSE, &fsp->flags, &fsp->id, &fsp->lsn, &fsp->lsn)) { @@ -4053,6 +4221,19 @@ fil_validate_single_table_tablespace( "%s in tablespace %s (table %s)", check_msg, fsp->filepath, tablename); fsp->success = FALSE; + } + + if (!fsp->success) { + if (!restore_attempted) { + if (!fil_user_tablespace_find_space_id(fsp)) { + return; + } + restore_attempted = true; + if (!fil_user_tablespace_restore_page0(fsp)) { + return; + } + goto check_first_page; + } return; } @@ -4170,7 +4351,7 @@ fil_load_single_table_tablespace( /* Try to open the tablespace in the datadir. */ def.file = os_file_create_simple_no_error_handling( innodb_file_data_key, def.filepath, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &def.success); + OS_FILE_READ_WRITE, &def.success); /* Read the first page of the remote tablespace */ if (def.success) { diff --git a/storage/xtradb/fts/fts0ast.cc b/storage/xtradb/fts/fts0ast.cc index 3a03fc63303..d6c19c0050a 100644 --- a/storage/xtradb/fts/fts0ast.cc +++ b/storage/xtradb/fts/fts0ast.cc @@ -112,9 +112,11 @@ fts_ast_create_node_term( if (str.f_n_char > 0) { /* If the subsequent term (after the first one)'s size - is less than fts_min_token_size, we shall ignore - that. This is to make consistent with MyISAM behavior */ - if (first_node && (str.f_n_char < fts_min_token_size)) { + is less than fts_min_token_size or the term is greater + than fts_max_token_size, we shall ignore that. This is + to make consistent with MyISAM behavior */ + if ((first_node && (str.f_n_char < fts_min_token_size)) + || str.f_n_char > fts_max_token_size) { continue; } @@ -394,6 +396,10 @@ fts_ast_term_set_distance( ulint distance) /*!< in: the text proximity distance */ { + if (node == NULL) { + return; + } + ut_a(node->type == FTS_AST_TEXT); ut_a(node->text.distance == ULINT_UNDEFINED); @@ -551,14 +557,6 @@ fts_ast_visit( break; - case FTS_AST_SUBEXP_LIST: - if (visit_pass != FTS_PASS_FIRST) { - break; - } - - error = fts_ast_visit_sub_exp(node, visitor, arg); - break; - case FTS_AST_OPER: oper = node->oper; oper_node = node; diff --git a/storage/xtradb/fts/fts0blex.cc b/storage/xtradb/fts/fts0blex.cc index dccedac0212..6082261e74c 100644 --- a/storage/xtradb/fts/fts0blex.cc +++ b/storage/xtradb/fts/fts0blex.cc @@ -305,9 +305,9 @@ YY_BUFFER_STATE fts0b_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner YY_BUFFER_STATE fts0b_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); YY_BUFFER_STATE fts0b_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner ); -void *fts0balloc (yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); -void *fts0brealloc (void *,yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); -void fts0bfree (void * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); +void *fts0balloc (yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); +void *fts0brealloc (void *,yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); +void fts0bfree (void * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); #define yy_new_buffer fts0b_create_buffer @@ -347,7 +347,7 @@ typedef int yy_state_type; static yy_state_type yy_get_previous_state (yyscan_t yyscanner ); static yy_state_type yy_try_NUL_trans (yy_state_type current_state ,yyscan_t yyscanner); static int yy_get_next_buffer (yyscan_t yyscanner ); -static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); +static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); /* Done after the current pattern has been matched and before the * corresponding action - sets up yytext. @@ -579,11 +579,11 @@ extern int fts0bwrap (yyscan_t yyscanner ); #endif #ifndef yytext_ptr -static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))); +static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))); #endif #ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))); +static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))); #endif #ifndef YY_NO_INPUT @@ -1609,9 +1609,9 @@ YY_BUFFER_STATE fts0b_scan_bytes (yyconst char * yybytes, int _yybytes_len , y #define YY_EXIT_FAILURE 2 #endif -static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { - (void) fprintf( stderr, "%s\n", msg ); + (void) fprintf( stderr, "%s\n", msg ); exit( YY_EXIT_FAILURE ); } @@ -1910,7 +1910,7 @@ int fts0blex_destroy (yyscan_t yyscanner) */ #ifndef yytext_ptr -static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { register int i; for ( i = 0; i < n; ++i ) @@ -1919,7 +1919,7 @@ static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t #endif #ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { register int n; for ( n = 0; s[n]; ++n ) @@ -1929,12 +1929,12 @@ static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribu } #endif -void *fts0balloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +void *fts0balloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { return (void *) malloc( size ); } -void *fts0brealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +void *fts0brealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { /* The cast to (char *) in the following accommodates both * implementations that use char* generic pointers, and those @@ -1946,7 +1946,7 @@ void *fts0brealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __at return (void *) realloc( (char *) ptr, size ); } -void fts0bfree (void * ptr , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +void fts0bfree (void * ptr , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { free( (char *) ptr ); /* see fts0brealloc() for (char *) cast */ } diff --git a/storage/xtradb/fts/fts0config.cc b/storage/xtradb/fts/fts0config.cc index c5cf38ca7f9..5b4ae5c39f7 100644 --- a/storage/xtradb/fts/fts0config.cc +++ b/storage/xtradb/fts/fts0config.cc @@ -151,7 +151,9 @@ fts_config_create_index_param_name( strcpy(name, param); name[len] = '_'; - fts_write_object_id(index->id, name + len + 1); + fts_write_object_id(index->id, name + len + 1, + DICT_TF2_FLAG_IS_SET(index->table, + DICT_TF2_FTS_AUX_HEX_NAME)); return(name); } diff --git a/storage/xtradb/fts/fts0fts.cc b/storage/xtradb/fts/fts0fts.cc index 1b114adea1f..47deee8d8e6 100644 --- a/storage/xtradb/fts/fts0fts.cc +++ b/storage/xtradb/fts/fts0fts.cc @@ -1608,7 +1608,8 @@ fts_rename_aux_tables( new_name, old_table_name, trx); DBUG_EXECUTE_IF("fts_rename_failure", - err = DB_DEADLOCK;); + err = DB_DEADLOCK; + fts_sql_rollback(trx);); mem_free(old_table_name); @@ -1949,7 +1950,7 @@ fts_create_one_index_table( ut_ad(index->type & DICT_FTS); - new_table = dict_mem_table_create(table_name, 0, 5, 1, 0); + new_table = dict_mem_table_create(table_name, 0, 5, 1, 0, false); field = dict_index_get_nth_field(index, 0); charset = innobase_get_fts_charset( @@ -2018,7 +2019,7 @@ fts_create_index_tables_low( fts_table.index_id = index->id; fts_table.table_id = table_id; fts_table.parent = table_name; - fts_table.table = NULL; + fts_table.table = index->table; #ifdef FTS_DOC_STATS_DEBUG char* sql; @@ -4479,7 +4480,7 @@ fts_sync_table( ut_ad(table->fts); - if (table->fts->cache) { + if (!dict_table_is_discarded(table) && table->fts->cache) { err = fts_sync(table->fts->cache->sync); } @@ -4506,15 +4507,11 @@ fts_process_token( fts_string_t str; ulint offset = 0; fts_doc_t* result_doc; - byte buf[FTS_MAX_WORD_LEN + 1]; - - str.f_str = buf; /* Determine where to save the result. */ result_doc = (result) ? result : doc; /* The length of a string in characters is set here only. */ - ret = innobase_mysql_fts_get_token( doc->charset, doc->text.f_str + start_pos, doc->text.f_str + doc->text.f_len, &str, &offset); @@ -4545,6 +4542,7 @@ fts_process_token( (char*) t_str.f_str, t_str.f_len); t_str.f_len = newlen; + t_str.f_str[newlen] = 0; /* Add the word to the document statistics. If the word hasn't been seen before we create a new entry for it. */ @@ -5797,7 +5795,7 @@ fts_is_aux_table_name( my_name[len] = 0; end = my_name + len; - ptr = static_cast<const char*>(memchr(my_name, '/', len)); + ptr = static_cast<const char*>(memchr(my_name, '/', len)); if (ptr != NULL) { /* We will start the match after the '/' */ @@ -5940,6 +5938,374 @@ fts_read_tables( return(TRUE); } +/******************************************************************//** +Callback that sets a hex formatted FTS table's flags2 in +SYS_TABLES. The flags is stored in MIX_LEN column. +@return FALSE if all OK */ +static +ibool +fts_set_hex_format( +/*===============*/ + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: bool set/unset flag */ +{ + sel_node_t* node = static_cast<sel_node_t*>(row); + dfield_t* dfield = que_node_get_val(node->select_list); + + ut_ad(dtype_get_mtype(dfield_get_type(dfield)) == DATA_INT); + ut_ad(dfield_get_len(dfield) == sizeof(ib_uint32_t)); + /* There should be at most one matching record. So the value + must be the default value. */ + ut_ad(mach_read_from_4(static_cast<byte*>(user_arg)) + == ULINT32_UNDEFINED); + + ulint flags2 = mach_read_from_4( + static_cast<byte*>(dfield_get_data(dfield))); + + flags2 |= DICT_TF2_FTS_AUX_HEX_NAME; + + mach_write_to_4(static_cast<byte*>(user_arg), flags2); + + return(FALSE); +} + +/*****************************************************************//** +Update the DICT_TF2_FTS_AUX_HEX_NAME flag in SYS_TABLES. +@return DB_SUCCESS or error code. */ +UNIV_INTERN +dberr_t +fts_update_hex_format_flag( +/*=======================*/ + trx_t* trx, /*!< in/out: transaction that + covers the update */ + table_id_t table_id, /*!< in: Table for which we want + to set the root table->flags2 */ + bool dict_locked) /*!< in: set to true if the + caller already owns the + dict_sys_t::mutex. */ +{ + pars_info_t* info; + ib_uint32_t flags2; + + static const char sql[] = + "PROCEDURE UPDATE_HEX_FORMAT_FLAG() IS\n" + "DECLARE FUNCTION my_func;\n" + "DECLARE CURSOR c IS\n" + " SELECT MIX_LEN " + " FROM SYS_TABLES " + " WHERE ID = :table_id FOR UPDATE;" + "\n" + "BEGIN\n" + "OPEN c;\n" + "WHILE 1 = 1 LOOP\n" + " FETCH c INTO my_func();\n" + " IF c % NOTFOUND THEN\n" + " EXIT;\n" + " END IF;\n" + "END LOOP;\n" + "UPDATE SYS_TABLES" + " SET MIX_LEN = :flags2" + " WHERE ID = :table_id;\n" + "CLOSE c;\n" + "END;\n"; + + flags2 = ULINT32_UNDEFINED; + + info = pars_info_create(); + + pars_info_add_ull_literal(info, "table_id", table_id); + pars_info_bind_int4_literal(info, "flags2", &flags2); + + pars_info_bind_function( + info, "my_func", fts_set_hex_format, &flags2); + + if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) { + trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); + } + + dberr_t err = que_eval_sql(info, sql, !dict_locked, trx); + + ut_a(flags2 != ULINT32_UNDEFINED); + + return (err); +} + +#ifdef _WIN32 + +/*********************************************************************//** +Rename an aux table to HEX format. It's called when "%016llu" is used +to format an object id in table name, which only happens in Windows. */ +static __attribute__((nonnull, warn_unused_result)) +dberr_t +fts_rename_one_aux_table_to_hex_format( +/*===================================*/ + trx_t* trx, /*!< in: transaction */ + const fts_aux_table_t* aux_table, /*!< in: table info */ + const dict_table_t* parent_table) /*!< in: parent table name */ +{ + const char* ptr; + fts_table_t fts_table; + char* new_name; + dberr_t error; + + ptr = strchr(aux_table->name, '/'); + ut_a(ptr != NULL); + ++ptr; + /* Skip "FTS_", table id and underscore */ + for (ulint i = 0; i < 2; ++i) { + ptr = strchr(ptr, '_'); + ut_a(ptr != NULL); + ++ptr; + } + + fts_table.suffix = NULL; + if (aux_table->index_id == 0) { + fts_table.type = FTS_COMMON_TABLE; + + for (ulint i = 0; fts_common_tables[i] != NULL; ++i) { + if (strcmp(ptr, fts_common_tables[i]) == 0) { + fts_table.suffix = fts_common_tables[i]; + break; + } + } + } else { + fts_table.type = FTS_INDEX_TABLE; + + /* Skip index id and underscore */ + ptr = strchr(ptr, '_'); + ut_a(ptr != NULL); + ++ptr; + + for (ulint i = 0; fts_index_selector[i].value; ++i) { + if (strcmp(ptr, fts_get_suffix(i)) == 0) { + fts_table.suffix = fts_get_suffix(i); + break; + } + } + } + + ut_a(fts_table.suffix != NULL); + + fts_table.parent = parent_table->name; + fts_table.table_id = aux_table->parent_id; + fts_table.index_id = aux_table->index_id; + fts_table.table = parent_table; + + new_name = fts_get_table_name(&fts_table); + ut_ad(strcmp(new_name, aux_table->name) != 0); + + if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) { + trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); + } + + error = row_rename_table_for_mysql(aux_table->name, new_name, trx, + FALSE); + + if (error != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_WARN, + "Failed to rename aux table \'%s\' to " + "new format \'%s\'. ", + aux_table->name, new_name); + } else { + ib_logf(IB_LOG_LEVEL_INFO, + "Renamed aux table \'%s\' to \'%s\'.", + aux_table->name, new_name); + } + + mem_free(new_name); + + return (error); +} + +/**********************************************************************//** +Rename all aux tables of a parent table to HEX format. Also set aux tables' +flags2 and parent table's flags2 with DICT_TF2_FTS_AUX_HEX_NAME. +It's called when "%016llu" is used to format an object id in table name, +which only happens in Windows. +Note the ids in tables are correct but the names are old ambiguous ones. + +This function should make sure that either all the parent table and aux tables +are set DICT_TF2_FTS_AUX_HEX_NAME with flags2 or none of them are set */ +static __attribute__((nonnull, warn_unused_result)) +dberr_t +fts_rename_aux_tables_to_hex_format( +/*================================*/ + trx_t* trx, /*!< in: transaction */ + dict_table_t* parent_table, /*!< in: parent table */ + ib_vector_t* tables) /*!< in: aux tables to rename. */ +{ + dberr_t error; + ulint count; + + ut_ad(!DICT_TF2_FLAG_IS_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME)); + ut_ad(!ib_vector_is_empty(tables)); + + error = fts_update_hex_format_flag(trx, parent_table->id, true); + + if (error != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_WARN, + "Setting parent table %s to hex format failed.", + parent_table->name); + + fts_sql_rollback(trx); + return (error); + } + + DICT_TF2_FLAG_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME); + + for (count = 0; count < ib_vector_size(tables); ++count) { + dict_table_t* table; + fts_aux_table_t* aux_table; + + aux_table = static_cast<fts_aux_table_t*>( + ib_vector_get(tables, count)); + + table = dict_table_open_on_id(aux_table->id, TRUE, + DICT_TABLE_OP_NORMAL); + + ut_ad(table != NULL); + ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_AUX_HEX_NAME)); + + /* Set HEX_NAME flag here to make sure we can get correct + new table name in following function */ + DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME); + error = fts_rename_one_aux_table_to_hex_format(trx, + aux_table, parent_table); + /* We will rollback the trx if the error != DB_SUCCESS, + so setting the flag here is the same with setting it in + row_rename_table_for_mysql */ + DBUG_EXECUTE_IF("rename_aux_table_fail", error = DB_ERROR;); + + if (error != DB_SUCCESS) { + dict_table_close(table, TRUE, FALSE); + + ib_logf(IB_LOG_LEVEL_WARN, + "Failed to rename one aux table %s " + "Will revert all successful rename " + "operations.", aux_table->name); + + fts_sql_rollback(trx); + break; + } + + error = fts_update_hex_format_flag(trx, aux_table->id, true); + dict_table_close(table, TRUE, FALSE); + + if (error != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_WARN, + "Setting aux table %s to hex format failed.", + aux_table->name); + + fts_sql_rollback(trx); + break; + } + } + + if (error != DB_SUCCESS) { + ut_ad(count != ib_vector_size(tables)); + /* If rename fails, thr trx would be rolled back, we can't + use it any more, we'll start a new background trx to do + the reverting. */ + ut_a(trx->state == TRX_STATE_NOT_STARTED); + bool not_rename = false; + + /* Try to revert those succesful rename operations + in order to revert the ibd file rename. */ + for (ulint i = 0; i <= count; ++i) { + dict_table_t* table; + fts_aux_table_t* aux_table; + trx_t* trx_bg; + dberr_t err; + + aux_table = static_cast<fts_aux_table_t*>( + ib_vector_get(tables, i)); + + table = dict_table_open_on_id(aux_table->id, TRUE, + DICT_TABLE_OP_NORMAL); + ut_ad(table != NULL); + + if (not_rename) { + DICT_TF2_FLAG_UNSET(table, + DICT_TF2_FTS_AUX_HEX_NAME); + } + + if (!DICT_TF2_FLAG_IS_SET(table, + DICT_TF2_FTS_AUX_HEX_NAME)) { + dict_table_close(table, TRUE, FALSE); + continue; + } + + trx_bg = trx_allocate_for_background(); + trx_bg->op_info = "Revert half done rename"; + trx_bg->dict_operation_lock_mode = RW_X_LATCH; + trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE); + + DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS_AUX_HEX_NAME); + err = row_rename_table_for_mysql(table->name, + aux_table->name, + trx_bg, FALSE); + + trx_bg->dict_operation_lock_mode = 0; + dict_table_close(table, TRUE, FALSE); + + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_WARN, "Failed to revert " + "table %s. Please revert manually.", + table->name); + fts_sql_rollback(trx_bg); + /* Continue to clear aux tables' flags2 */ + not_rename = true; + continue; + } + + fts_sql_commit(trx_bg); + } + + DICT_TF2_FLAG_UNSET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME); + } + + return (error); +} + +/**********************************************************************//** +Convert an id, which is actually a decimal number but was regard as a HEX +from a string, to its real value. */ +static +ib_id_t +fts_fake_hex_to_dec( +/*================*/ + ib_id_t id) /*!< in: number to convert */ +{ + ib_id_t dec_id = 0; + char tmp_id[FTS_AUX_MIN_TABLE_ID_LENGTH]; + int ret; + + ret = sprintf(tmp_id, UINT64PFx, id); + ut_ad(ret == 16); + ret = sscanf(tmp_id, "%016llu", &dec_id); + ut_ad(ret == 1); + + return dec_id; +} + +/*********************************************************************//** +Compare two fts_aux_table_t parent_ids. +@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ +UNIV_INLINE +int +fts_check_aux_table_parent_id_cmp( +/*==============================*/ + const void* p1, /*!< in: id1 */ + const void* p2) /*!< in: id2 */ +{ + const fts_aux_table_t* fa1 = static_cast<const fts_aux_table_t*>(p1); + const fts_aux_table_t* fa2 = static_cast<const fts_aux_table_t*>(p2); + + return static_cast<int>(fa1->parent_id - fa2->parent_id); +} + +#endif /* _WIN32 */ + /**********************************************************************//** Check and drop all orphaned FTS auxiliary tables, those that don't have a parent table or FTS index defined on them. @@ -5951,18 +6317,75 @@ fts_check_and_drop_orphaned_tables( trx_t* trx, /*!< in: transaction */ ib_vector_t* tables) /*!< in: tables to check */ { +#ifdef _WIN32 + mem_heap_t* heap; + ib_vector_t* aux_tables_to_rename; + ib_alloc_t* heap_alloc; + + heap = mem_heap_create(1024); + heap_alloc = ib_heap_allocator_create(heap); + + /* We store all aux tables belonging to the same parent table here, + and rename all these tables in a batch mode. */ + aux_tables_to_rename = ib_vector_create(heap_alloc, + sizeof(fts_aux_table_t), 128); + + /* Sort by parent_id first, in case rename will fail */ + ib_vector_sort(tables, fts_check_aux_table_parent_id_cmp); +#endif /* _WIN32 */ + for (ulint i = 0; i < ib_vector_size(tables); ++i) { - dict_table_t* table; + dict_table_t* parent_table; fts_aux_table_t* aux_table; bool drop = false; +#ifdef _WIN32 + dict_table_t* table; + fts_aux_table_t* next_aux_table = NULL; + ib_id_t orig_parent_id = 0; + bool rename = false; +#endif /* _WIN32 */ aux_table = static_cast<fts_aux_table_t*>( ib_vector_get(tables, i)); +#ifdef _WIN32 table = dict_table_open_on_id( + aux_table->id, TRUE, DICT_TABLE_OP_NORMAL); + orig_parent_id = aux_table->parent_id; + + if (table == NULL || strcmp(table->name, aux_table->name)) { + /* Skip these aux tables, which are common tables + with wrong table ids */ + if (table) { + dict_table_close(table, TRUE, FALSE); + } + + continue; + + } else if (!DICT_TF2_FLAG_IS_SET(table, + DICT_TF2_FTS_AUX_HEX_NAME)) { + + aux_table->parent_id = fts_fake_hex_to_dec( + aux_table->parent_id); + + if (aux_table->index_id != 0) { + aux_table->index_id = fts_fake_hex_to_dec( + aux_table->index_id); + } + + ut_ad(aux_table->id > aux_table->parent_id); + rename = true; + } + + if (table) { + dict_table_close(table, TRUE, FALSE); + } +#endif /* _WIN32 */ + + parent_table = dict_table_open_on_id( aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL); - if (table == NULL || table->fts == NULL) { + if (parent_table == NULL || parent_table->fts == NULL) { drop = true; @@ -5971,7 +6394,7 @@ fts_check_and_drop_orphaned_tables( fts_t* fts; drop = true; - fts = table->fts; + fts = parent_table->fts; id = aux_table->index_id; /* Search for the FT index in the table's list. */ @@ -5979,33 +6402,28 @@ fts_check_and_drop_orphaned_tables( j < ib_vector_size(fts->indexes); ++j) { - const dict_index_t* index; + const dict_index_t* index; index = static_cast<const dict_index_t*>( ib_vector_getp_const(fts->indexes, j)); if (index->id == id) { - drop = false; break; } } } - if (table) { - dict_table_close(table, TRUE, FALSE); - } - if (drop) { ib_logf(IB_LOG_LEVEL_WARN, "Parent table of FTS auxiliary table %s not " "found.", aux_table->name); - dberr_t err = fts_drop_table(trx, aux_table->name); + dberr_t err = fts_drop_table(trx, aux_table->name); if (err == DB_FAIL) { - char* path; + char* path; path = fil_make_ibd_name( aux_table->name, false); @@ -6016,7 +6434,120 @@ fts_check_and_drop_orphaned_tables( mem_free(path); } } +#ifdef _WIN32 + if (!drop && rename) { + ib_vector_push(aux_tables_to_rename, aux_table); + } + + if (i + 1 < ib_vector_size(tables)) { + next_aux_table = static_cast<fts_aux_table_t*>( + ib_vector_get(tables, i + 1)); + } + + if ((next_aux_table == NULL + || orig_parent_id != next_aux_table->parent_id) + && !ib_vector_is_empty(aux_tables_to_rename)) { + /* All aux tables of parent table, whose id is + last_parent_id, have been checked, try to rename + them if necessary. We had better use a new background + trx to rename rather than the original trx, in case + any failure would cause a complete rollback. */ + dberr_t err; + trx_t* trx_rename = trx_allocate_for_background(); + trx_rename->op_info = "Rename aux tables to " + "hex format"; + trx_rename->dict_operation_lock_mode = RW_X_LATCH; + trx_start_for_ddl(trx_rename, TRX_DICT_OP_TABLE); + + err = fts_rename_aux_tables_to_hex_format(trx_rename, + parent_table, aux_tables_to_rename); + + trx_rename->dict_operation_lock_mode = 0; + + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_WARN, + "Rollback operations on all " + "aux tables of table %s. " + "Please check why renaming aux tables " + "failed, and restart the server to " + "upgrade again to " + "get the table work.", + parent_table->name); + + fts_sql_rollback(trx_rename); + } else { + fts_sql_commit(trx_rename); + } + + trx_free_for_background(trx_rename); + ib_vector_reset(aux_tables_to_rename); + } +#else /* _WIN32 */ + if (!drop) { + dict_table_t* table; + + table = dict_table_open_on_id( + aux_table->id, TRUE, DICT_TABLE_OP_NORMAL); + if (table != NULL + && strcmp(table->name, aux_table->name)) { + dict_table_close(table, TRUE, FALSE); + table = NULL; + } + + if (table != NULL + && !DICT_TF2_FLAG_IS_SET( + table, + DICT_TF2_FTS_AUX_HEX_NAME)) { + dberr_t err = fts_update_hex_format_flag( + trx, table->id, true); + + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_WARN, + "Setting aux table %s to hex " + "format failed.", table->name); + } else { + DICT_TF2_FLAG_SET(table, + DICT_TF2_FTS_AUX_HEX_NAME); + } + } + + if (table != NULL) { + dict_table_close(table, TRUE, FALSE); + } + + ut_ad(parent_table != NULL); + if (!DICT_TF2_FLAG_IS_SET(parent_table, + DICT_TF2_FTS_AUX_HEX_NAME)) { + dberr_t err = fts_update_hex_format_flag( + trx, parent_table->id, true); + + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_WARN, + "Setting parent table %s of " + "FTS auxiliary %s to hex " + "format failed.", + parent_table->name, + aux_table->name); + } else { + DICT_TF2_FLAG_SET(parent_table, + DICT_TF2_FTS_AUX_HEX_NAME); + } + } + } + +#endif /* _WIN32 */ + + if (parent_table) { + dict_table_close(parent_table, TRUE, FALSE); + } + } + +#ifdef _WIN32 + /* Free the memory allocated at the beginning */ + if (heap != NULL) { + mem_heap_free(heap); } +#endif /* _WIN32 */ } /**********************************************************************//** diff --git a/storage/xtradb/fts/fts0opt.cc b/storage/xtradb/fts/fts0opt.cc index 7cdad522564..2efb5d05c21 100644 --- a/storage/xtradb/fts/fts0opt.cc +++ b/storage/xtradb/fts/fts0opt.cc @@ -1624,10 +1624,12 @@ fts_optimize_create( optim->fts_common_table.parent = table->name; optim->fts_common_table.table_id = table->id; optim->fts_common_table.type = FTS_COMMON_TABLE; + optim->fts_common_table.table = table; optim->fts_index_table.parent = table->name; optim->fts_index_table.table_id = table->id; optim->fts_index_table.type = FTS_INDEX_TABLE; + optim->fts_index_table.table = table; /* The common prefix for all this parent table's aux tables. */ optim->name_prefix = fts_get_table_name_prefix( diff --git a/storage/xtradb/fts/fts0pars.cc b/storage/xtradb/fts/fts0pars.cc index a4009106c83..ef361b3c9c6 100644 --- a/storage/xtradb/fts/fts0pars.cc +++ b/storage/xtradb/fts/fts0pars.cc @@ -467,9 +467,9 @@ static const yytype_int8 yyrhs[] = /* YYRLINE[YYN] -- source line where rule number YYN was defined. */ static const yytype_uint8 yyrline[] = { - 0, 79, 79, 85, 89, 99, 111, 115, 124, 128, - 132, 136, 141, 147, 152, 159, 165, 169, 173, 177, - 181, 186, 191, 197, 202 + 0, 79, 79, 85, 89, 99, 111, 119, 129, 133, + 137, 141, 146, 152, 157, 164, 170, 174, 178, 182, + 186, 191, 196, 202, 207 }; #endif @@ -1458,7 +1458,7 @@ yyreduce: (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node)); if (!(yyval.node)) { - (yyval.node) = fts_ast_create_node_subexp_list(state, (yyvsp[(2) - (2)].node)); + (yyval.node) = (yyvsp[(2) - (2)].node); } else { fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node)); } @@ -1471,18 +1471,23 @@ yyreduce: #line 111 "fts0pars.y" { (yyval.node) = (yyvsp[(2) - (3)].node); + + if ((yyval.node)) { + (yyval.node) = fts_ast_create_node_subexp_list(state, (yyval.node)); + } } break; case 7: /* Line 1806 of yacc.c */ -#line 115 "fts0pars.y" +#line 119 "fts0pars.y" { - (yyval.node) = fts_ast_create_node_subexp_list(state, (yyvsp[(1) - (4)].node)); + (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (4)].node)); if ((yyvsp[(3) - (4)].node)) { - fts_ast_add_node((yyval.node), (yyvsp[(3) - (4)].node)); + fts_ast_add_node((yyval.node), + fts_ast_create_node_subexp_list(state, (yyvsp[(3) - (4)].node))); } } break; @@ -1490,7 +1495,7 @@ yyreduce: case 8: /* Line 1806 of yacc.c */ -#line 124 "fts0pars.y" +#line 129 "fts0pars.y" { (yyval.node) = (yyvsp[(1) - (1)].node); } @@ -1499,7 +1504,7 @@ yyreduce: case 9: /* Line 1806 of yacc.c */ -#line 128 "fts0pars.y" +#line 133 "fts0pars.y" { (yyval.node) = (yyvsp[(1) - (1)].node); } @@ -1508,7 +1513,7 @@ yyreduce: case 10: /* Line 1806 of yacc.c */ -#line 132 "fts0pars.y" +#line 137 "fts0pars.y" { fts_ast_term_set_wildcard((yyvsp[(1) - (2)].node)); } @@ -1517,7 +1522,7 @@ yyreduce: case 11: /* Line 1806 of yacc.c */ -#line 136 "fts0pars.y" +#line 141 "fts0pars.y" { fts_ast_term_set_distance((yyvsp[(1) - (3)].node), strtoul((yyvsp[(3) - (3)].token), NULL, 10)); free((yyvsp[(3) - (3)].token)); @@ -1527,7 +1532,7 @@ yyreduce: case 12: /* Line 1806 of yacc.c */ -#line 141 "fts0pars.y" +#line 146 "fts0pars.y" { (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (3)].node)); fts_ast_add_node((yyval.node), (yyvsp[(2) - (3)].node)); @@ -1538,7 +1543,7 @@ yyreduce: case 13: /* Line 1806 of yacc.c */ -#line 147 "fts0pars.y" +#line 152 "fts0pars.y" { (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node)); fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node)); @@ -1548,7 +1553,7 @@ yyreduce: case 14: /* Line 1806 of yacc.c */ -#line 152 "fts0pars.y" +#line 157 "fts0pars.y" { (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (4)].node)); fts_ast_add_node((yyval.node), (yyvsp[(2) - (4)].node)); @@ -1560,7 +1565,7 @@ yyreduce: case 15: /* Line 1806 of yacc.c */ -#line 159 "fts0pars.y" +#line 164 "fts0pars.y" { (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node)); fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node)); @@ -1570,7 +1575,7 @@ yyreduce: case 16: /* Line 1806 of yacc.c */ -#line 165 "fts0pars.y" +#line 170 "fts0pars.y" { (yyval.node) = fts_ast_create_node_oper(state, FTS_IGNORE); } @@ -1579,7 +1584,7 @@ yyreduce: case 17: /* Line 1806 of yacc.c */ -#line 169 "fts0pars.y" +#line 174 "fts0pars.y" { (yyval.node) = fts_ast_create_node_oper(state, FTS_EXIST); } @@ -1588,7 +1593,7 @@ yyreduce: case 18: /* Line 1806 of yacc.c */ -#line 173 "fts0pars.y" +#line 178 "fts0pars.y" { (yyval.node) = fts_ast_create_node_oper(state, FTS_NEGATE); } @@ -1597,7 +1602,7 @@ yyreduce: case 19: /* Line 1806 of yacc.c */ -#line 177 "fts0pars.y" +#line 182 "fts0pars.y" { (yyval.node) = fts_ast_create_node_oper(state, FTS_DECR_RATING); } @@ -1606,7 +1611,7 @@ yyreduce: case 20: /* Line 1806 of yacc.c */ -#line 181 "fts0pars.y" +#line 186 "fts0pars.y" { (yyval.node) = fts_ast_create_node_oper(state, FTS_INCR_RATING); } @@ -1615,7 +1620,7 @@ yyreduce: case 21: /* Line 1806 of yacc.c */ -#line 186 "fts0pars.y" +#line 191 "fts0pars.y" { (yyval.node) = fts_ast_create_node_term(state, (yyvsp[(1) - (1)].token)); free((yyvsp[(1) - (1)].token)); @@ -1625,7 +1630,7 @@ yyreduce: case 22: /* Line 1806 of yacc.c */ -#line 191 "fts0pars.y" +#line 196 "fts0pars.y" { (yyval.node) = fts_ast_create_node_term(state, (yyvsp[(1) - (1)].token)); free((yyvsp[(1) - (1)].token)); @@ -1635,7 +1640,7 @@ yyreduce: case 23: /* Line 1806 of yacc.c */ -#line 197 "fts0pars.y" +#line 202 "fts0pars.y" { (yyval.node) = (yyvsp[(2) - (2)].node); } @@ -1644,7 +1649,7 @@ yyreduce: case 24: /* Line 1806 of yacc.c */ -#line 202 "fts0pars.y" +#line 207 "fts0pars.y" { (yyval.node) = fts_ast_create_node_text(state, (yyvsp[(1) - (1)].token)); free((yyvsp[(1) - (1)].token)); @@ -1654,7 +1659,7 @@ yyreduce: /* Line 1806 of yacc.c */ -#line 1658 "fts0pars.cc" +#line 1663 "fts0pars.cc" default: break; } /* User semantic actions sometimes alter yychar, and that requires @@ -1885,7 +1890,7 @@ yyreturn: /* Line 2067 of yacc.c */ -#line 207 "fts0pars.y" +#line 212 "fts0pars.y" /******************************************************************** diff --git a/storage/xtradb/fts/fts0pars.y b/storage/xtradb/fts/fts0pars.y index 73d71bc87c5..ff22e9a9873 100644 --- a/storage/xtradb/fts/fts0pars.y +++ b/storage/xtradb/fts/fts0pars.y @@ -101,7 +101,7 @@ expr_lst: /* Empty */ { $$ = fts_ast_create_node_list(state, $1); if (!$$) { - $$ = fts_ast_create_node_subexp_list(state, $2); + $$ = $2; } else { fts_ast_add_node($$, $2); } @@ -110,13 +110,18 @@ expr_lst: /* Empty */ { sub_expr: '(' expr_lst ')' { $$ = $2; + + if ($$) { + $$ = fts_ast_create_node_subexp_list(state, $$); + } } | prefix '(' expr_lst ')' { - $$ = fts_ast_create_node_subexp_list(state, $1); + $$ = fts_ast_create_node_list(state, $1); if ($3) { - fts_ast_add_node($$, $3); + fts_ast_add_node($$, + fts_ast_create_node_subexp_list(state, $3)); } } ; diff --git a/storage/xtradb/fts/fts0que.cc b/storage/xtradb/fts/fts0que.cc index 7da60c0d166..189c43768cd 100644 --- a/storage/xtradb/fts/fts0que.cc +++ b/storage/xtradb/fts/fts0que.cc @@ -40,9 +40,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang #include "fts0vlc.ic" #endif -#include <string> #include <vector> -#include <map> #define FTS_ELEM(t, n, i, j) (t[(i) * n + (j)]) @@ -66,8 +64,7 @@ static const double FTS_NORMALIZE_COEFF = 0.0115F; // FIXME: Need to have a generic iterator that traverses the ilist. -typedef std::map<std::string, ulint> word_map_t; -typedef std::vector<std::string> word_vector_t; +typedef std::vector<fts_string_t> word_vector_t; struct fts_word_freq_t; @@ -92,7 +89,7 @@ struct fts_query_t { fts_ast_node_t* cur_node; /*!< Current tree node */ - word_map_t* word_map; /*!< Matched word map for + ib_rbt_t* word_map; /*!< Matched word map for searching by word*/ word_vector_t* word_vector; /*!< Matched word vector for @@ -229,7 +226,7 @@ struct fts_doc_freq_t { /** To determine the word frequency per document. */ struct fts_word_freq_t { - byte* word; /*!< Word for which we need the freq, + fts_string_t word; /*!< Word for which we need the freq, it's allocated on the query heap */ ib_rbt_t* doc_freqs; /*!< RB Tree for storing per document @@ -257,15 +254,14 @@ static dberr_t fts_query_filter_doc_ids( /*=====================*/ - fts_query_t* query, /*!< in: query instance */ - const byte* word, /*!< in: the current word */ - fts_word_freq_t*word_freq, /*!< in/out: word frequency */ - const fts_node_t* - node, /*!< in: current FTS node */ - void* data, /*!< in: doc id ilist */ - ulint len, /*!< in: doc id ilist size */ - ibool calc_doc_count);/*!< in: whether to remember doc - count */ + fts_query_t* query, /*!< in: query instance */ + const fts_string_t* word, /*!< in: the current word */ + fts_word_freq_t* word_freq, /*!< in/out: word frequency */ + const fts_node_t* node, /*!< in: current FTS node */ + void* data, /*!< in: doc id ilist */ + ulint len, /*!< in: doc id ilist size */ + ibool calc_doc_count);/*!< in: whether to remember doc + count */ #if 0 /*****************************************************************//*** @@ -575,27 +571,41 @@ static void fts_ranking_words_add( /*==================*/ - fts_query_t* query, /*!< in: query instance */ - fts_ranking_t* ranking, /*!< in: ranking instance */ - const char* word) /*!< in: term/word to add */ + fts_query_t* query, /*!< in: query instance */ + fts_ranking_t* ranking, /*!< in: ranking instance */ + const fts_string_t* word) /*!< in: term/word to add */ { ulint pos; ulint byte_offset; ulint bit_offset; - word_map_t::iterator it; - - /* Note: we suppose the word map and vector are append-only */ - /* Check if need to add it to word map */ - it = query->word_map->lower_bound(word); - if (it != query->word_map->end() - && !query->word_map->key_comp()(word, it->first)) { - pos = it->second; + ib_rbt_bound_t parent; + + /* Note: we suppose the word map and vector are append-only. */ + ut_ad(query->word_vector->size() == rbt_size(query->word_map)); + + /* We use ib_rbt to simulate a map, f_n_char means position. */ + if (rbt_search(query->word_map, &parent, word) == 0) { + fts_string_t* result_word; + + result_word = rbt_value(fts_string_t, parent.last); + pos = result_word->f_n_char; + ut_ad(pos < rbt_size(query->word_map)); } else { - pos = query->word_map->size(); - query->word_map->insert(it, - std::pair<std::string, ulint>(word, pos)); + /* Add the word to map. */ + fts_string_t new_word; - query->word_vector->push_back(word); + pos = rbt_size(query->word_map); + + new_word.f_str = static_cast<byte*>(mem_heap_alloc(query->heap, + word->f_len + 1)); + memcpy(new_word.f_str, word->f_str, word->f_len); + new_word.f_str[word->f_len] = 0; + new_word.f_len = word->f_len; + new_word.f_n_char = pos; + + rbt_add_node(query->word_map, &parent, &new_word); + ut_ad(rbt_validate(query->word_map)); + query->word_vector->push_back(new_word); } /* Check words len */ @@ -630,7 +640,7 @@ fts_ranking_words_get_next( const fts_query_t* query, /*!< in: query instance */ fts_ranking_t* ranking,/*!< in: ranking instance */ ulint* pos, /*!< in/out: word start pos */ - byte** word) /*!< in/out: term/word to add */ + fts_string_t* word) /*!< in/out: term/word to add */ { bool ret = false; ulint max_pos = ranking->words_len * CHAR_BIT; @@ -651,7 +661,7 @@ fts_ranking_words_get_next( /* Get next word from word vector */ if (ret) { ut_ad(*pos < query->word_vector->size()); - *word = (byte*)query->word_vector->at((size_t)*pos).c_str(); + *word = query->word_vector->at((size_t)*pos); *pos += 1; } @@ -666,23 +676,22 @@ static fts_word_freq_t* fts_query_add_word_freq( /*====================*/ - fts_query_t* query, /*!< in: query instance */ - const byte* word) /*!< in: term/word to add */ + fts_query_t* query, /*!< in: query instance */ + const fts_string_t* word) /*!< in: term/word to add */ { ib_rbt_bound_t parent; /* Lookup the word in our rb tree and add if it doesn't exist. */ if (rbt_search(query->word_freqs, &parent, word) != 0) { fts_word_freq_t word_freq; - ulint len = ut_strlen((char*) word) + 1; memset(&word_freq, 0, sizeof(word_freq)); - word_freq.word = static_cast<byte*>( - mem_heap_alloc(query->heap, len)); - - /* Need to copy the NUL character too. */ - memcpy(word_freq.word, word, len); + word_freq.word.f_str = static_cast<byte*>( + mem_heap_alloc(query->heap, word->f_len + 1)); + memcpy(word_freq.word.f_str, word->f_str, word->f_len); + word_freq.word.f_str[word->f_len] = 0; + word_freq.word.f_len = word->f_len; word_freq.doc_count = 0; @@ -692,7 +701,7 @@ fts_query_add_word_freq( parent.last = rbt_add_node( query->word_freqs, &parent, &word_freq); - query->total_size += len + query->total_size += word->f_len + SIZEOF_RBT_CREATE + SIZEOF_RBT_NODE_ADD + sizeof(fts_word_freq_t); @@ -956,7 +965,7 @@ fts_query_add_word_to_document( /*===========================*/ fts_query_t* query, /*!< in: query to update */ doc_id_t doc_id, /*!< in: the document to update */ - const byte* word) /*!< in: the token to add */ + const fts_string_t* word) /*!< in: the token to add */ { ib_rbt_bound_t parent; fts_ranking_t* ranking = NULL; @@ -980,7 +989,7 @@ fts_query_add_word_to_document( } if (ranking != NULL) { - fts_ranking_words_add(query, ranking, (char*)word); + fts_ranking_words_add(query, ranking, word); } } @@ -1010,13 +1019,13 @@ fts_query_check_node( fts_word_freq_t*word_freqs; /* The word must exist. */ - ret = rbt_search(query->word_freqs, &parent, token->f_str); + ret = rbt_search(query->word_freqs, &parent, token); ut_a(ret == 0); word_freqs = rbt_value(fts_word_freq_t, parent.last); query->error = fts_query_filter_doc_ids( - query, token->f_str, word_freqs, node, + query, token, word_freqs, node, node->ilist, ilist_size, TRUE); } } @@ -1073,7 +1082,7 @@ fts_cache_find_wildcard( ret = rbt_search(query->word_freqs, &freq_parent, - srch_text.f_str); + &srch_text); ut_a(ret == 0); @@ -1082,7 +1091,7 @@ fts_cache_find_wildcard( freq_parent.last); query->error = fts_query_filter_doc_ids( - query, srch_text.f_str, + query, &srch_text, word_freqs, node, node->ilist, node->ilist_size, TRUE); @@ -1542,7 +1551,7 @@ fts_merge_doc_ids( for (node = rbt_first(doc_ids); node; node = rbt_next(doc_ids, node)) { fts_ranking_t* ranking; ulint pos = 0; - byte* word = NULL; + fts_string_t word; ranking = rbt_value(fts_ranking_t, node); @@ -1557,7 +1566,7 @@ fts_merge_doc_ids( ut_a(ranking->words); while (fts_ranking_words_get_next(query, ranking, &pos, &word)) { fts_query_add_word_to_document(query, ranking->doc_id, - word); + &word); } } @@ -2472,8 +2481,7 @@ fts_query_search_phrase( token = static_cast<fts_string_t*>( ib_vector_get(tokens, z)); fts_query_add_word_to_document( - query, match->doc_id, - token->f_str); + query, match->doc_id, token); } } } @@ -2562,7 +2570,7 @@ fts_query_phrase_search( && result_str.f_n_char <= fts_max_token_size) { /* Add the word to the RB tree so that we can calculate it's frequencey within a document. */ - fts_query_add_word_freq(query, token->f_str); + fts_query_add_word_freq(query, token); } else { ib_vector_pop(tokens); } @@ -2687,7 +2695,7 @@ fts_query_phrase_search( } fts_query_add_word_to_document( - query, match->doc_id, token->f_str); + query, match->doc_id, token); } query->oper = oper; goto func_exit; @@ -2837,6 +2845,8 @@ fts_query_visitor( ut_ad(query->intersection == NULL); query->intersection = rbt_create( sizeof(fts_ranking_t), fts_ranking_doc_id_cmp); + + query->total_size += SIZEOF_RBT_CREATE; } /* Set the current proximity distance. */ @@ -2858,10 +2868,12 @@ fts_query_visitor( break; case FTS_AST_TERM: + token.f_str = node->term.ptr; + token.f_len = ut_strlen(reinterpret_cast<char*>(token.f_str)); /* Add the word to our RB tree that will be used to calculate this terms per document frequency. */ - fts_query_add_word_freq(query, node->term.ptr); + fts_query_add_word_freq(query, &token); ptr = fts_query_get_token(node, &token); query->error = fts_query_execute(query, &token); @@ -2871,6 +2883,10 @@ fts_query_visitor( } break; + case FTS_AST_SUBEXP_LIST: + query->error = fts_ast_visit_sub_exp(node, fts_query_visitor, arg); + break; + default: ut_error; } @@ -2905,13 +2921,7 @@ fts_ast_visit_sub_exp( ut_a(node->type == FTS_AST_SUBEXP_LIST); - node = node->list.head; - - if (!node || !node->next) { - return(error); - } - - cur_oper = node->oper; + cur_oper = query->oper; /* Save current result set */ parent_doc_ids = query->doc_ids; @@ -2927,26 +2937,20 @@ fts_ast_visit_sub_exp( query->multi_exist = false; /* Process nodes in current sub-expression and store its result set in query->doc_ids we created above. */ - error = fts_ast_visit(FTS_NONE, node->next, visitor, + error = fts_ast_visit(FTS_NONE, node, visitor, arg, &will_be_ignored); - /* Reinstate parent node state and prepare for merge. */ + /* Reinstate parent node state */ query->multi_exist = multi_exist; query->oper = cur_oper; - subexpr_doc_ids = query->doc_ids; - - /* Restore current result set. */ - query->doc_ids = parent_doc_ids; /* Merge the sub-expression result with the parent result set. */ + subexpr_doc_ids = query->doc_ids; + query->doc_ids = parent_doc_ids; if (error == DB_SUCCESS && !rbt_empty(subexpr_doc_ids)) { error = fts_merge_doc_ids(query, subexpr_doc_ids); } - if (query->oper == FTS_EXIST) { - query->multi_exist = true; - } - /* Free current result set. Result already merged into parent. */ fts_query_free_doc_ids(query, subexpr_doc_ids); @@ -3033,14 +3037,13 @@ static dberr_t fts_query_filter_doc_ids( /*=====================*/ - fts_query_t* query, /*!< in: query instance */ - const byte* word, /*!< in: the current word */ - fts_word_freq_t*word_freq, /*!< in/out: word frequency */ - const fts_node_t* - node, /*!< in: current FTS node */ - void* data, /*!< in: doc id ilist */ - ulint len, /*!< in: doc id ilist size */ - ibool calc_doc_count) /*!< in: whether to remember doc count */ + fts_query_t* query, /*!< in: query instance */ + const fts_string_t* word, /*!< in: the current word */ + fts_word_freq_t* word_freq, /*!< in/out: word frequency */ + const fts_node_t* node, /*!< in: current FTS node */ + void* data, /*!< in: doc id ilist */ + ulint len, /*!< in: doc id ilist size */ + ibool calc_doc_count) /*!< in: whether to remember doc count */ { byte* ptr = static_cast<byte*>(data); doc_id_t doc_id = 0; @@ -3163,13 +3166,15 @@ fts_query_read_node( ib_rbt_bound_t parent; fts_word_freq_t* word_freq; ibool skip = FALSE; - byte term[FTS_MAX_WORD_LEN + 1]; + fts_string_t term; + byte buf[FTS_MAX_WORD_LEN + 1]; dberr_t error = DB_SUCCESS; ut_a(query->cur_node->type == FTS_AST_TERM || query->cur_node->type == FTS_AST_TEXT); memset(&node, 0, sizeof(node)); + term.f_str = buf; /* Need to consider the wildcard search case, the word frequency is created on the search string not the actual word. So we need @@ -3179,15 +3184,18 @@ fts_query_read_node( /* These cast are safe since we only care about the terminating NUL character as an end of string marker. */ - ut_strcpy((char*) term, (char*) query->cur_node->term.ptr); + term.f_len = ut_strlen(reinterpret_cast<char*> + (query->cur_node->term.ptr)); + ut_ad(FTS_MAX_WORD_LEN >= term.f_len); + memcpy(term.f_str, query->cur_node->term.ptr, term.f_len); } else { - /* Need to copy the NUL character too. */ - memcpy(term, word->f_str, word->f_len); - term[word->f_len] = 0; + term.f_len = word->f_len; + ut_ad(FTS_MAX_WORD_LEN >= word->f_len); + memcpy(term.f_str, word->f_str, word->f_len); } /* Lookup the word in our rb tree, it must exist. */ - ret = rbt_search(query->word_freqs, &parent, term); + ret = rbt_search(query->word_freqs, &parent, &term); ut_a(ret == 0); @@ -3239,7 +3247,7 @@ fts_query_read_node( case 4: /* ILIST */ error = fts_query_filter_doc_ids( - query, word_freq->word, word_freq, + query, &word_freq->word, word_freq, &node, data, len, FALSE); break; @@ -3332,7 +3340,7 @@ fts_query_calculate_idf( if (fts_enable_diag_print) { fprintf(stderr,"'%s' -> " UINT64PF "/" UINT64PF " %6.5lf\n", - word_freq->word, + word_freq->word.f_str, query->total_docs, word_freq->doc_count, word_freq->idf); } @@ -3349,12 +3357,12 @@ fts_query_calculate_ranking( fts_ranking_t* ranking) /*!< in: Document to rank */ { ulint pos = 0; - byte* word = NULL; + fts_string_t word; /* At this stage, ranking->rank should not exceed the 1.0 bound */ ut_ad(ranking->rank <= 1.0 && ranking->rank >= -1.0); - ut_ad(query->word_map->size() == query->word_vector->size()); + ut_ad(rbt_size(query->word_map) == query->word_vector->size()); while (fts_ranking_words_get_next(query, ranking, &pos, &word)) { int ret; @@ -3363,8 +3371,7 @@ fts_query_calculate_ranking( fts_doc_freq_t* doc_freq; fts_word_freq_t* word_freq; - ut_ad(word != NULL); - ret = rbt_search(query->word_freqs, &parent, word); + ret = rbt_search(query->word_freqs, &parent, &word); /* It must exist. */ ut_a(ret == 0); @@ -3620,18 +3627,18 @@ fts_query_free( ut_a(!query->intersection); - if (query->heap) { - mem_heap_free(query->heap); - } - if (query->word_map) { - delete query->word_map; + rbt_free(query->word_map); } if (query->word_vector) { delete query->word_vector; } + if (query->heap) { + mem_heap_free(query->heap); + } + memset(query, 0, sizeof(*query)); } @@ -3820,6 +3827,7 @@ fts_query( query.fts_common_table.type = FTS_COMMON_TABLE; query.fts_common_table.table_id = index->table->id; query.fts_common_table.parent = index->table->name; + query.fts_common_table.table = index->table; charset = fts_index_get_charset(index); @@ -3828,15 +3836,17 @@ fts_query( query.fts_index_table.table_id = index->table->id; query.fts_index_table.parent = index->table->name; query.fts_index_table.charset = charset; + query.fts_index_table.table = index->table; - query.word_map = new word_map_t; + query.word_map = rbt_create_arg_cmp( + sizeof(fts_string_t), innobase_fts_text_cmp, (void*) charset); query.word_vector = new word_vector_t; query.error = DB_SUCCESS; /* Setup the RB tree that will be used to collect per term statistics. */ query.word_freqs = rbt_create_arg_cmp( - sizeof(fts_word_freq_t), innobase_fts_string_cmp, (void*) charset); + sizeof(fts_word_freq_t), innobase_fts_text_cmp, (void*) charset); query.total_size += SIZEOF_RBT_CREATE; @@ -4060,13 +4070,14 @@ fts_print_doc_id( fts_ranking_t* ranking; ranking = rbt_value(fts_ranking_t, node); - fprintf(stderr, "doc_ids info, doc_id: %ld \n", + ib_logf(IB_LOG_LEVEL_INFO, "doc_ids info, doc_id: %ld \n", (ulint) ranking->doc_id); - ulint pos = 0; - byte* value = NULL; - while (fts_ranking_words_get_next(query, ranking, &pos, &value)) { - fprintf(stderr, "doc_ids info, value: %s \n", value); + ulint pos = 0; + fts_string_t word; + + while (fts_ranking_words_get_next(query, ranking, &pos, &word)) { + ib_logf(IB_LOG_LEVEL_INFO, "doc_ids info, value: %s \n", word.f_str); } } } @@ -4122,7 +4133,7 @@ fts_expand_query( fts_ranking_t* ranking; ulint pos; - byte* word; + fts_string_t word; ulint prev_token_size; ulint estimate_size; @@ -4144,22 +4155,17 @@ fts_expand_query( /* Remove words that have already been searched in the first pass */ pos = 0; - word = NULL; while (fts_ranking_words_get_next(query, ranking, &pos, - &word)) { - fts_string_t str; + &word)) { ibool ret; - /* FIXME: We are discarding a const qualifier here. */ - str.f_str = word; - str.f_len = ut_strlen((const char*) str.f_str); - ret = rbt_delete(result_doc.tokens, &str); + ret = rbt_delete(result_doc.tokens, &word); /* The word must exist in the doc we found */ if (!ret) { - fprintf(stderr, " InnoDB: Error: Did not " + ib_logf(IB_LOG_LEVEL_ERROR, "Did not " "find word %s in doc %ld for query " - "expansion search.\n", str.f_str, + "expansion search.\n", word.f_str, (ulint) ranking->doc_id); } } @@ -4184,7 +4190,8 @@ fts_expand_query( fts_token_t* mytoken; mytoken = rbt_value(fts_token_t, token_node); - fts_query_add_word_freq(query, mytoken->text.f_str); + ut_ad(mytoken->text.f_str[mytoken->text.f_len] == 0); + fts_query_add_word_freq(query, &mytoken->text); error = fts_query_union(query, &mytoken->text); if (error != DB_SUCCESS) { @@ -4323,8 +4330,7 @@ fts_phrase_or_proximity_search( token = static_cast<fts_string_t*>( ib_vector_get(tokens, z)); fts_query_add_word_to_document( - query, match[0]->doc_id, - token->f_str); + query, match[0]->doc_id, token); } } } diff --git a/storage/xtradb/fts/fts0sql.cc b/storage/xtradb/fts/fts0sql.cc index 03c19d93af6..14bc3ec44c9 100644 --- a/storage/xtradb/fts/fts0sql.cc +++ b/storage/xtradb/fts/fts0sql.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -61,21 +61,28 @@ fts_get_table_id( long */ { int len; + bool hex_name = DICT_TF2_FLAG_IS_SET(fts_table->table, + DICT_TF2_FTS_AUX_HEX_NAME); + + ut_a(fts_table->table != NULL); switch (fts_table->type) { case FTS_COMMON_TABLE: - len = fts_write_object_id(fts_table->table_id, table_id); + len = fts_write_object_id(fts_table->table_id, table_id, + hex_name); break; case FTS_INDEX_TABLE: - len = fts_write_object_id(fts_table->table_id, table_id); + len = fts_write_object_id(fts_table->table_id, table_id, + hex_name); table_id[len] = '_'; ++len; table_id += len; - len += fts_write_object_id(fts_table->index_id, table_id); + len += fts_write_object_id(fts_table->index_id, table_id, + hex_name); break; default: @@ -191,7 +198,7 @@ fts_parse_sql( str = ut_str3cat(fts_sql_begin, str_tmp, fts_sql_end); mem_free(str_tmp); - dict_locked = (fts_table && fts_table->table + dict_locked = (fts_table && fts_table->table->fts && (fts_table->table->fts->fts_status & TABLE_DICT_LOCKED)); diff --git a/storage/xtradb/fts/fts0tlex.cc b/storage/xtradb/fts/fts0tlex.cc index 717ddb8a77e..f78456d8795 100644 --- a/storage/xtradb/fts/fts0tlex.cc +++ b/storage/xtradb/fts/fts0tlex.cc @@ -305,9 +305,9 @@ YY_BUFFER_STATE fts0t_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner YY_BUFFER_STATE fts0t_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); YY_BUFFER_STATE fts0t_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner ); -void *fts0talloc (yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); -void *fts0trealloc (void *,yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); -void fts0tfree (void * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); +void *fts0talloc (yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); +void *fts0trealloc (void *,yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); +void fts0tfree (void * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); #define yy_new_buffer fts0t_create_buffer @@ -347,7 +347,7 @@ typedef int yy_state_type; static yy_state_type yy_get_previous_state (yyscan_t yyscanner ); static yy_state_type yy_try_NUL_trans (yy_state_type current_state ,yyscan_t yyscanner); static int yy_get_next_buffer (yyscan_t yyscanner ); -static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); +static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); /* Done after the current pattern has been matched and before the * corresponding action - sets up yytext. @@ -575,11 +575,11 @@ extern int fts0twrap (yyscan_t yyscanner ); #endif #ifndef yytext_ptr -static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))); +static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))); #endif #ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))); +static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))); #endif #ifndef YY_NO_INPUT @@ -1601,7 +1601,7 @@ YY_BUFFER_STATE fts0t_scan_bytes (yyconst char * yybytes, int _yybytes_len , y #define YY_EXIT_FAILURE 2 #endif -static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { (void) fprintf( stderr, "%s\n", msg ); exit( YY_EXIT_FAILURE ); @@ -1902,7 +1902,7 @@ int fts0tlex_destroy (yyscan_t yyscanner) */ #ifndef yytext_ptr -static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { register int i; for ( i = 0; i < n; ++i ) @@ -1911,7 +1911,7 @@ static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t #endif #ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { register int n; for ( n = 0; s[n]; ++n ) @@ -1921,12 +1921,12 @@ static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribu } #endif -void *fts0talloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +void *fts0talloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { return (void *) malloc( size ); } -void *fts0trealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +void *fts0trealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { /* The cast to (char *) in the following accommodates both * implementations that use char* generic pointers, and those @@ -1938,7 +1938,7 @@ void *fts0trealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __at return (void *) realloc( (char *) ptr, size ); } -void fts0tfree (void * ptr , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +void fts0tfree (void * ptr , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { free( (char *) ptr ); /* see fts0trealloc() for (char *) cast */ } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 1b3a4805e78..b90806724fe 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -469,6 +469,7 @@ static PSI_thread_info all_innodb_threads[] = { {&srv_master_thread_key, "srv_master_thread", 0}, {&srv_purge_thread_key, "srv_purge_thread", 0}, {&buf_page_cleaner_thread_key, "page_cleaner_thread", 0}, + {&buf_lru_manager_thread_key, "lru_manager_thread", 0}, {&recv_writer_thread_key, "recv_writer_thread", 0}, {&srv_log_tracking_thread_key, "srv_redo_log_follow_thread", 0} }; @@ -547,6 +548,7 @@ ib_cb_t innodb_api_cb[] = { (ib_cb_t) ib_cursor_open_index_using_name, (ib_cb_t) ib_close_thd, (ib_cb_t) ib_cfg_get_cfg, + (ib_cb_t) ib_cursor_set_memcached_sync, (ib_cb_t) ib_cursor_set_cluster_access, (ib_cb_t) ib_cursor_commit_trx, (ib_cb_t) ib_cfg_trx_level, @@ -965,6 +967,19 @@ innobase_rollback_to_savepoint( void* savepoint); /*!< in: savepoint data */ /*****************************************************************//** +Check whether innodb state allows to safely release MDL locks after +rollback to savepoint. +@return true if it is safe, false if its not safe. */ +static +bool +innobase_rollback_to_savepoint_can_release_mdl( +/*===========================================*/ + handlerton* hton, /*!< in/out: InnoDB handlerton */ + THD* thd); /*!< in: handle to the MySQL thread of + the user whose XA transaction should + be rolled back to savepoint */ + +/*****************************************************************//** Sets a transaction savepoint. @return always 0, that is, always succeeds */ static @@ -3190,6 +3205,8 @@ innobase_init( innobase_hton->close_connection = innobase_close_connection; innobase_hton->savepoint_set = innobase_savepoint; innobase_hton->savepoint_rollback = innobase_rollback_to_savepoint; + innobase_hton->savepoint_rollback_can_release_mdl = + innobase_rollback_to_savepoint_can_release_mdl; innobase_hton->savepoint_release = innobase_release_savepoint; innobase_hton->commit_ordered=innobase_commit_ordered; innobase_hton->commit = innobase_commit; @@ -4462,6 +4479,38 @@ innobase_rollback_to_savepoint( } /*****************************************************************//** +Check whether innodb state allows to safely release MDL locks after +rollback to savepoint. +When binlog is on, MDL locks acquired after savepoint unit are not +released if there are any locks held in InnoDB. +@return true if it is safe, false if its not safe. */ +static +bool +innobase_rollback_to_savepoint_can_release_mdl( +/*===========================================*/ + handlerton* hton, /*!< in: InnoDB handlerton */ + THD* thd) /*!< in: handle to the MySQL thread + of the user whose transaction should + be rolled back to savepoint */ +{ + trx_t* trx; + + DBUG_ENTER("innobase_rollback_to_savepoint_can_release_mdl"); + DBUG_ASSERT(hton == innodb_hton_ptr); + + trx = check_trx_exists(thd); + ut_ad(trx); + + /* If transaction has not acquired any locks then it is safe + to release MDL after rollback to savepoint */ + if (!(UT_LIST_GET_LEN(trx->lock.trx_locks))) { + DBUG_RETURN(true); + } + + DBUG_RETURN(false); +} + +/*****************************************************************//** Release transaction savepoint name. @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the given name */ @@ -6147,23 +6196,7 @@ innobase_fts_text_cmp_prefix( to negate the result */ return(-result); } -/******************************************************************//** -compare two character string according to their charset. */ -UNIV_INTERN -int -innobase_fts_string_cmp( -/*====================*/ - const void* cs, /*!< in: Character set */ - const void* p1, /*!< in: key */ - const void* p2) /*!< in: node */ -{ - const CHARSET_INFO* charset = (const CHARSET_INFO*) cs; - uchar* s1 = (uchar*) p1; - uchar* s2 = *(uchar**) p2; - return(ha_compare_text(charset, s1, strlen((const char*) s1), - s2, strlen((const char*) s2), 0, 0)); -} /******************************************************************//** Makes all characters in a string lower case. */ UNIV_INTERN @@ -9237,12 +9270,6 @@ ha_innobase::position( } } -/* limit innodb monitor access to users with PROCESS privilege. -See http://bugs.mysql.com/32710 for expl. why we choose PROCESS. */ -#define IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name, thd) \ - (row_is_magic_monitor_table(table_name) \ - && check_global_access(thd, PROCESS_ACL)) - /*****************************************************************//** Check whether there exist a column named as "FTS_DOC_ID", which is reserved for InnoDB FTS Doc ID @@ -9357,16 +9384,6 @@ create_table_def( DBUG_RETURN(ER_TABLE_NAME); } - /* table_name must contain '/'. Later in the code we assert if it - does not */ - if (strcmp(strchr(table_name, '/') + 1, - "innodb_table_monitor") == 0) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_WRONG_COMMAND, - DEPRECATED_MSG_INNODB_TABLE_MONITOR); - } - n_cols = form->s->fields; s_cols = form->s->stored_fields; @@ -9391,18 +9408,18 @@ create_table_def( /* Adjust for the FTS hidden field */ if (!has_doc_id_col) { table = dict_mem_table_create(table_name, 0, s_cols + 1, - flags, flags2); + flags, flags2, false); /* Set the hidden doc_id column. */ table->fts->doc_col = s_cols; } else { table = dict_mem_table_create(table_name, 0, s_cols, - flags, flags2); + flags, flags2, false); table->fts->doc_col = doc_id_col; } } else { table = dict_mem_table_create(table_name, 0, s_cols, - flags, flags2); + flags, flags2, false); } if (flags2 & DICT_TF2_TEMPORARY) { @@ -9740,6 +9757,7 @@ get_row_format_name( return("FIXED"); case ROW_TYPE_PAGE: case ROW_TYPE_NOT_USED: + default: break; } return("NOT USED"); @@ -9885,6 +9903,7 @@ create_options_are_invalid( case ROW_TYPE_FIXED: case ROW_TYPE_PAGE: case ROW_TYPE_NOT_USED: + default: push_warning( thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, \ @@ -10252,6 +10271,7 @@ index_bad: case ROW_TYPE_NOT_USED: case ROW_TYPE_FIXED: case ROW_TYPE_PAGE: + default: push_warning( thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, @@ -10282,6 +10302,11 @@ index_bad: *flags2 |= DICT_TF2_USE_TABLESPACE; } + /* Set the flags2 when create table or alter tables */ + *flags2 |= DICT_TF2_FTS_AUX_HEX_NAME; + DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", + *flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;); + DBUG_RETURN(true); } @@ -10373,8 +10398,23 @@ ha_innobase::create( DBUG_RETURN(-1); } - if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(norm_name, thd)) { - DBUG_RETURN(HA_ERR_GENERIC); + if (row_is_magic_monitor_table(norm_name)) { + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + HA_ERR_WRONG_COMMAND, + "Using the table name %s to enable " + "diagnostic output is deprecated " + "and may be removed in future releases. " + "Use INFORMATION_SCHEMA or " + "PERFORMANCE_SCHEMA tables or " + "SET GLOBAL innodb_status_output=ON.", + dict_remove_db_name(norm_name)); + + /* Limit innodb monitor access to users with PROCESS privilege. + See http://bugs.mysql.com/32710 why we chose PROCESS. */ + if (check_global_access(thd, PROCESS_ACL)) { + DBUG_RETURN(HA_ERR_GENERIC); + } } /* Get the transaction associated with the current thd, or create one @@ -10836,7 +10876,8 @@ ha_innobase::delete_table( if (srv_read_only_mode) { DBUG_RETURN(HA_ERR_TABLE_READONLY); - } else if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(norm_name, thd)) { + } else if (row_is_magic_monitor_table(norm_name) + && check_global_access(thd, PROCESS_ACL)) { DBUG_RETURN(HA_ERR_GENERIC); } @@ -11085,17 +11126,7 @@ innobase_rename_table( } } - if (error != DB_SUCCESS) { - if (!srv_read_only_mode) { - FILE* ef = dict_foreign_err_file; - - fputs("InnoDB: Renaming table ", ef); - ut_print_name(ef, trx, TRUE, norm_from); - fputs(" to ", ef); - ut_print_name(ef, trx, TRUE, norm_to); - fputs(" failed!\n", ef); - } - } else { + if (error == DB_SUCCESS) { #ifndef __WIN__ sql_print_warning("Rename partition table %s " "succeeds after converting to lower " @@ -12101,7 +12132,8 @@ ha_innobase::optimize( calls to OPTIMIZE, which is undesirable. */ if (innodb_optimize_fulltext_only) { - if (prebuilt->table->fts && prebuilt->table->fts->cache) { + if (prebuilt->table->fts && prebuilt->table->fts->cache + && !dict_table_is_discarded(prebuilt->table)) { fts_sync_table(prebuilt->table); fts_optimize_table(prebuilt->table); } @@ -16105,7 +16137,7 @@ innodb_enable_monitor_at_startup( /****************************************************************//** Update the innodb_sched_priority_cleaner variable and set the thread -priority accordingly. */ +priorities accordingly. */ static void innodb_sched_priority_cleaner_update( @@ -16121,6 +16153,24 @@ innodb_sched_priority_cleaner_update( ulint priority = *static_cast<const ulint *>(save); ulint actual_priority; + /* Set the priority for the LRU manager thread */ + ut_ad(buf_lru_manager_is_active); + actual_priority = os_thread_set_priority(srv_lru_manager_tid, + priority); + if (UNIV_UNLIKELY(actual_priority != priority)) { + + push_warning_printf(thd, Sql_condition::WARN_LEVEL_WARN, + ER_WRONG_ARGUMENTS, + "Failed to set the LRU manager thread " + "priority to %lu, " + "the current priority is %lu", priority, + actual_priority); + } else { + + srv_sched_priority_cleaner = priority; + } + + /* Set the priority for the page cleaner thread */ if (srv_read_only_mode) { return; @@ -16136,9 +16186,6 @@ innodb_sched_priority_cleaner_update( "priority to %lu, " "the current priority is %lu", priority, actual_priority); - } else { - - srv_sched_priority_cleaner = priority; } } @@ -16443,6 +16490,7 @@ innobase_fts_find_ranking( static my_bool innodb_purge_run_now = TRUE; static my_bool innodb_purge_stop_now = TRUE; static my_bool innodb_log_checkpoint_now = TRUE; +static my_bool innodb_buf_flush_list_now = TRUE; static my_bool innodb_track_redo_log_now = TRUE; /****************************************************************//** @@ -16519,19 +16567,42 @@ checkpoint_now_set( } /****************************************************************//** +Force a dirty pages flush now. */ +static +void +buf_flush_list_now_set( +/*===================*/ + THD* thd /*!< in: thread handle */ + __attribute__((unused)), + struct st_mysql_sys_var* var /*!< in: pointer to system + variable */ + __attribute__((unused)), + void* var_ptr /*!< out: where the formal + string goes */ + __attribute__((unused)), + const void* save) /*!< in: immediate result from + check function */ +{ + if (*(my_bool*) save) { + buf_flush_list(ULINT_MAX, LSN_MAX, NULL); + buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); + } +} + +/****************************************************************//** Force log tracker to track the log synchronously. */ static void track_redo_log_now_set( /*===================*/ THD* thd /*!< in: thread handle */ - __attribute__((unused)), + __attribute__((unused)), struct st_mysql_sys_var* var /*!< in: pointer to system variable */ - __attribute__((unused)), + __attribute__((unused)), void* var_ptr /*!< out: where the formal string goes */ - __attribute__((unused)), + __attribute__((unused)), const void* save) /*!< in: immediate result from check function */ { @@ -16541,7 +16612,6 @@ track_redo_log_now_set( } } - #endif /* UNIV_DEBUG */ /*********************************************************************** @@ -16702,6 +16772,26 @@ buffer_pool_load_abort( } } +/** Update innodb_status_output or innodb_status_output_locks, +which control InnoDB "status monitor" output to the error log. +@param[in] thd thread handle +@param[in] var system variable +@param[out] var_ptr current value +@param[in] save to-be-assigned value */ +static +void +innodb_status_output_update( + THD* thd __attribute__((unused)), + struct st_mysql_sys_var* var __attribute__((unused)), + void* var_ptr __attribute__((unused)), + const void* save __attribute__((unused))) +{ + *static_cast<my_bool*>(var_ptr) = *static_cast<const my_bool*>(save); + /* The lock timeout monitor thread also takes care of this + output. */ + os_event_set(lock_sys->timeout_event); +} + static SHOW_VAR innodb_status_variables_export[]= { {"Innodb", (char*) &show_innodb_vars, SHOW_FUNC}, {NullS, NullS, SHOW_LONG} @@ -16830,11 +16920,17 @@ static MYSQL_SYSVAR_BOOL(log_checkpoint_now, innodb_log_checkpoint_now, "Force checkpoint now", NULL, checkpoint_now_set, FALSE); +static MYSQL_SYSVAR_BOOL(buf_flush_list_now, innodb_buf_flush_list_now, + PLUGIN_VAR_OPCMDARG, + "Force dirty page flush now", + NULL, buf_flush_list_now_set, FALSE); + static MYSQL_SYSVAR_BOOL(track_redo_log_now, innodb_track_redo_log_now, PLUGIN_VAR_OPCMDARG, "Force log tracker to catch up with checkpoint now", NULL, track_redo_log_now_set, FALSE); + #endif /* UNIV_DEBUG */ static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size, @@ -17137,7 +17233,7 @@ static MYSQL_SYSVAR_ENUM(foreground_preflush, srv_foreground_preflush, static MYSQL_SYSVAR_ULONG(sched_priority_cleaner, srv_sched_priority_cleaner, PLUGIN_VAR_RQCMDARG, - "Nice value for the cleaner thread scheduling", + "Nice value for the cleaner and LRU manager thread scheduling", NULL, innodb_sched_priority_cleaner_update, 19, 0, 39, 0); #endif /* UNIV_LINUX */ @@ -17182,7 +17278,8 @@ static MYSQL_SYSVAR_BOOL(priority_io, srv_io_thread_priority, static MYSQL_SYSVAR_BOOL(priority_cleaner, srv_cleaner_thread_priority, PLUGIN_VAR_OPCMDARG, - "Make buffer pool cleaner thread acquire shared resources with priority", + "Make buffer pool cleaner and LRU manager threads acquire shared resources " + "with priority", NULL, NULL, FALSE); static MYSQL_SYSVAR_BOOL(priority_master, srv_master_thread_priority, @@ -17684,6 +17781,15 @@ static MYSQL_SYSVAR_STR(monitor_reset_all, innobase_reset_all_monitor_counter, innodb_monitor_validate, innodb_reset_all_monitor_update, NULL); +static MYSQL_SYSVAR_BOOL(status_output, srv_print_innodb_monitor, + PLUGIN_VAR_OPCMDARG, "Enable InnoDB monitor output to the error log.", + NULL, innodb_status_output_update, FALSE); + +static MYSQL_SYSVAR_BOOL(status_output_locks, srv_print_innodb_lock_monitor, + PLUGIN_VAR_OPCMDARG, "Enable InnoDB lock monitor output to the error log." + " Requires innodb_status_output=ON.", + NULL, innodb_status_output_update, FALSE); + static MYSQL_SYSVAR_BOOL(print_all_deadlocks, srv_print_all_deadlocks, PLUGIN_VAR_OPCMDARG, "Print all deadlocks to MySQL error log (off by default)", @@ -17909,6 +18015,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(purge_run_now), MYSQL_SYSVAR(purge_stop_now), MYSQL_SYSVAR(log_checkpoint_now), + MYSQL_SYSVAR(buf_flush_list_now), MYSQL_SYSVAR(track_redo_log_now), #endif /* UNIV_DEBUG */ #ifdef UNIV_LINUX @@ -17933,6 +18040,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(cleaner_free_list_lwm), MYSQL_SYSVAR(cleaner_eviction_factor), #endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */ + MYSQL_SYSVAR(status_output), + MYSQL_SYSVAR(status_output_locks), MYSQL_SYSVAR(cleaner_lsn_age_factor), MYSQL_SYSVAR(foreground_preflush), MYSQL_SYSVAR(empty_free_list_algorithm), @@ -18294,7 +18403,9 @@ ib_errf( str[size - 1] = 0x0; vsnprintf(str, size, format, args); #elif HAVE_VASPRINTF - (void) vasprintf(&str, format, args); + int ret; + ret = vasprintf(&str, format, args); + ut_a(ret != -1); #else /* Use a fixed length string. */ str = static_cast<char*>(malloc(BUFSIZ)); @@ -18328,7 +18439,9 @@ ib_logf( str[size - 1] = 0x0; vsnprintf(str, size, format, args); #elif HAVE_VASPRINTF - (void) vasprintf(&str, format, args); + int ret; + ret = vasprintf(&str, format, args); + ut_a(ret != -1); #else /* Use a fixed length string. */ str = static_cast<char*>(malloc(BUFSIZ)); diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index 21859cb5447..13c6752ce8f 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -46,7 +46,7 @@ Smart ALTER TABLE #include "srv0mon.h" #include "fts0priv.h" #include "pars0pars.h" - +#include "row0sel.h" #include "ha_innodb.h" /** Operations for creating secondary indexes (no rebuild needed) */ @@ -240,6 +240,7 @@ ha_innobase::check_if_supported_inplace_alter( innobase_get_err_msg(ER_READ_ONLY_MODE); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); } else if (srv_created_new_raw || srv_force_recovery) { + ha_alter_info->unsupported_reason = innobase_get_err_msg(ER_READ_ONLY_MODE); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); @@ -2532,15 +2533,16 @@ innobase_drop_fts_index_table( /** Get the new column names if any columns were renamed @param ha_alter_info Data used during in-place alter @param altered_table MySQL table that is being altered +@param table MySQL table as it is before the ALTER operation @param user_table InnoDB table as it is before the ALTER operation @param heap Memory heap for the allocation @return array of new column names in rebuilt_table, or NULL if not renamed */ static __attribute__((nonnull, warn_unused_result)) const char** innobase_get_col_names( -/*===================*/ Alter_inplace_info* ha_alter_info, const TABLE* altered_table, + const TABLE* table, const dict_table_t* user_table, mem_heap_t* heap) { @@ -2548,19 +2550,31 @@ innobase_get_col_names( uint i; DBUG_ENTER("innobase_get_col_names"); - DBUG_ASSERT(user_table->n_def > altered_table->s->fields); + DBUG_ASSERT(user_table->n_def > table->s->fields); DBUG_ASSERT(ha_alter_info->handler_flags & Alter_inplace_info::ALTER_COLUMN_NAME); cols = static_cast<const char**>( - mem_heap_alloc(heap, user_table->n_def * sizeof *cols)); + mem_heap_zalloc(heap, user_table->n_def * sizeof *cols)); + + i = 0; + List_iterator_fast<Create_field> cf_it( + ha_alter_info->alter_info->create_list); + while (const Create_field* new_field = cf_it++) { + DBUG_ASSERT(i < altered_table->s->fields); - for (i = 0; i < altered_table->s->fields; i++) { - const Field* field = altered_table->field[i]; - cols[i] = field->field_name; + for (uint old_i = 0; table->field[old_i]; old_i++) { + if (new_field->field == table->field[old_i]) { + cols[old_i] = new_field->field_name; + break; + } + } + + i++; } /* Copy the internal column names. */ + i = table->s->fields; cols[i] = dict_table_get_col_name(user_table, i); while (++i < user_table->n_def) { @@ -2776,7 +2790,7 @@ prepare_inplace_alter_table_dict( /* The initial space id 0 may be overridden later. */ ctx->new_table = dict_mem_table_create( - new_table_name, 0, n_cols, flags, flags2); + new_table_name, 0, n_cols, flags, flags2, false); /* The rebuilt indexed_table will use the renamed column names. */ ctx->col_names = NULL; @@ -3346,6 +3360,9 @@ ha_innobase::prepare_inplace_alter_table( ulint fts_doc_col_no = ULINT_UNDEFINED; bool add_fts_doc_id = false; bool add_fts_doc_id_idx = false; +#ifdef _WIN32 + bool add_fts_idx = false; +#endif /* _WIN32 */ DBUG_ENTER("prepare_inplace_alter_table"); DBUG_ASSERT(!ha_alter_info->handler_ctx); @@ -3494,6 +3511,9 @@ check_if_ok_to_rename: & ~(HA_FULLTEXT | HA_PACK_KEY | HA_BINARY_PACK_KEY))); +#ifdef _WIN32 + add_fts_idx = true; +#endif /* _WIN32 */ continue; } @@ -3504,6 +3524,20 @@ check_if_ok_to_rename: } } +#ifdef _WIN32 + /* We won't be allowed to add fts index to a table with + fts indexes already but without AUX_HEX_NAME set. + This means the aux tables of the table failed to + rename to hex format but new created aux tables + shall be in hex format, which is contradictory. + It's only for Windows. */ + if (!DICT_TF2_FLAG_IS_SET(indexed_table, DICT_TF2_FTS_AUX_HEX_NAME) + && indexed_table->fts != NULL && add_fts_idx) { + my_error(ER_INNODB_FT_AUX_NOT_HEX_ID, MYF(0)); + goto err_exit_no_heap; + } +#endif /* _WIN32 */ + /* Check existing index definitions for too-long column prefixes as well, in case max_col_len shrunk. */ for (const dict_index_t* index @@ -3537,8 +3571,8 @@ check_if_ok_to_rename: if (ha_alter_info->handler_flags & Alter_inplace_info::ALTER_COLUMN_NAME) { col_names = innobase_get_col_names( - ha_alter_info, altered_table, indexed_table, - heap); + ha_alter_info, altered_table, table, + indexed_table, heap); } else { col_names = NULL; } @@ -4608,16 +4642,39 @@ commit_get_autoinc( & Alter_inplace_info::CHANGE_CREATE_OPTION) && (ha_alter_info->create_info->used_fields & HA_CREATE_USED_AUTO)) { - /* An AUTO_INCREMENT value was supplied, but the table - was not rebuilt. Get the user-supplied value or the - last value from the sequence. */ - ut_ad(old_table->found_next_number_field); + /* An AUTO_INCREMENT value was supplied, but the table was not + rebuilt. Get the user-supplied value or the last value from the + sequence. */ + ib_uint64_t max_value_table; + dberr_t err; + + Field* autoinc_field = + old_table->found_next_number_field; + + dict_index_t* index = dict_table_get_index_on_first_col( + ctx->old_table, autoinc_field->field_index); max_autoinc = ha_alter_info->create_info->auto_increment_value; dict_table_autoinc_lock(ctx->old_table); - if (max_autoinc < ctx->old_table->autoinc) { - max_autoinc = ctx->old_table->autoinc; + + err = row_search_max_autoinc( + index, autoinc_field->field_name, &max_value_table); + + if (err != DB_SUCCESS) { + ut_ad(0); + max_autoinc = 0; + } else if (max_autoinc <= max_value_table) { + ulonglong col_max_value; + ulonglong offset; + + col_max_value = innobase_get_int_col_max_value( + old_table->found_next_number_field); + + offset = ctx->prebuilt->autoinc_offset; + max_autoinc = innobase_next_autoinc( + max_value_table, 1, 1, offset, + col_max_value); } dict_table_autoinc_unlock(ctx->old_table); } else { diff --git a/storage/xtradb/ibuf/ibuf0ibuf.cc b/storage/xtradb/ibuf/ibuf0ibuf.cc index 2964fa99c8a..7e5d5532ee3 100644 --- a/storage/xtradb/ibuf/ibuf0ibuf.cc +++ b/storage/xtradb/ibuf/ibuf0ibuf.cc @@ -611,7 +611,8 @@ ibuf_init_at_db_start(void) heap = mem_heap_create(450); /* Use old-style record format for the insert buffer. */ - table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0, 0); + table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0, 0, + false); dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0); @@ -1572,7 +1573,7 @@ ibuf_dummy_index_create( table = dict_mem_table_create("IBUF_DUMMY", DICT_HDR_SPACE, n, - comp ? DICT_TF_COMPACT : 0, 0); + comp ? DICT_TF_COMPACT : 0, 0, true); index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY", DICT_HDR_SPACE, 0, n); diff --git a/storage/xtradb/include/api0api.h b/storage/xtradb/include/api0api.h index c294e3f34d5..d77d691becc 100644 --- a/storage/xtradb/include/api0api.h +++ b/storage/xtradb/include/api0api.h @@ -1257,6 +1257,16 @@ ib_cfg_get_cfg(); /*============*/ /*****************************************************************//** +Increase/decrease the memcached sync count of table to sync memcached +DML with SQL DDLs. +@return DB_SUCCESS or error number */ +ib_err_t +ib_cursor_set_memcached_sync( +/*=========================*/ + ib_crsr_t ib_crsr, /*!< in: cursor */ + ib_bool_t flag); /*!< in: true for increasing */ + +/*****************************************************************//** Check whether the table name conforms to our requirements. Currently we only do a simple check for the presence of a '/'. @return DB_SUCCESS or err code */ diff --git a/storage/xtradb/include/btr0pcur.h b/storage/xtradb/include/btr0pcur.h index fc008cdd185..cfbaacf4de3 100644 --- a/storage/xtradb/include/btr0pcur.h +++ b/storage/xtradb/include/btr0pcur.h @@ -459,6 +459,13 @@ void btr_pcur_move_to_prev_on_page( /*==========================*/ btr_pcur_t* cursor);/*!< in/out: persistent cursor */ +/*********************************************************//** +Moves the persistent cursor to the infimum record on the same page. */ +UNIV_INLINE +void +btr_pcur_move_before_first_on_page( +/*===============================*/ + btr_pcur_t* cursor); /*!< in/out: persistent cursor */ /** Position state of persistent B-tree cursor. */ enum pcur_pos_t { diff --git a/storage/xtradb/include/btr0pcur.ic b/storage/xtradb/include/btr0pcur.ic index 29f2fc722a2..7e355d3709d 100644 --- a/storage/xtradb/include/btr0pcur.ic +++ b/storage/xtradb/include/btr0pcur.ic @@ -588,3 +588,19 @@ btr_pcur_close( cursor->trx_if_known = NULL; } + +/*********************************************************//** +Moves the persistent cursor to the infimum record on the same page. */ +UNIV_INLINE +void +btr_pcur_move_before_first_on_page( +/*===============================*/ + btr_pcur_t* cursor) /*!< in/out: persistent cursor */ +{ + ut_ad(cursor->latch_mode != BTR_NO_LATCHES); + + page_cur_set_before_first(btr_pcur_get_block(cursor), + btr_pcur_get_page_cur(cursor)); + + cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; +} diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index ba2f413429c..8e2c283476a 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -596,6 +596,23 @@ buf_block_buf_fix_inc_func( # endif /* UNIV_SYNC_DEBUG */ buf_block_t* block) /*!< in/out: block to bufferfix */ __attribute__((nonnull)); + +/*******************************************************************//** +Increments the bufferfix count. */ +UNIV_INLINE +void +buf_block_fix( +/*===========*/ + buf_block_t* block); /*!< in/out: block to bufferfix */ + +/*******************************************************************//** +Increments the bufferfix count. */ +UNIV_INLINE +void +buf_block_unfix( +/*===========*/ + buf_block_t* block); /*!< in/out: block to bufferfix */ + # ifdef UNIV_SYNC_DEBUG /** Increments the bufferfix count. @param b in/out: block to bufferfix @@ -1437,25 +1454,39 @@ struct buf_page_t{ machine word. */ /* @{ */ - unsigned space:32; /*!< tablespace id. */ - unsigned offset:32; /*!< page number. */ - + ib_uint32_t space; /*!< tablespace id. */ + ib_uint32_t offset; /*!< page number. */ + /** count of how manyfold this block is currently bufferfixed */ +#ifdef PAGE_ATOMIC_REF_COUNT + ib_uint32_t buf_fix_count; + + /** type of pending I/O operation; Transitions from BUF_IO_NONE to + BUF_IO_WRITE and back are protected by the buf_page_get_mutex() mutex + and the corresponding flush state mutex. The flush state mutex + protection for io_fix and flush_type is not strictly required, but it + ensures consistent buffer pool instance state snapshots in + buf_pool_validate_instance(). @see enum buf_io_fix */ + byte io_fix; + + byte state; +#else + unsigned buf_fix_count:19; + + /** type of pending I/O operation; also protected by + buf_pool->mutex for writes only @see enum buf_io_fix */ + unsigned io_fix:2; + + /*!< state of the control block. + State transitions from BUF_BLOCK_READY_FOR_USE to BUF_BLOCK_MEMORY + need not be protected by buf_page_get_mutex(). @see enum buf_page_state. + State changes that are relevant to page_hash are additionally protected + by the appropriate page_hash mutex i.e.: if a page is in page_hash or + is being added to/removed from page_hash then the corresponding changes + must also be protected by page_hash mutex. */ unsigned state:BUF_PAGE_STATE_BITS; - /*!< state of the control block. - State transitions from - BUF_BLOCK_READY_FOR_USE to - BUF_BLOCK_MEMORY need not be - protected by buf_page_get_mutex(). - @see enum buf_page_state. - State changes that are relevant - to page_hash are additionally - protected by the appropriate - page_hash mutex i.e.: if a page - is in page_hash or is being - added to/removed from page_hash - then the corresponding changes - must also be protected by - page_hash mutex. */ + +#endif /* PAGE_ATOMIC_REF_COUNT */ + #ifndef UNIV_HOTBACKUP unsigned flush_type:2; /*!< if this block is currently being flushed to disk, this tells the @@ -1464,18 +1495,6 @@ struct buf_page_t{ mutex and the corresponding flush state mutex. @see buf_flush_t */ - unsigned io_fix:2; /*!< type of pending I/O operation. - Transitions from BUF_IO_NONE to - BUF_IO_WRITE and back are protected by - the buf_page_get_mutex() mutex and the - corresponding flush state mutex. The - flush state mutex protection for io_fix - and flush_type is not strictly - required, but it ensures consistent - buffer pool instance state snapshots in - buf_pool_validate_instance(). */ - unsigned buf_fix_count:19;/*!< count of how manyfold this block - is currently bufferfixed */ unsigned buf_pool_index:6;/*!< index number of the buffer pool that this block belongs to */ # if MAX_BUFFER_POOLS > 64 @@ -1630,7 +1649,7 @@ struct buf_block_t{ decompressed LRU list; used in debugging */ #endif /* UNIV_DEBUG */ - ib_mutex_t mutex; /*!< mutex protecting this block: + ib_mutex_t mutex; /*!< mutex protecting this block: state, io_fix, buf_fix_count, and accessed; we introduce this new mutex in InnoDB-5.1 to relieve @@ -1816,7 +1835,7 @@ struct buf_pool_t{ /** @name General fields */ /* @{ */ - ib_mutex_t zip_mutex; /*!< Zip mutex of this buffer + ib_mutex_t zip_mutex; /*!< Zip mutex of this buffer pool instance, protects compressed only pages (of type buf_page_t, not buf_block_t */ @@ -1873,7 +1892,7 @@ struct buf_pool_t{ /* @{ */ - ib_mutex_t flush_list_mutex;/*!< mutex protecting the + ib_mutex_t flush_list_mutex;/*!< mutex protecting the flush list access. This mutex protects flush_list, flush_rbt and bpage::list pointers when @@ -1994,18 +2013,30 @@ Use these instead of accessing buffer pool mutexes directly. */ #define buf_flush_list_mutex_own(b) mutex_own(&b->flush_list_mutex) /** Acquire the flush list mutex. */ -#define buf_flush_list_mutex_enter(b) do { \ - mutex_enter(&b->flush_list_mutex); \ +#define buf_flush_list_mutex_enter(b) do { \ + mutex_enter(&b->flush_list_mutex); \ } while (0) /** Release the flush list mutex. */ -# define buf_flush_list_mutex_exit(b) do { \ - mutex_exit(&b->flush_list_mutex); \ +# define buf_flush_list_mutex_exit(b) do { \ + mutex_exit(&b->flush_list_mutex); \ } while (0) +/** Test if block->mutex is owned. */ +#define buf_block_mutex_own(b) mutex_own(&(b)->mutex) + +/** Acquire the block->mutex. */ +#define buf_block_mutex_enter(b) do { \ + mutex_enter(&(b)->mutex); \ +} while (0) + +/** Release the trx->mutex. */ +#define buf_block_mutex_exit(b) do { \ + mutex_exit(&(b)->mutex); \ +} while (0) /** Get appropriate page_hash_lock. */ -# define buf_page_hash_lock_get(b, f) \ +# define buf_page_hash_lock_get(b, f) \ hash_get_lock(b->page_hash, f) #ifdef UNIV_SYNC_DEBUG diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic index 4ef354b11ab..fa366fd2a56 100644 --- a/storage/xtradb/include/buf0buf.ic +++ b/storage/xtradb/include/buf0buf.ic @@ -345,15 +345,16 @@ buf_page_get_mutex( /*===============*/ const buf_page_t* bpage) /*!< in: pointer to control block */ { - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - switch (buf_page_get_state(bpage)) { case BUF_BLOCK_POOL_WATCH: ut_error; return(NULL); case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: + case BUF_BLOCK_ZIP_DIRTY: { + buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + return(&buf_pool->zip_mutex); + } default: return(&((buf_block_t*) bpage)->mutex); } @@ -641,9 +642,10 @@ buf_page_set_accessed( buf_page_t* bpage) /*!< in/out: control block */ { ut_ad(mutex_own(buf_page_get_mutex(bpage))); + ut_a(buf_page_in_file(bpage)); - if (!bpage->access_time) { + if (bpage->access_time == 0) { /* Make this the time of the first access. */ bpage->access_time = ut_time_ms(); } @@ -1014,6 +1016,26 @@ buf_block_get_modify_clock( Increments the bufferfix count. */ UNIV_INLINE void +buf_block_fix( +/*===========*/ + buf_block_t* block) /*!< in/out: block to bufferfix */ +{ + ut_ad(!mutex_own(buf_page_get_mutex(&block->page))); +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_increment_uint32(&block->page.buf_fix_count, 1); +#else + ib_mutex_t* block_mutex = buf_page_get_mutex(&block->page); + + mutex_enter(block_mutex); + ++block->page.buf_fix_count; + mutex_exit(block_mutex); +#endif /* PAGE_ATOMIC_REF_COUNT */ +} + +/*******************************************************************//** +Increments the bufferfix count. */ +UNIV_INLINE +void buf_block_buf_fix_inc_func( /*=======================*/ #ifdef UNIV_SYNC_DEBUG @@ -1028,9 +1050,36 @@ buf_block_buf_fix_inc_func( ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line); ut_a(ret); #endif /* UNIV_SYNC_DEBUG */ + +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_increment_uint32(&block->page.buf_fix_count, 1); +#else ut_ad(mutex_own(&block->mutex)); - block->page.buf_fix_count++; + ++block->page.buf_fix_count; +#endif /* PAGE_ATOMIC_REF_COUNT */ +} + +/*******************************************************************//** +Decrements the bufferfix count. */ +UNIV_INLINE +void +buf_block_unfix( +/*============*/ + buf_block_t* block) /*!< in/out: block to bufferunfix */ +{ + ut_ad(block->page.buf_fix_count > 0); + ut_ad(!mutex_own(buf_page_get_mutex(&block->page))); + +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_decrement_uint32(&block->page.buf_fix_count, 1); +#else + ib_mutex_t* block_mutex = buf_page_get_mutex(&block->page); + + mutex_enter(block_mutex); + --block->page.buf_fix_count; + mutex_exit(block_mutex); +#endif /* PAGE_ATOMIC_REF_COUNT */ } /*******************************************************************//** @@ -1041,9 +1090,16 @@ buf_block_buf_fix_dec( /*==================*/ buf_block_t* block) /*!< in/out: block to bufferunfix */ { - ut_ad(mutex_own(&block->mutex)); + ut_ad(block->page.buf_fix_count > 0); + +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_decrement_uint32(&block->page.buf_fix_count, 1); +#else + mutex_enter(&block->mutex); + --block->page.buf_fix_count; + mutex_exit(&block->mutex); +#endif /* PAGE_ATOMIC_REF_COUNT */ - block->page.buf_fix_count--; #ifdef UNIV_SYNC_DEBUG rw_lock_s_unlock(&block->debug_latch); #endif @@ -1302,27 +1358,20 @@ buf_page_release_zip( buf_page_t* bpage) /*!< in: buffer block */ { buf_block_t* block; - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(bpage); - ut_a(bpage->buf_fix_count > 0); + block = (buf_block_t*) bpage; switch (buf_page_get_state(bpage)) { - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: - mutex_enter(&buf_pool->zip_mutex); - bpage->buf_fix_count--; - mutex_exit(&buf_pool->zip_mutex); - return; case BUF_BLOCK_FILE_PAGE: - block = (buf_block_t*) bpage; - mutex_enter(&block->mutex); #ifdef UNIV_SYNC_DEBUG rw_lock_s_unlock(&block->debug_latch); -#endif - bpage->buf_fix_count--; - mutex_exit(&block->mutex); +#endif /* UNUV_SYNC_DEBUG */ + /* Fall through */ + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_ZIP_DIRTY: + buf_block_unfix(block); return; + case BUF_BLOCK_POOL_WATCH: case BUF_BLOCK_NOT_USED: case BUF_BLOCK_READY_FOR_USE: @@ -1345,25 +1394,18 @@ buf_page_release( ulint rw_latch) /*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ { - ut_ad(block); - ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - ut_a(block->page.buf_fix_count > 0); - - mutex_enter(&block->mutex); #ifdef UNIV_SYNC_DEBUG rw_lock_s_unlock(&(block->debug_latch)); #endif - block->page.buf_fix_count--; - - mutex_exit(&block->mutex); - if (rw_latch == RW_S_LATCH) { rw_lock_s_unlock(&(block->lock)); } else if (rw_latch == RW_X_LATCH) { rw_lock_x_unlock(&(block->lock)); } + + buf_block_unfix(block); } #ifdef UNIV_SYNC_DEBUG @@ -1381,6 +1423,7 @@ buf_block_dbg_add_level( { sync_thread_add_level(&block->lock, level, FALSE); } + #endif /* UNIV_SYNC_DEBUG */ /*********************************************************************//** Get the nth chunk's buffer block in the specified buffer pool. diff --git a/storage/xtradb/include/buf0dblwr.h b/storage/xtradb/include/buf0dblwr.h index 1b9336f4002..740286d0a82 100644 --- a/storage/xtradb/include/buf0dblwr.h +++ b/storage/xtradb/include/buf0dblwr.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -29,6 +29,7 @@ Created 2011/12/19 Inaam Rana #include "univ.i" #include "ut0byte.h" #include "log0log.h" +#include "log0recv.h" #ifndef UNIV_HOTBACKUP @@ -44,18 +45,25 @@ UNIV_INTERN void buf_dblwr_create(void); /*==================*/ + /****************************************************************//** At a database startup initializes the doublewrite buffer memory structure if we already have a doublewrite buffer created in the data files. If we are upgrading to an InnoDB version which supports multiple tablespaces, then this function performs the necessary update operations. If we are in a crash -recovery, this function uses a possible doublewrite buffer to restore -half-written pages in the data files. */ +recovery, this function loads the pages from double write buffer into memory. */ UNIV_INTERN void -buf_dblwr_init_or_restore_pages( -/*============================*/ - ibool restore_corrupt_pages); /*!< in: TRUE=restore pages */ +buf_dblwr_init_or_load_pages( +/*=========================*/ + bool load_corrupt_pages); + +/****************************************************************//** +Process the double write buffer pages. */ +void +buf_dblwr_process(void); +/*===================*/ + /****************************************************************//** frees doublewrite buffer. */ UNIV_INTERN diff --git a/storage/xtradb/include/buf0flu.h b/storage/xtradb/include/buf0flu.h index 73525a5bb58..7699e4fda67 100644 --- a/storage/xtradb/include/buf0flu.h +++ b/storage/xtradb/include/buf0flu.h @@ -36,6 +36,9 @@ Created 11/5/1995 Heikki Tuuri /** Flag indicating if the page_cleaner is in active state. */ extern ibool buf_page_cleaner_is_active; +/** Flag indicating if the lru_manager is in active state. */ +extern bool buf_lru_manager_is_active; + /********************************************************************//** Remove a block from the flush list of modified blocks. */ UNIV_INTERN @@ -175,7 +178,7 @@ buf_flush_ready_for_replace( buf_page_in_file(bpage) and in the LRU list */ /******************************************************************//** page_cleaner thread tasked with flushing dirty pages from the buffer -pools. As of now we'll have only one instance of this thread. +pool flush lists. As of now we'll have only one instance of this thread. @return a dummy parameter */ extern "C" UNIV_INTERN os_thread_ret_t @@ -183,6 +186,17 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( /*==========================================*/ void* arg); /*!< in: a dummy parameter required by os_thread_create */ +/******************************************************************//** +lru_manager thread tasked with performing LRU flushes and evictions to refill +the buffer pool free lists. As of now we'll have only one instance of this +thread. +@return a dummy parameter */ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(buf_flush_lru_manager_thread)( +/*=========================================*/ + void* arg); /*!< in: a dummy parameter required by + os_thread_create */ /*********************************************************************//** Clears up tail of the LRU lists: * Put replaceable pages at the tail of LRU to the free list @@ -233,16 +247,18 @@ Writes a flushable page asynchronously from the buffer pool to a file. NOTE: in simulated aio we must call os_aio_simulated_wake_handler_threads after we have posted a batch of writes! NOTE: buf_page_get_mutex(bpage) must be held upon entering this -function, and they will be released by this function. */ +function, and they will be released by this function if it returns true. +LRU_list_mutex must be held iff performing a single page flush and will be +released by the function if it returns true. +@return TRUE if the page was flushed */ UNIV_INTERN -void +bool buf_flush_page( /*===========*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ buf_page_t* bpage, /*!< in: buffer control block */ buf_flush_t flush_type, /*!< in: type of flush */ - bool sync) /*!< in: true if sync IO request */ - __attribute__((nonnull)); + bool sync); /*!< in: true if sync IO request */ /********************************************************************//** Returns true if the block is modified and ready for flushing. @return true if can flush immediately */ diff --git a/storage/xtradb/include/buf0types.h b/storage/xtradb/include/buf0types.h index e19eb04a2ce..4eb5ea18cef 100644 --- a/storage/xtradb/include/buf0types.h +++ b/storage/xtradb/include/buf0types.h @@ -26,6 +26,10 @@ Created 11/17/1995 Heikki Tuuri #ifndef buf0types_h #define buf0types_h +#if defined(INNODB_PAGE_ATOMIC_REF_COUNT) && defined(HAVE_ATOMIC_BUILTINS) +#define PAGE_ATOMIC_REF_COUNT +#endif /* INNODB_PAGE_ATOMIC_REF_COUNT && HAVE_ATOMIC_BUILTINS */ + /** Buffer page (uncompressed or compressed) */ struct buf_page_t; /** Buffer block for which an uncompressed page exists */ diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index 6669f60b95a..3e3fb9f896c 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -1443,20 +1443,16 @@ UNIV_INTERN void dict_table_stats_lock( /*==================*/ - const dict_table_t* table, /*!< in: table */ - ulint latch_mode) /*!< in: RW_S_LATCH or - RW_X_LATCH */ - __attribute__((nonnull)); + dict_table_t* table, /*!< in: table */ + ulint latch_mode); /*!< in: RW_S_LATCH or RW_X_LATCH */ /**********************************************************************//** Unlock the latch that has been locked by dict_table_stats_lock() */ UNIV_INTERN void dict_table_stats_unlock( /*====================*/ - const dict_table_t* table, /*!< in: table */ - ulint latch_mode) /*!< in: RW_S_LATCH or - RW_X_LATCH */ - __attribute__((nonnull)); + dict_table_t* table, /*!< in: table */ + ulint latch_mode); /*!< in: RW_S_LATCH or RW_X_LATCH */ /********************************************************************//** Checks if the database name in two table names is the same. @return TRUE if same db name */ @@ -1802,6 +1798,17 @@ const char* dict_tf_to_row_format_string( /*=========================*/ ulint table_flag); /*!< in: row format setting */ +/*****************************************************************//** +Get index by first field of the index +@return index which is having first field matches +with the field present in field_index position of table */ +UNIV_INLINE +dict_index_t* +dict_table_get_index_on_first_col( +/*==============================*/ + const dict_table_t* table, /*!< in: table */ + ulint col_index); /*!< in: position of column + in table */ #endif /* !UNIV_HOTBACKUP */ /************************************************************************* diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic index c261d6a3aee..6bfd7f6cdae 100644 --- a/storage/xtradb/include/dict0dict.ic +++ b/storage/xtradb/include/dict0dict.ic @@ -1403,4 +1403,31 @@ dict_table_is_temporary( return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)); } +/**********************************************************************//** +Get index by first field of the index +@return index which is having first field matches +with the field present in field_index position of table */ +UNIV_INLINE +dict_index_t* +dict_table_get_index_on_first_col( +/*==============================*/ + const dict_table_t* table, /*!< in: table */ + ulint col_index) /*!< in: position of column + in table */ +{ + ut_ad(col_index < table->n_cols); + + dict_col_t* column = dict_table_get_nth_col(table, col_index); + + for (dict_index_t* index = dict_table_get_first_index(table); + index != NULL; index = dict_table_get_next_index(index)) { + + if (index->fields[0].col == column) { + return(index); + } + } + ut_error; + return(0); +} + #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h index bde0ce16094..527d50019a4 100644 --- a/storage/xtradb/include/dict0mem.h +++ b/storage/xtradb/include/dict0mem.h @@ -198,7 +198,7 @@ ROW_FORMAT=REDUNDANT. InnoDB engines do not check these flags for unknown bits in order to protect backward incompatibility. */ /* @{ */ /** Total number of bits in table->flags2. */ -#define DICT_TF2_BITS 6 +#define DICT_TF2_BITS 7 #define DICT_TF2_BIT_MASK ~(~0 << DICT_TF2_BITS) /** TEMPORARY; TRUE for tables from CREATE TEMPORARY TABLE. */ @@ -216,6 +216,10 @@ use its own tablespace instead of the system tablespace. */ /** Set when we discard/detach the tablespace */ #define DICT_TF2_DISCARDED 32 + +/** This bit is set if all aux table names (both common tables and +index tables) of a FTS table are in HEX format. */ +#define DICT_TF2_FTS_AUX_HEX_NAME 64 /* @} */ #define DICT_TF2_FLAG_SET(table, flag) \ @@ -255,7 +259,10 @@ dict_mem_table_create( of the table is placed */ ulint n_cols, /*!< in: number of columns */ ulint flags, /*!< in: table flags */ - ulint flags2); /*!< in: table flags2 */ + ulint flags2, /*!< in: table flags2 */ + bool nonshared);/*!< in: whether the table object is a dummy + one that does not need the initialization of + locking-related fields. */ /****************************************************************//** Free a table memory object. */ UNIV_INTERN @@ -728,6 +735,11 @@ a foreign key constraint is enforced, therefore RESTRICT just means no flag */ #define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32 /*!< ON UPDATE NO ACTION */ /* @} */ +/* This flag is for sync SQL DDL and memcached DML. +if table->memcached_sync_count == DICT_TABLE_IN_DDL means there's DDL running on +the table, DML from memcached will be blocked. */ +#define DICT_TABLE_IN_DDL -1 + /** Data structure for a database table. Most fields will be initialized to 0, NULL or FALSE in dict_mem_table_create(). */ struct dict_table_t{ @@ -841,9 +853,29 @@ struct dict_table_t{ initialized in dict_table_add_to_cache() */ /** Statistics for query optimization */ /* @{ */ + rw_lock_t* stats_latch; /*!< this latch protects: + dict_table_t::stat_initialized + dict_table_t::stat_n_rows (*) + dict_table_t::stat_clustered_index_size + dict_table_t::stat_sum_of_other_index_sizes + dict_table_t::stat_modified_counter (*) + dict_table_t::indexes*::stat_n_diff_key_vals[] + dict_table_t::indexes*::stat_index_size + dict_table_t::indexes*::stat_n_leaf_pages + (*) those are not always protected for + performance reasons. NULL for dumy table + objects. */ unsigned stat_initialized:1; /*!< TRUE if statistics have been calculated the first time after database startup or table creation */ +#define DICT_TABLE_IN_USED -1 + lint memcached_sync_count; + /*!< count of how many handles are opened + to this table from memcached; DDL on the + table is NOT allowed until this count + goes to zero. If it's -1, means there's DDL + on the table, DML from memcached will be + blocked. */ ib_time_t stats_last_recalc; /*!< Timestamp of last recalc of the stats */ ib_uint32_t stat_persistent; @@ -954,10 +986,12 @@ struct dict_table_t{ and release it without a need to allocate space from the lock heap of the trx: otherwise the lock heap would grow rapidly - if we do a large insert from a select */ + if we do a large insert from a select. NULL + for dummy table objects. */ ib_mutex_t autoinc_mutex; /*!< mutex protecting the autoincrement - counter */ + counter. Not initialized for dummy table + objects */ ib_uint64_t autoinc;/*!< autoinc counter value to give to the next inserted row */ ulong n_waiting_or_granted_auto_inc_locks; diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 472c57fcbfc..074906d8959 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -128,6 +128,8 @@ extern fil_addr_t fil_addr_null; at least up to this lsn */ #define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this contains the space id of the page */ +#define FIL_PAGE_SPACE_ID FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID + #define FIL_PAGE_DATA 38 /*!< start of the data on the page */ /* @} */ /** File page trailer @{ */ diff --git a/storage/xtradb/include/fts0priv.h b/storage/xtradb/include/fts0priv.h index c6aca27f6ec..b4d9e1d41ec 100644 --- a/storage/xtradb/include/fts0priv.h +++ b/storage/xtradb/include/fts0priv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -569,7 +569,10 @@ int fts_write_object_id( /*================*/ ib_id_t id, /*!< in: a table/index id */ - char* str) /*!< in: buffer to write the id to */ + char* str, /*!< in: buffer to write the id to */ + bool hex_format __attribute__((unused))) + /*!< in: true for fixed hex format, + false for old ambiguous format */ __attribute__((nonnull)); /******************************************************************//** Read the table id from the string generated by fts_write_object_id(). diff --git a/storage/xtradb/include/fts0priv.ic b/storage/xtradb/include/fts0priv.ic index 268bb7e2227..8ef877f267e 100644 --- a/storage/xtradb/include/fts0priv.ic +++ b/storage/xtradb/include/fts0priv.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -32,9 +32,24 @@ int fts_write_object_id( /*================*/ ib_id_t id, /* in: a table/index id */ - char* str) /* in: buffer to write the id to */ + char* str, /* in: buffer to write the id to */ + bool hex_format __attribute__((unused))) + /* in: true for fixed hex format, + false for old ambiguous format */ { - // FIXME: Use ut_snprintf() +#ifdef _WIN32 + /* Use this to construct old(5.6.14 and 5.7.3) ambiguous + aux table names */ + DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", + return(sprintf(str, "%016llu", id));); + + /* As above, but this is only for those tables failing to rename. */ + if (!hex_format) { + // FIXME: Use ut_snprintf(), so does following one. + return(sprintf(str, "%016llu", id)); + } +#endif /* _WIN32 */ + return(sprintf(str, UINT64PFx, id)); } @@ -48,6 +63,9 @@ fts_read_object_id( ib_id_t* id, /* out: an id */ const char* str) /* in: buffer to read from */ { + /* NOTE: this func doesn't care about whether current table + is set with HEX_NAME, the user of the id read here will check + if the id is HEX or DEC and do the right thing with it. */ return(sscanf(str, UINT64PFx, id) == 1); } diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h index 4599547439e..66a96282b69 100644 --- a/storage/xtradb/include/ha_prototypes.h +++ b/storage/xtradb/include/ha_prototypes.h @@ -420,16 +420,6 @@ innobase_fts_text_case_cmp( const void* p1, /*!< in: key */ const void* p2); /*!< in: node */ -/******************************************************************//** -compare two character string according to their charset. */ -UNIV_INTERN -int -innobase_fts_string_cmp( -/*====================*/ - const void* cs, /*!< in: Character set */ - const void* p1, /*!< in: key */ - const void* p2); /*!< in: node */ - /****************************************************************//** Get FTS field charset info from the field's prtype @return charset info */ diff --git a/storage/xtradb/include/log0recv.h b/storage/xtradb/include/log0recv.h index a1653c10999..e21599cffab 100644 --- a/storage/xtradb/include/log0recv.h +++ b/storage/xtradb/include/log0recv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -31,6 +31,7 @@ Created 9/20/1997 Heikki Tuuri #include "buf0types.h" #include "hash0hash.h" #include "log0log.h" +#include <list> /******************************************************//** Checks the 4-byte checksum to the trailer checksum field of a log @@ -404,6 +405,14 @@ struct recv_addr_t{ hash_node_t addr_hash;/*!< hash node in the hash bucket chain */ }; +struct recv_dblwr_t { + void add(byte* page); + + byte* find_first_page(ulint space_id); + + std::list<byte *> pages; /* Pages from double write buffer */ +}; + /** Recovery system data structure */ struct recv_sys_t{ #ifndef UNIV_HOTBACKUP @@ -468,6 +477,8 @@ struct recv_sys_t{ hash_table_t* addr_hash;/*!< hash table of file addresses of pages */ ulint n_addrs;/*!< number of not processed hashed file addresses in the hash table */ + + recv_dblwr_t dblwr; }; /** The recovery system */ diff --git a/storage/xtradb/include/mem0mem.h b/storage/xtradb/include/mem0mem.h index c36ef06b554..f30034f3074 100644 --- a/storage/xtradb/include/mem0mem.h +++ b/storage/xtradb/include/mem0mem.h @@ -92,18 +92,35 @@ void mem_close(void); /*===========*/ +#ifdef UNIV_DEBUG /**************************************************************//** Use this macro instead of the corresponding function! Macro for memory heap creation. */ -#define mem_heap_create(N) mem_heap_create_func(\ - (N), MEM_HEAP_DYNAMIC, __FILE__, __LINE__) +# define mem_heap_create(N) mem_heap_create_func( \ + (N), __FILE__, __LINE__, MEM_HEAP_DYNAMIC) /**************************************************************//** Use this macro instead of the corresponding function! Macro for memory heap creation. */ -#define mem_heap_create_typed(N, T) mem_heap_create_func(\ - (N), (T), __FILE__, __LINE__) +# define mem_heap_create_typed(N, T) mem_heap_create_func( \ + (N), __FILE__, __LINE__, (T)) + +#else /* UNIV_DEBUG */ +/**************************************************************//** +Use this macro instead of the corresponding function! Macro for memory +heap creation. */ + +# define mem_heap_create(N) mem_heap_create_func( \ + (N), MEM_HEAP_DYNAMIC) +/**************************************************************//** +Use this macro instead of the corresponding function! Macro for memory +heap creation. */ + +# define mem_heap_create_typed(N, T) mem_heap_create_func( \ + (N), (T)) + +#endif /* UNIV_DEBUG */ /**************************************************************//** Use this macro instead of the corresponding function! Macro for memory heap freeing. */ @@ -124,9 +141,11 @@ mem_heap_create_func( this means that a single user buffer of size n will fit in the block, 0 creates a default size block */ - ulint type, /*!< in: heap type */ +#ifdef UNIV_DEBUG const char* file_name, /*!< in: file name where created */ - ulint line); /*!< in: line where created */ + ulint line, /*!< in: line where created */ +#endif /* UNIV_DEBUG */ + ulint type); /*!< in: heap type */ /*****************************************************************//** NOTE: Use the corresponding macro instead of this function. Frees the space occupied by a memory heap. In the debug version erases the heap memory @@ -218,8 +237,14 @@ Macro for memory buffer allocation */ #define mem_zalloc(N) memset(mem_alloc(N), 0, (N)) -#define mem_alloc(N) mem_alloc_func((N), NULL, __FILE__, __LINE__) -#define mem_alloc2(N,S) mem_alloc_func((N), (S), __FILE__, __LINE__) +#ifdef UNIV_DEBUG +#define mem_alloc(N) mem_alloc_func((N), __FILE__, __LINE__, NULL) +#define mem_alloc2(N,S) mem_alloc_func((N), __FILE__, __LINE__, (S)) +#else /* UNIV_DEBUG */ +#define mem_alloc(N) mem_alloc_func((N), NULL) +#define mem_alloc2(N,S) mem_alloc_func((N), (S)) +#endif /* UNIV_DEBUG */ + /***************************************************************//** NOTE: Use the corresponding macro instead of this function. Allocates a single buffer of memory from the dynamic memory of @@ -231,10 +256,12 @@ void* mem_alloc_func( /*===========*/ ulint n, /*!< in: requested size in bytes */ - ulint* size, /*!< out: allocated size in bytes, - or NULL */ +#ifdef UNIV_DEBUG const char* file_name, /*!< in: file name where created */ - ulint line); /*!< in: line where created */ + ulint line, /*!< in: line where created */ +#endif /* UNIV_DEBUG */ + ulint* size); /*!< out: allocated size in bytes, + or NULL */ /**************************************************************//** Use this macro instead of the corresponding function! @@ -343,8 +370,10 @@ mem_validate_all_blocks(void); /** The info structure stored at the beginning of a heap block */ struct mem_block_info_t { ulint magic_n;/* magic number for debugging */ +#ifdef UNIV_DEBUG char file_name[8];/* file name where the mem heap was created */ ulint line; /*!< line number where the mem heap was created */ +#endif /* UNIV_DEBUG */ UT_LIST_BASE_NODE_T(mem_block_t) base; /* In the first block in the the list this is the base node of the list of blocks; in subsequent blocks this is undefined */ diff --git a/storage/xtradb/include/mem0mem.ic b/storage/xtradb/include/mem0mem.ic index 7f0e128cc40..0d983d69e1a 100644 --- a/storage/xtradb/include/mem0mem.ic +++ b/storage/xtradb/include/mem0mem.ic @@ -28,21 +28,34 @@ Created 6/8/1994 Heikki Tuuri # include "mem0pool.h" #endif /* !UNIV_HOTBACKUP */ +#ifdef UNIV_DEBUG +# define mem_heap_create_block(heap, n, type, file_name, line) \ + mem_heap_create_block_func(heap, n, file_name, line, type) +# define mem_heap_create_at(N, file_name, line) \ + mem_heap_create_func(N, file_name, line, MEM_HEAP_DYNAMIC) +#else /* UNIV_DEBUG */ +# define mem_heap_create_block(heap, n, type, file_name, line) \ + mem_heap_create_block_func(heap, n, type) +# define mem_heap_create_at(N, file_name, line) \ + mem_heap_create_func(N, MEM_HEAP_DYNAMIC) +#endif /* UNIV_DEBUG */ /***************************************************************//** Creates a memory heap block where data can be allocated. @return own: memory heap block, NULL if did not succeed (only possible for MEM_HEAP_BTR_SEARCH type heaps) */ UNIV_INTERN mem_block_t* -mem_heap_create_block( -/*==================*/ +mem_heap_create_block_func( +/*=======================*/ mem_heap_t* heap, /*!< in: memory heap or NULL if first block should be created */ ulint n, /*!< in: number of bytes needed for user data */ - ulint type, /*!< in: type of heap: MEM_HEAP_DYNAMIC or - MEM_HEAP_BUFFER */ +#ifdef UNIV_DEBUG const char* file_name,/*!< in: file name where created */ - ulint line); /*!< in: line where created */ + ulint line, /*!< in: line where created */ +#endif /* UNIV_DEBUG */ + ulint type); /*!< in: type of heap: MEM_HEAP_DYNAMIC or + MEM_HEAP_BUFFER */ /******************************************************************//** Frees a block from a memory heap. */ UNIV_INTERN @@ -421,9 +434,11 @@ mem_heap_create_func( this means that a single user buffer of size n will fit in the block, 0 creates a default size block */ - ulint type, /*!< in: heap type */ +#ifdef UNIV_DEBUG const char* file_name, /*!< in: file name where created */ - ulint line) /*!< in: line where created */ + ulint line, /*!< in: line where created */ +#endif /* UNIV_DEBUG */ + ulint type) /*!< in: heap type */ { mem_block_t* block; @@ -509,15 +524,17 @@ void* mem_alloc_func( /*===========*/ ulint n, /*!< in: desired number of bytes */ - ulint* size, /*!< out: allocated size in bytes, - or NULL */ +#ifdef UNIV_DEBUG const char* file_name, /*!< in: file name where created */ - ulint line) /*!< in: line where created */ + ulint line, /*!< in: line where created */ +#endif /* UNIV_DEBUG */ + ulint* size) /*!< out: allocated size in bytes, + or NULL */ { mem_heap_t* heap; void* buf; - heap = mem_heap_create_func(n, MEM_HEAP_DYNAMIC, file_name, line); + heap = mem_heap_create_at(n, file_name, line); /* Note that as we created the first block in the heap big enough for the buffer requested by the caller, the buffer will be in the diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index 136c7b35a0a..a75091589c6 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -1,6 +1,6 @@ /*********************************************************************** -Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. Portions of this file contain modifications contributed and copyrighted @@ -155,6 +155,10 @@ enum os_file_create_t { #define OS_FILE_INSUFFICIENT_RESOURCE 78 #define OS_FILE_AIO_INTERRUPTED 79 #define OS_FILE_OPERATION_ABORTED 80 + +#define OS_FILE_ACCESS_VIOLATION 81 + +#define OS_FILE_ERROR_MAX 100 /* @} */ /** Types for aio operations @{ */ @@ -401,7 +405,8 @@ enum os_file_type_t { OS_FILE_TYPE_UNKNOWN = 0, OS_FILE_TYPE_FILE, /* regular file */ OS_FILE_TYPE_DIR, /* directory */ - OS_FILE_TYPE_LINK /* symbolic link */ + OS_FILE_TYPE_LINK, /* symbolic link */ + OS_FILE_TYPE_BLOCK /* block device */ }; /* Maximum path string length in bytes when referring to tables with in the diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h index 51c4530bb5a..91ad7e3b860 100644 --- a/storage/xtradb/include/os0sync.h +++ b/storage/xtradb/include/os0sync.h @@ -374,6 +374,9 @@ compare to, new_val is the value to swap in. */ # define os_compare_and_swap_lint(ptr, old_val, new_val) \ os_compare_and_swap(ptr, old_val, new_val) +# define os_compare_and_swap_uint32(ptr, old_val, new_val) \ + os_compare_and_swap(ptr, old_val, new_val) + # ifdef HAVE_IB_ATOMIC_PTHREAD_T_GCC # define os_compare_and_swap_thread_id(ptr, old_val, new_val) \ os_compare_and_swap(ptr, old_val, new_val) @@ -395,6 +398,9 @@ amount of increment. */ # define os_atomic_increment_lint(ptr, amount) \ os_atomic_increment(ptr, amount) +# define os_atomic_increment_uint32(ptr, amount ) \ + os_atomic_increment(ptr, amount) + # define os_atomic_increment_ulint(ptr, amount) \ os_atomic_increment(ptr, amount) @@ -407,6 +413,9 @@ amount to decrement. */ # define os_atomic_decrement(ptr, amount) \ __sync_sub_and_fetch(ptr, amount) +# define os_atomic_decrement_uint32(ptr, amount) \ + os_atomic_decrement(ptr, amount) + # define os_atomic_decrement_lint(ptr, amount) \ os_atomic_decrement(ptr, amount) @@ -439,6 +448,9 @@ intrinsics and running on Solaris >= 10 use Solaris atomics */ Returns true if swapped, ptr is pointer to target, old_val is value to compare to, new_val is the value to swap in. */ +# define os_compare_and_swap_uint32(ptr, old_val, new_val) \ + (atomic_cas_32(ptr, old_val, new_val) == old_val) + # define os_compare_and_swap_ulint(ptr, old_val, new_val) \ (atomic_cas_ulong(ptr, old_val, new_val) == old_val) @@ -467,6 +479,9 @@ compare to, new_val is the value to swap in. */ Returns the resulting value, ptr is pointer to target, amount is the amount of increment. */ +# define os_atomic_increment_uint32(ptr, amount) \ + atomic_add_32_nv(ptr, amount) + # define os_atomic_increment_ulint(ptr, amount) \ atomic_add_long_nv(ptr, amount) @@ -479,6 +494,9 @@ amount of increment. */ /* Returns the resulting value, ptr is pointer to target, amount is the amount to decrement. */ +# define os_atomic_decrement_uint32(ptr, amount) \ + os_atomic_increment_uint32(ptr, -(amount)) + # define os_atomic_decrement_lint(ptr, amount) \ os_atomic_increment_ulint((ulong_t*) ptr, -(amount)) @@ -555,6 +573,9 @@ win_cmp_and_xchg_dword( Returns true if swapped, ptr is pointer to target, old_val is value to compare to, new_val is the value to swap in. */ +# define os_compare_and_swap_uint32(ptr, old_val, new_val) \ + (win_cmp_and_xchg_dword(ptr, new_val, old_val) == old_val) + # define os_compare_and_swap_ulint(ptr, old_val, new_val) \ (win_cmp_and_xchg_ulint(ptr, new_val, old_val) == old_val) @@ -576,6 +597,9 @@ amount of increment. */ # define os_atomic_increment_lint(ptr, amount) \ (win_xchg_and_add(ptr, amount) + amount) +# define os_atomic_increment_uint32(ptr, amount) \ + ((ulint) _InterlockedExchangeAdd((long*) ptr, amount)) + # define os_atomic_increment_ulint(ptr, amount) \ ((ulint) (win_xchg_and_add((lint*) ptr, (lint) amount) + amount)) @@ -588,6 +612,9 @@ amount of increment. */ Returns the resulting value, ptr is pointer to target, amount is the amount to decrement. There is no atomic substract function on Windows */ +# define os_atomic_decrement_uint32(ptr, amount) \ + ((ulint) _InterlockedExchangeAdd((long*) ptr, (-amount))) + # define os_atomic_decrement_lint(ptr, amount) \ (win_xchg_and_add(ptr, -(lint) amount) - amount) diff --git a/storage/xtradb/include/page0page.h b/storage/xtradb/include/page0page.h index 80181bb5c30..6940040a130 100644 --- a/storage/xtradb/include/page0page.h +++ b/storage/xtradb/include/page0page.h @@ -1102,6 +1102,14 @@ page_find_rec_with_heap_no( /*=======================*/ const page_t* page, /*!< in: index page */ ulint heap_no);/*!< in: heap number */ +/** Get the last non-delete-marked record on a page. +@param[in] page index tree leaf page +@return the last record, not delete-marked +@retval infimum record if all records are delete-marked */ + +const rec_t* +page_find_rec_max_not_deleted( + const page_t* page); #ifdef UNIV_MATERIALIZE #undef UNIV_INLINE #define UNIV_INLINE UNIV_INLINE_ORIGINAL diff --git a/storage/xtradb/include/page0page.ic b/storage/xtradb/include/page0page.ic index 58add015d34..4a22a32112f 100644 --- a/storage/xtradb/include/page0page.ic +++ b/storage/xtradb/include/page0page.ic @@ -417,6 +417,8 @@ page_rec_is_user_rec( /*=================*/ const rec_t* rec) /*!< in: record */ { + ut_ad(page_rec_check(rec)); + return(page_rec_is_user_rec_low(page_offset(rec))); } @@ -429,6 +431,8 @@ page_rec_is_supremum( /*=================*/ const rec_t* rec) /*!< in: record */ { + ut_ad(page_rec_check(rec)); + return(page_rec_is_supremum_low(page_offset(rec))); } @@ -441,6 +445,8 @@ page_rec_is_infimum( /*================*/ const rec_t* rec) /*!< in: record */ { + ut_ad(page_rec_check(rec)); + return(page_rec_is_infimum_low(page_offset(rec))); } diff --git a/storage/xtradb/include/rem0rec.h b/storage/xtradb/include/rem0rec.h index 2a84aee7a6f..8e7d5ff2d48 100644 --- a/storage/xtradb/include/rem0rec.h +++ b/storage/xtradb/include/rem0rec.h @@ -440,13 +440,24 @@ rec_get_offsets_func( ulint n_fields,/*!< in: maximum number of initialized fields (ULINT_UNDEFINED if all fields) */ - mem_heap_t** heap, /*!< in/out: memory heap */ +#ifdef UNIV_DEBUG const char* file, /*!< in: file name where called */ - ulint line) /*!< in: line number where called */ - __attribute__((nonnull(1,2,5,6),warn_unused_result)); + ulint line, /*!< in: line number where called */ +#endif /* UNIV_DEBUG */ + mem_heap_t** heap) /*!< in/out: memory heap */ +#ifdef UNIV_DEBUG + __attribute__((nonnull(1,2,5,7),warn_unused_result)); +#else /* UNIV_DEBUG */ + __attribute__((nonnull(1,2,5),warn_unused_result)); +#endif /* UNIV_DEBUG */ -#define rec_get_offsets(rec,index,offsets,n,heap) \ - rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__) +#ifdef UNIV_DEBUG +# define rec_get_offsets(rec,index,offsets,n,heap) \ + rec_get_offsets_func(rec,index,offsets,n,__FILE__,__LINE__,heap) +#else /* UNIV_DEBUG */ +# define rec_get_offsets(rec, index, offsets, n, heap) \ + rec_get_offsets_func(rec, index, offsets, n, heap) +#endif /* UNIV_DEBUG */ /******************************************************//** The following function determines the offsets to each field diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index d278782daa8..4bbace02eba 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2012, Oracle and/or its affiliates. All rights reserved. +Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, 2009, Google Inc. Copyright (c) 2009, Percona Inc. @@ -452,8 +452,8 @@ expected. */ extern ulint srv_read_views_memory; extern ulint srv_descriptors_memory; -extern ibool srv_print_innodb_monitor; -extern ibool srv_print_innodb_lock_monitor; +extern my_bool srv_print_innodb_monitor; +extern my_bool srv_print_innodb_lock_monitor; extern ibool srv_print_innodb_tablespace_monitor; extern ibool srv_print_verbose_log; #define DEPRECATED_MSG_INNODB_TABLE_MONITOR \ @@ -527,6 +527,9 @@ extern const char* srv_io_thread_function[]; /* The tid of the cleaner thread */ extern os_tid_t srv_cleaner_tid; +/* The tid of the LRU manager thread */ +extern os_tid_t srv_lru_manager_tid; + /* The tids of the purge threads */ extern os_tid_t srv_purge_tids[]; @@ -536,7 +539,7 @@ extern os_tid_t srv_io_tids[]; /* The tid of the master thread */ extern os_tid_t srv_master_tid; -/* The relative scheduling priority of the cleaner thread */ +/* The relative scheduling priority of the cleaner and LRU manager threads */ extern ulint srv_sched_priority_cleaner; /* The relative scheduling priority of the purge threads */ @@ -588,6 +591,7 @@ extern my_bool srv_fake_changes_locks; # ifdef UNIV_PFS_THREAD /* Keys to register InnoDB threads with performance schema */ extern mysql_pfs_key_t buf_page_cleaner_thread_key; +extern mysql_pfs_key_t buf_lru_manager_thread_key; extern mysql_pfs_key_t trx_rollback_clean_thread_key; extern mysql_pfs_key_t io_handler_thread_key; extern mysql_pfs_key_t srv_lock_timeout_thread_key; diff --git a/storage/xtradb/include/sync0rw.h b/storage/xtradb/include/sync0rw.h index a215d4d3f60..95bb7e16b26 100644 --- a/storage/xtradb/include/sync0rw.h +++ b/storage/xtradb/include/sync0rw.h @@ -182,6 +182,9 @@ unlocking, not the corresponding function. */ # define rw_lock_s_lock_gen(M, P) \ rw_lock_s_lock_func((M), (P), __FILE__, __LINE__) +# define rw_lock_s_lock_gen_nowait(M, P) \ + rw_lock_s_lock_low((M), (P), __FILE__, __LINE__) + # define rw_lock_s_lock_nowait(M, F, L) \ rw_lock_s_lock_low((M), 0, (F), (L)) @@ -244,6 +247,9 @@ unlocking, not the corresponding function. */ # define rw_lock_s_lock_gen(M, P) \ pfs_rw_lock_s_lock_func((M), (P), __FILE__, __LINE__) +# define rw_lock_s_lock_gen_nowait(M, P) \ + pfs_rw_lock_s_lock_low((M), (P), __FILE__, __LINE__) + # define rw_lock_s_lock_nowait(M, F, L) \ pfs_rw_lock_s_lock_low((M), 0, (F), (L)) diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h index 259fcd0cf6e..aaa74724a14 100644 --- a/storage/xtradb/include/trx0trx.h +++ b/storage/xtradb/include/trx0trx.h @@ -857,8 +857,7 @@ struct trx_t{ when trx->in_rw_trx_list. Initially set to TRX_ID_MAX. */ - time_t start_time; /*!< time the trx object was created - or the state last time became + time_t start_time; /*!< time the trx state last time became TRX_STATE_ACTIVE */ trx_id_t id; /*!< transaction id */ XID xid; /*!< X/Open XA transaction diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index eaf2262481c..eded44789a8 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -44,10 +44,10 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 5 #define INNODB_VERSION_MINOR 6 -#define INNODB_VERSION_BUGFIX 15 +#define INNODB_VERSION_BUGFIX 16 #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 63.0 +#define PERCONA_INNODB_VERSION 64.2 #endif /* Enable UNIV_LOG_ARCHIVE in XtraDB */ @@ -447,7 +447,7 @@ macro ULINTPF. */ # define UINT32PF "%I32u" # define INT64PF "%I64d" # define UINT64PF "%I64u" -# define UINT64PFx "%016I64u" +# define UINT64PFx "%016I64x" # define DBUG_LSN_PF "%llu" typedef __int64 ib_int64_t; typedef unsigned __int64 ib_uint64_t; diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc index cb4b988e46c..bf239299268 100644 --- a/storage/xtradb/log/log0recv.cc +++ b/storage/xtradb/log/log0recv.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. This program is free software; you can redistribute it and/or modify it under @@ -24,6 +24,11 @@ Recovery Created 9/20/1997 Heikki Tuuri *******************************************************/ +// First include (the generated) my_config.h, to get correct platform defines. +#include "my_config.h" +#include <stdio.h> // Solaris/x86 header file bug + +#include <vector> #include "log0recv.h" #ifdef UNIV_NONINL @@ -59,6 +64,7 @@ Created 9/20/1997 Heikki Tuuri # include "sync0sync.h" #else /* !UNIV_HOTBACKUP */ + /** This is set to FALSE if the backup was originally taken with the ibbackup --include regexp option: then we do not want to create tables in directories which were not included */ @@ -429,6 +435,9 @@ recv_sys_init( recv_max_page_lsn = 0; + /* Call the constructor for recv_sys_t::dblwr member */ + new (&recv_sys->dblwr) recv_dblwr_t(); + mutex_exit(&(recv_sys->mutex)); } @@ -1379,14 +1388,23 @@ recv_parse_or_apply_log_rec_body( ptr = mlog_parse_string(ptr, end_ptr, page, page_zip); break; case MLOG_FILE_RENAME: - ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, - (recv_recovery_is_on() - ? space_id : 0), 0); + /* Do not rerun file-based log entries if this is + IO completion from a page read. */ + if (page == NULL) { + ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, + (recv_recovery_is_on() + ? space_id : 0), 0); + } break; case MLOG_FILE_CREATE: case MLOG_FILE_DELETE: case MLOG_FILE_CREATE2: - ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, 0, 0); + /* Do not rerun file-based log entries if this is + IO completion from a page read. */ + if (page == NULL) { + ptr = fil_op_log_parse_or_replay(ptr, end_ptr, + type, 0, 0); + } break; case MLOG_ZIP_WRITE_NODE_PTR: ut_ad(!page || page_type == FIL_PAGE_INDEX); @@ -3024,6 +3042,8 @@ recv_init_crash_recovery(void) ib_logf(IB_LOG_LEVEL_INFO, "Reading tablespace information from the .ibd files..."); + buf_dblwr_init_or_load_pages(true); + fil_load_single_table_tablespaces(); /* If we are using the doublewrite method, we will @@ -3039,7 +3059,7 @@ recv_init_crash_recovery(void) ib_logf(IB_LOG_LEVEL_INFO, "from the doublewrite buffer..."); - buf_dblwr_init_or_restore_pages(TRUE); + buf_dblwr_process(); /* Spawn the background thread to flush dirty pages from the buffer pools. */ @@ -3355,7 +3375,7 @@ recv_recovery_from_checkpoint_start_func( if (!recv_needed_recovery && !srv_read_only_mode) { /* Init the doublewrite buffer memory structure */ - buf_dblwr_init_or_restore_pages(FALSE); + buf_dblwr_init_or_load_pages(false); } } @@ -4059,3 +4079,46 @@ recv_recovery_from_archive_finish(void) recv_recovery_from_backup_on = FALSE; } #endif /* UNIV_LOG_ARCHIVE */ + + +void recv_dblwr_t::add(byte* page) +{ + pages.push_back(page); +} + +byte* recv_dblwr_t::find_first_page(ulint space_id) +{ + std::vector<byte*> matches; + byte* result = 0; + + for (std::list<byte*>::iterator i = pages.begin(); + i != pages.end(); ++i) { + + if ((page_get_space_id(*i) == space_id) + && (page_get_page_no(*i) == 0)) { + matches.push_back(*i); + } + } + + if (matches.size() == 1) { + result = matches[0]; + } else if (matches.size() > 1) { + + lsn_t max_lsn = 0; + lsn_t page_lsn = 0; + + for (std::vector<byte*>::iterator i = matches.begin(); + i != matches.end(); ++i) { + + page_lsn = mach_read_from_8(*i + FIL_PAGE_LSN); + + if (page_lsn > max_lsn) { + max_lsn = page_lsn; + result = *i; + } + } + } + + return(result); +} + diff --git a/storage/xtradb/mem/mem0mem.cc b/storage/xtradb/mem/mem0mem.cc index e0e6220f4d8..e066aff5b30 100644 --- a/storage/xtradb/mem/mem0mem.cc +++ b/storage/xtradb/mem/mem0mem.cc @@ -299,15 +299,17 @@ Creates a memory heap block where data can be allocated. for MEM_HEAP_BTR_SEARCH type heaps) */ UNIV_INTERN mem_block_t* -mem_heap_create_block( -/*==================*/ +mem_heap_create_block_func( +/*=======================*/ mem_heap_t* heap, /*!< in: memory heap or NULL if first block should be created */ ulint n, /*!< in: number of bytes needed for user data */ - ulint type, /*!< in: type of heap: MEM_HEAP_DYNAMIC or - MEM_HEAP_BUFFER */ +#ifdef UNIV_DEBUG const char* file_name,/*!< in: file name where created */ - ulint line) /*!< in: line where created */ + ulint line, /*!< in: line where created */ +#endif /* UNIV_DEBUG */ + ulint type) /*!< in: type of heap: MEM_HEAP_DYNAMIC or + MEM_HEAP_BUFFER */ { #ifndef UNIV_HOTBACKUP buf_block_t* buf_block = NULL; @@ -368,8 +370,9 @@ mem_heap_create_block( #endif /* !UNIV_HOTBACKUP */ block->magic_n = MEM_BLOCK_MAGIC_N; - ut_strlcpy_rev(block->file_name, file_name, sizeof(block->file_name)); - block->line = line; + ut_d(ut_strlcpy_rev(block->file_name, file_name, + sizeof(block->file_name))); + ut_d(block->line = line); #ifdef MEM_PERIODIC_CHECK mutex_enter(&(mem_comm_pool->mutex)); diff --git a/storage/xtradb/mtr/mtr0log.cc b/storage/xtradb/mtr/mtr0log.cc index 5335cb4c9ef..0660c819240 100644 --- a/storage/xtradb/mtr/mtr0log.cc +++ b/storage/xtradb/mtr/mtr0log.cc @@ -560,7 +560,7 @@ mlog_parse_index( n = n_uniq = 1; } table = dict_mem_table_create("LOG_DUMMY", DICT_HDR_SPACE, n, - comp ? DICT_TF_COMPACT : 0, 0); + comp ? DICT_TF_COMPACT : 0, 0, true); ind = dict_mem_index_create("LOG_DUMMY", "LOG_DUMMY", DICT_HDR_SPACE, 0, n); ind->table = table; diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 8c09beb3e9c..6ba19879847 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -563,8 +563,10 @@ os_file_get_last_error_low( return(OS_FILE_INSUFFICIENT_RESOURCE); } else if (err == ERROR_OPERATION_ABORTED) { return(OS_FILE_OPERATION_ABORTED); + } else if (err == ERROR_ACCESS_DENIED) { + return(OS_FILE_ACCESS_VIOLATION); } else { - return(100 + err); + return(OS_FILE_ERROR_MAX + err); } #else int err = errno; @@ -638,8 +640,10 @@ os_file_get_last_error_low( return(OS_FILE_AIO_INTERRUPTED); } break; + case EACCES: + return(OS_FILE_ACCESS_VIOLATION); } - return(100 + err); + return(OS_FILE_ERROR_MAX + err); #endif } @@ -717,6 +721,7 @@ os_file_handle_error_cond_exit( case OS_FILE_PATH_ERROR: case OS_FILE_ALREADY_EXISTS: + case OS_FILE_ACCESS_VIOLATION: return(FALSE); @@ -2587,12 +2592,13 @@ os_file_pread( os_mutex_exit(os_file_count_mutex); #endif /* HAVE_ATOMIC_BUILTINS && UNIV_WORD == 8 */ - /* Handle signal interruptions correctly */ + /* Handle partial reads and signal interruptions correctly */ for (n_bytes = 0; n_bytes < (ssize_t) n; ) { - n_read = pread(file, buf, (ssize_t)n, offs); + n_read = pread(file, buf, (ssize_t)n - n_bytes, offs); if (n_read > 0) { n_bytes += n_read; offs += n_read; + buf = (char *)buf + n_read; } else if (n_read == -1 && errno == EINTR) { continue; } else { @@ -2734,12 +2740,13 @@ os_file_pwrite( MONITOR_ATOMIC_INC(MONITOR_OS_PENDING_WRITES); #endif /* !HAVE_ATOMIC_BUILTINS || UNIV_WORD < 8 */ - /* Handle signal interruptions correctly */ + /* Handle partial writes and signal interruptions correctly */ for (ret = 0; ret < (ssize_t) n; ) { - n_written = pwrite(file, buf, (ssize_t)n, offs); - if (n_written > 0) { + n_written = pwrite(file, buf, (ssize_t)n - ret, offs); + if (n_written >= 0) { ret += n_written; offs += n_written; + buf = (char *)buf + n_written; } else if (n_written == -1 && errno == EINTR) { continue; } else { @@ -3333,30 +3340,41 @@ os_file_get_status( return(DB_FAIL); - } else if (S_ISDIR(statinfo.st_mode)) { + } + + switch (statinfo.st_mode & S_IFMT) { + case S_IFDIR: stat_info->type = OS_FILE_TYPE_DIR; - } else if (S_ISLNK(statinfo.st_mode)) { + break; + case S_IFLNK: stat_info->type = OS_FILE_TYPE_LINK; - } else if (S_ISREG(statinfo.st_mode)) { + break; + case S_IFBLK: + stat_info->type = OS_FILE_TYPE_BLOCK; + break; + case S_IFREG: stat_info->type = OS_FILE_TYPE_FILE; + break; + default: + stat_info->type = OS_FILE_TYPE_UNKNOWN; + } - if (check_rw_perm) { - int fh; - int access; - access = !srv_read_only_mode ? O_RDWR : O_RDONLY; + if (check_rw_perm && (stat_info->type == OS_FILE_TYPE_FILE + || stat_info->type == OS_FILE_TYPE_BLOCK)) { + int fh; + int access; - fh = ::open(path, access, os_innodb_umask); + access = !srv_read_only_mode ? O_RDWR : O_RDONLY; - if (fh == -1) { - stat_info->rw_perm = false; - } else { - stat_info->rw_perm = true; - close(fh); - } + fh = ::open(path, access, os_innodb_umask); + + if (fh == -1) { + stat_info->rw_perm = false; + } else { + stat_info->rw_perm = true; + close(fh); } - } else { - stat_info->type = OS_FILE_TYPE_UNKNOWN; } #endif /* _WIN_ */ @@ -5154,6 +5172,7 @@ os_aio_linux_handle( segment = os_aio_get_array_and_local_segment(&array, global_seg); n = array->n_slots / array->n_segments; + wait_for_event: /* Loop until we have found a completed request. */ for (;;) { ibool any_reserved = FALSE; @@ -5216,6 +5235,41 @@ found: if (slot->ret == 0 && slot->n_bytes == (long) slot->len) { ret = TRUE; + } else if ((slot->ret == 0) && (slot->n_bytes > 0) + && (slot->n_bytes < (long) slot->len)) { + /* Partial read or write scenario */ + int submit_ret; + struct iocb* iocb; + slot->buf = (byte*)slot->buf + slot->n_bytes; + slot->offset = slot->offset + slot->n_bytes; + slot->len = slot->len - slot->n_bytes; + /* Resetting the bytes read/written */ + slot->n_bytes = 0; + slot->io_already_done = FALSE; + iocb = &(slot->control); + + if (slot->type == OS_FILE_READ) { + io_prep_pread(&slot->control, slot->file, slot->buf, + slot->len, (off_t) slot->offset); + } else { + ut_a(slot->type == OS_FILE_WRITE); + io_prep_pwrite(&slot->control, slot->file, slot->buf, + slot->len, (off_t) slot->offset); + } + /* Resubmit an I/O request */ + submit_ret = io_submit(array->aio_ctx[segment], 1, &iocb); + if (submit_ret < 0 ) { + /* Aborting in case of submit failure */ + ib_logf(IB_LOG_LEVEL_FATAL, + "Native Linux AIO interface. io_submit()" + " call failed when resubmitting a partial" + " I/O request on the file %s.", + slot->name); + } else { + ret = FALSE; + os_mutex_exit(array->mutex); + goto wait_for_event; + } } else { errno = -slot->ret; diff --git a/storage/xtradb/page/page0cur.cc b/storage/xtradb/page/page0cur.cc index efce1f10cae..f5f7e1299ce 100644 --- a/storage/xtradb/page/page0cur.cc +++ b/storage/xtradb/page/page0cur.cc @@ -977,7 +977,8 @@ page_cur_insert_rec_low( == (ibool) !!page_is_comp(page)); ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) - == index->id || recv_recovery_is_on() || mtr->inside_ibuf); + == index->id || recv_recovery_is_on() + || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index))); ut_ad(!page_rec_is_supremum(current_rec)); @@ -1204,7 +1205,8 @@ page_cur_insert_rec_zip( ut_ad(page_is_comp(page)); ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) - == index->id || mtr->inside_ibuf || recv_recovery_is_on()); + == index->id || recv_recovery_is_on() + || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index))); ut_ad(!page_cur_is_after_last(cursor)); #ifdef UNIV_ZIP_DEBUG @@ -1977,7 +1979,8 @@ page_cur_delete_rec( const dict_index_t* index, /*!< in: record descriptor */ const ulint* offsets,/*!< in: rec_get_offsets( cursor->rec, index) */ - mtr_t* mtr) /*!< in: mini-transaction handle */ + mtr_t* mtr) /*!< in: mini-transaction handle + or NULL */ { page_dir_slot_t* cur_dir_slot; page_dir_slot_t* prev_slot; @@ -2006,7 +2009,8 @@ page_cur_delete_rec( ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) - == index->id || mtr->inside_ibuf || recv_recovery_is_on()); + == index->id || recv_recovery_is_on() + || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index))); /* The record must not be the supremum or infimum record. */ ut_ad(page_rec_is_user_rec(current_rec)); diff --git a/storage/xtradb/page/page0page.cc b/storage/xtradb/page/page0page.cc index 2faf804279c..bd5fb36af8f 100644 --- a/storage/xtradb/page/page0page.cc +++ b/storage/xtradb/page/page0page.cc @@ -2779,3 +2779,35 @@ page_delete_rec( return(no_compress_needed); } +/** Get the last non-delete-marked record on a page. +@param[in] page index tree leaf page +@return the last record, not delete-marked +@retval infimum record if all records are delete-marked */ + +const rec_t* +page_find_rec_max_not_deleted( + const page_t* page) +{ + const rec_t* rec = page_get_infimum_rec(page); + const rec_t* prev_rec = NULL; // remove warning + + /* Because the page infimum is never delete-marked, + prev_rec will always be assigned to it first. */ + ut_ad(!rec_get_deleted_flag(rec, page_rec_is_comp(rec))); + if (page_is_comp(page)) { + do { + if (!rec_get_deleted_flag(rec, true)) { + prev_rec = rec; + } + rec = page_rec_get_next_low(rec, true); + } while (rec != page + PAGE_NEW_SUPREMUM); + } else { + do { + if (!rec_get_deleted_flag(rec, false)) { + prev_rec = rec; + } + rec = page_rec_get_next_low(rec, false); + } while (rec != page + PAGE_OLD_SUPREMUM); + } + return(prev_rec); +} diff --git a/storage/xtradb/page/page0zip.cc b/storage/xtradb/page/page0zip.cc index 81c9e0ab45a..ed73fb37d41 100644 --- a/storage/xtradb/page/page0zip.cc +++ b/storage/xtradb/page/page0zip.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2005, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. This program is free software; you can redistribute it and/or modify it under @@ -24,6 +24,9 @@ Compressed page interface Created June 2005 by Marko Makela *******************************************************/ +// First include (the generated) my_config.h, to get correct platform defines. +#include "my_config.h" + #include <map> using namespace std; @@ -1571,9 +1574,8 @@ page_zip_fields_free( dict_table_t* table = index->table; os_fast_mutex_free(&index->zip_pad.mutex); mem_heap_free(index->heap); - mutex_free(&(table->autoinc_mutex)); - ut_free(table->name); - mem_heap_free(table->heap); + + dict_mem_table_free(table); } } @@ -1621,7 +1623,7 @@ page_zip_fields_decode( } table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n, - DICT_TF_COMPACT, 0); + DICT_TF_COMPACT, 0, true); index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY", DICT_HDR_SPACE, 0, n); index->table = table; @@ -4898,8 +4900,12 @@ page_zip_verify_checksum( /* declare empty pages non-corrupted */ if (stored == 0) { /* make sure that the page is really empty */ - ut_d(ulint i; for (i = 0; i < size; i++) { - ut_a(*((const char*) data + i) == 0); }); + ulint i; + for (i = 0; i < size; i++) { + if (*((const char*) data + i) != 0) { + return(FALSE); + } + } return(TRUE); } diff --git a/storage/xtradb/pars/pars0pars.cc b/storage/xtradb/pars/pars0pars.cc index e0bc00fad0d..fff0b1efd01 100644 --- a/storage/xtradb/pars/pars0pars.cc +++ b/storage/xtradb/pars/pars0pars.cc @@ -1988,10 +1988,16 @@ pars_create_table( } } + /* Set the flags2 when create table or alter tables */ + flags2 |= DICT_TF2_FTS_AUX_HEX_NAME; + DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", + flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;); + + n_cols = que_node_list_get_len(column_defs); table = dict_mem_table_create( - table_sym->name, 0, n_cols, flags, flags2); + table_sym->name, 0, n_cols, flags, flags2, false); #ifdef UNIV_DEBUG if (not_fit_in_memory != NULL) { diff --git a/storage/xtradb/rem/rem0rec.cc b/storage/xtradb/rem/rem0rec.cc index 43072159b9e..0d7b7c16785 100644 --- a/storage/xtradb/rem/rem0rec.cc +++ b/storage/xtradb/rem/rem0rec.cc @@ -543,9 +543,11 @@ rec_get_offsets_func( ulint n_fields,/*!< in: maximum number of initialized fields (ULINT_UNDEFINED if all fields) */ - mem_heap_t** heap, /*!< in/out: memory heap */ +#ifdef UNIV_DEBUG const char* file, /*!< in: file name where called */ - ulint line) /*!< in: line number where called */ + ulint line, /*!< in: line number where called */ +#endif /* UNIV_DEBUG */ + mem_heap_t** heap) /*!< in/out: memory heap */ { ulint n; ulint size; @@ -590,9 +592,8 @@ rec_get_offsets_func( if (UNIV_UNLIKELY(!offsets) || UNIV_UNLIKELY(rec_offs_get_n_alloc(offsets) < size)) { if (UNIV_UNLIKELY(!*heap)) { - *heap = mem_heap_create_func(size * sizeof(ulint), - MEM_HEAP_DYNAMIC, - file, line); + *heap = mem_heap_create_at(size * sizeof(ulint), + file, line); } offsets = static_cast<ulint*>( mem_heap_alloc(*heap, size * sizeof(ulint))); diff --git a/storage/xtradb/row/row0ftsort.cc b/storage/xtradb/row/row0ftsort.cc index 7a673feae9b..be62aa34a07 100644 --- a/storage/xtradb/row/row0ftsort.cc +++ b/storage/xtradb/row/row0ftsort.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1434,11 +1434,17 @@ row_fts_merge_insert( ins_ctx.ins_graph = static_cast<que_t**>(mem_heap_alloc(heap, n_bytes)); memset(ins_ctx.ins_graph, 0x0, n_bytes); + /* We should set the flags2 with aux_table_name here, + in order to get the correct aux table names. */ + index->table->flags2 |= DICT_TF2_FTS_AUX_HEX_NAME; + DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", + index->table->flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;); + ins_ctx.fts_table.type = FTS_INDEX_TABLE; ins_ctx.fts_table.index_id = index->id; ins_ctx.fts_table.table_id = table->id; ins_ctx.fts_table.parent = index->table->name; - ins_ctx.fts_table.table = NULL; + ins_ctx.fts_table.table = index->table; for (i = 0; i < fts_sort_pll_degree; i++) { if (psort_info[i].merge_file[id]->n_rec == 0) { diff --git a/storage/xtradb/row/row0log.cc b/storage/xtradb/row/row0log.cc index 0c5ae2d3125..3a01b5ed55a 100644 --- a/storage/xtradb/row/row0log.cc +++ b/storage/xtradb/row/row0log.cc @@ -78,11 +78,12 @@ struct row_log_buf_t { mrec_buf_t buf; /*!< buffer for accessing a record that spans two blocks */ ulint blocks; /*!< current position in blocks */ - ulint bytes; /*!< current position within buf */ + ulint bytes; /*!< current position within block */ ulonglong total; /*!< logical position, in bytes from the start of the row_log_table log; 0 for row_log_online_op() and row_log_apply(). */ + ulint size; /*!< allocated size of block */ }; /** Tracks BLOB allocation during online ALTER TABLE */ @@ -193,9 +194,48 @@ struct row_log_t { or by index->lock X-latch only */ row_log_buf_t head; /*!< reader context; protected by MDL only; modifiable by row_log_apply_ops() */ - ulint size; /*!< allocated size */ }; + +/** Allocate the memory for the log buffer. +@param[in,out] log_buf Buffer used for log operation +@return TRUE if success, false if not */ +static __attribute__((warn_unused_result)) +bool +row_log_block_allocate( + row_log_buf_t& log_buf) +{ + DBUG_ENTER("row_log_block_allocate"); + if (log_buf.block == NULL) { + log_buf.size = srv_sort_buf_size; + log_buf.block = (byte*) os_mem_alloc_large(&log_buf.size, + FALSE); + DBUG_EXECUTE_IF("simulate_row_log_allocation_failure", + if (log_buf.block) + os_mem_free_large(log_buf.block, log_buf.size); + log_buf.block = NULL;); + if (!log_buf.block) { + DBUG_RETURN(false); + } + } + DBUG_RETURN(true); +} + +/** Free the log buffer. +@param[in,out] log_buf Buffer used for log operation */ +static +void +row_log_block_free( + row_log_buf_t& log_buf) +{ + DBUG_ENTER("row_log_block_free"); + if (log_buf.block != NULL) { + os_mem_free_large(log_buf.block, log_buf.size); + log_buf.block = NULL; + } + DBUG_VOID_RETURN; +} + /******************************************************//** Logs an operation to a secondary index that is (or was) being created. */ UNIV_INTERN @@ -247,6 +287,11 @@ row_log_online_op( log->max_trx = trx_id; } + if (!row_log_block_allocate(log->tail)) { + log->error = DB_OUT_OF_MEMORY; + goto err_exit; + } + UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf); ut_ad(log->tail.bytes < srv_sort_buf_size); @@ -318,6 +363,7 @@ write_failed: } UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf); +err_exit: mutex_exit(&log->mutex); } @@ -352,10 +398,16 @@ row_log_table_open( UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf); if (log->error != DB_SUCCESS) { +err_exit: mutex_exit(&log->mutex); return(NULL); } + if (!row_log_block_allocate(log->tail)) { + log->error = DB_OUT_OF_MEMORY; + goto err_exit; + } + ut_ad(log->tail.bytes < srv_sort_buf_size); *avail = srv_sort_buf_size - log->tail.bytes; @@ -2266,7 +2318,9 @@ corruption: if (index->online_log->head.blocks) { #ifdef HAVE_FTRUNCATE /* Truncate the file in order to save space. */ - ftruncate(index->online_log->fd, 0); + if (ftruncate(index->online_log->fd, 0) == -1) { + perror("ftruncate"); + } #endif /* HAVE_FTRUNCATE */ index->online_log->head.blocks = index->online_log->tail.blocks = 0; @@ -2301,6 +2355,11 @@ all_done: ut_ad(dict_index_is_online_ddl(index)); + if (!row_log_block_allocate(index->online_log->head)) { + error = DB_OUT_OF_MEMORY; + goto func_exit; + } + success = os_file_read_no_error_handling( OS_FILE_FROM_FD(index->online_log->fd), index->online_log->head.block, ofs, @@ -2504,6 +2563,7 @@ func_exit: mem_heap_free(offsets_heap); mem_heap_free(heap); + row_log_block_free(index->online_log->head); ut_free(offsets); return(error); } @@ -2577,9 +2637,7 @@ row_log_allocate( const ulint* col_map)/*!< in: mapping of old column numbers to new ones, or NULL if !table */ { - byte* buf; row_log_t* log; - ulint size; DBUG_ENTER("row_log_allocate"); ut_ad(!dict_index_is_online_ddl(index)); @@ -2591,17 +2649,14 @@ row_log_allocate( #ifdef UNIV_SYNC_DEBUG ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)); #endif /* UNIV_SYNC_DEBUG */ - size = 2 * srv_sort_buf_size + sizeof *log; - buf = (byte*) os_mem_alloc_large(&size, FALSE); - if (!buf) { + log = (row_log_t*) ut_malloc(sizeof *log); + if (!log) { DBUG_RETURN(false); } - log = (row_log_t*) &buf[2 * srv_sort_buf_size]; - log->size = size; log->fd = row_merge_file_create_low(); if (log->fd < 0) { - os_mem_free_large(buf, size); + ut_free(log); DBUG_RETURN(false); } mutex_create(index_online_log_key, &log->mutex, @@ -2613,10 +2668,9 @@ row_log_allocate( log->col_map = col_map; log->error = DB_SUCCESS; log->max_trx = 0; - log->head.block = buf; - log->tail.block = buf + srv_sort_buf_size; log->tail.blocks = log->tail.bytes = 0; log->tail.total = 0; + log->tail.block = log->head.block = NULL; log->head.blocks = log->head.bytes = 0; log->head.total = 0; dict_index_set_online_status(index, ONLINE_INDEX_CREATION); @@ -2641,9 +2695,11 @@ row_log_free( MONITOR_ATOMIC_DEC(MONITOR_ONLINE_CREATE_INDEX); delete log->blobs; + row_log_block_free(log->tail); + row_log_block_free(log->head); row_merge_file_destroy_low(log->fd); mutex_free(&log->mutex); - os_mem_free_large(log->head.block, log->size); + ut_free(log); log = 0; } @@ -3069,6 +3125,11 @@ next_block: goto interrupted; } + error = index->online_log->error; + if (error != DB_SUCCESS) { + goto func_exit; + } + if (dict_index_is_corrupted(index)) { error = DB_INDEX_CORRUPT; goto func_exit; @@ -3089,7 +3150,9 @@ corruption: if (index->online_log->head.blocks) { #ifdef HAVE_FTRUNCATE /* Truncate the file in order to save space. */ - ftruncate(index->online_log->fd, 0); + if (ftruncate(index->online_log->fd, 0) == -1) { + perror("ftruncate"); + } #endif /* HAVE_FTRUNCATE */ index->online_log->head.blocks = index->online_log->tail.blocks = 0; @@ -3120,6 +3183,11 @@ all_done: log_free_check(); + if (!row_log_block_allocate(index->online_log->head)) { + error = DB_OUT_OF_MEMORY; + goto func_exit; + } + success = os_file_read_no_error_handling( OS_FILE_FROM_FD(index->online_log->fd), index->online_log->head.block, ofs, @@ -3320,6 +3388,7 @@ func_exit: mem_heap_free(heap); mem_heap_free(offsets_heap); + row_log_block_free(index->online_log->head); ut_free(offsets); return(error); } diff --git a/storage/xtradb/row/row0mysql.cc b/storage/xtradb/row/row0mysql.cc index b590a04b3e8..c65c39b7971 100644 --- a/storage/xtradb/row/row0mysql.cc +++ b/storage/xtradb/row/row0mysql.cc @@ -1276,7 +1276,9 @@ row_insert_for_mysql( " newraw is replaced\n" "InnoDB: with raw, and innodb_force_... is removed.\n", stderr); - + if(srv_force_recovery) { + return(DB_READ_ONLY); + } return(DB_ERROR); } @@ -1665,7 +1667,9 @@ row_update_for_mysql( " is replaced\n" "InnoDB: with raw, and innodb_force_... is removed.\n", stderr); - + if(srv_force_recovery) { + return(DB_READ_ONLY); + } return(DB_ERROR); } @@ -3254,7 +3258,6 @@ row_truncate_table_for_mysql( ut_a(trx->dict_operation_lock_mode == 0); /* Prevent foreign key checks etc. while we are truncating the table */ - row_mysql_lock_data_dictionary(trx); ut_ad(mutex_own(&(dict_sys->mutex))); @@ -3318,6 +3321,25 @@ row_truncate_table_for_mysql( goto funct_exit; } + /* Check if memcached plugin is running on this table. if is, we don't + allow truncate this table. */ + if (table->memcached_sync_count != 0) { + ut_print_timestamp(stderr); + fputs(" InnoDB: Cannot truncate table ", stderr); + ut_print_name(stderr, trx, TRUE, table->name); + fputs(" by DROP+CREATE\n" + "InnoDB: because there are memcached operations" + " running on it.\n", + stderr); + err = DB_ERROR; + + goto funct_exit; + } else { + /* We need to set this counter to -1 for blocking + memcached operations. */ + table->memcached_sync_count = DICT_TABLE_IN_DDL; + } + /* Remove all locks except the table-level X lock. */ lock_remove_all_on_table(table, FALSE); @@ -3501,6 +3523,7 @@ next_rec: fts_table.name = table->name; fts_table.id = new_id; + fts_table.flags2 = table->flags2; err = fts_create_common_tables( trx, &fts_table, table->name, TRUE); @@ -3645,6 +3668,12 @@ next_rec: funct_exit: + if (table->memcached_sync_count == DICT_TABLE_IN_DDL) { + /* We need to set the memcached sync back to 0, unblock + memcached operationse. */ + table->memcached_sync_count = 0; + } + row_mysql_unlock_data_dictionary(trx); dict_stats_update(table, DICT_STATS_EMPTY_TABLE); @@ -4716,6 +4745,9 @@ row_rename_table_for_mysql( " is replaced\n" "InnoDB: with raw, and innodb_force_... is removed.\n", stderr); + if(srv_force_recovery) { + err = DB_READ_ONLY; + } goto funct_exit; } else if (row_mysql_is_system_table(new_name)) { @@ -4989,15 +5021,31 @@ row_rename_table_for_mysql( if (err != DB_SUCCESS && (table->space != 0)) { char* orig_name = table->name; + trx_t* trx_bg = trx_allocate_for_background(); + + /* If the first fts_rename fails, the trx would + be rolled back and committed, we can't use it any more, + so we have to start a new background trx here. */ + ut_a(trx_state_eq(trx, TRX_STATE_NOT_STARTED)); + trx_bg->op_info = "Revert the failing rename " + "for fts aux tables"; + trx_bg->dict_operation_lock_mode = RW_X_LATCH; + trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE); /* If rename fails and table has its own tablespace, we need to call fts_rename_aux_tables again to revert the ibd file rename, which is not under the control of trx. Also notice the parent table name - in cache is not changed yet. */ + in cache is not changed yet. If the reverting fails, + the ibd data may be left in the new database, which + can be fixed only manually. */ table->name = const_cast<char*>(new_name); - fts_rename_aux_tables(table, old_name, trx); + fts_rename_aux_tables(table, old_name, trx_bg); table->name = orig_name; + + trx_bg->dict_operation_lock_mode = 0; + trx_commit_for_mysql(trx_bg); + trx_free_for_background(trx_bg); } } diff --git a/storage/xtradb/row/row0sel.cc b/storage/xtradb/row/row0sel.cc index c68b4de1125..67107c34204 100644 --- a/storage/xtradb/row/row0sel.cc +++ b/storage/xtradb/row/row0sel.cc @@ -5340,25 +5340,40 @@ func_exit: return(value); } -/*******************************************************************//** -Get the last row. -@return current rec or NULL */ +/** Get the maximum and non-delete-marked record in an index. +@param[in] index index tree +@param[in,out] mtr mini-transaction (may be committed and restarted) +@return maximum record, page s-latched in mtr +@retval NULL if there are no records, or if all of them are delete-marked */ static const rec_t* -row_search_autoinc_get_rec( -/*=======================*/ - btr_pcur_t* pcur, /*!< in: the current cursor */ - mtr_t* mtr) /*!< in: mini transaction */ +row_search_get_max_rec( + dict_index_t* index, + mtr_t* mtr) { + btr_pcur_t pcur; + const rec_t* rec; + /* Open at the high/right end (false), and init cursor */ + btr_pcur_open_at_index_side( + false, index, BTR_SEARCH_LEAF, &pcur, true, 0, mtr); + do { - const rec_t* rec = btr_pcur_get_rec(pcur); + const page_t* page; + + page = btr_pcur_get_page(&pcur); + rec = page_find_rec_max_not_deleted(page); if (page_rec_is_user_rec(rec)) { - return(rec); + break; + } else { + rec = NULL; } - } while (btr_pcur_move_to_prev(pcur, mtr)); + btr_pcur_move_before_first_on_page(&pcur); + } while (btr_pcur_move_to_prev(&pcur, mtr)); - return(NULL); + btr_pcur_close(&pcur); + + return(rec); } /*******************************************************************//** @@ -5373,55 +5388,30 @@ row_search_max_autoinc( const char* col_name, /*!< in: name of autoinc column */ ib_uint64_t* value) /*!< out: AUTOINC value read */ { - ulint i; - ulint n_cols; - dict_field_t* dfield = NULL; + dict_field_t* dfield = dict_index_get_nth_field(index, 0); dberr_t error = DB_SUCCESS; - - n_cols = dict_index_get_n_ordering_defined_by_user(index); - - /* Search the index for the AUTOINC column name */ - for (i = 0; i < n_cols; ++i) { - dfield = dict_index_get_nth_field(index, i); - - if (strcmp(col_name, dfield->name) == 0) { - break; - } - } - *value = 0; - /* Must find the AUTOINC column name */ - if (i < n_cols && dfield) { + if (strcmp(col_name, dfield->name) != 0) { + error = DB_RECORD_NOT_FOUND; + } else { mtr_t mtr; - btr_pcur_t pcur; + const rec_t* rec; mtr_start(&mtr); - /* Open at the high/right end (false), and init cursor */ - btr_pcur_open_at_index_side( - false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr); - - if (!page_is_empty(btr_pcur_get_page(&pcur))) { - const rec_t* rec; - - rec = row_search_autoinc_get_rec(&pcur, &mtr); + rec = row_search_get_max_rec(index, &mtr); - if (rec != NULL) { - ibool unsigned_type = ( - dfield->col->prtype & DATA_UNSIGNED); + if (rec != NULL) { + ibool unsigned_type = ( + dfield->col->prtype & DATA_UNSIGNED); - *value = row_search_autoinc_read_column( - index, rec, i, - dfield->col->mtype, unsigned_type); - } + *value = row_search_autoinc_read_column( + index, rec, 0, + dfield->col->mtype, unsigned_type); } - btr_pcur_close(&pcur); - mtr_commit(&mtr); - } else { - error = DB_RECORD_NOT_FOUND; } return(error); diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index 953bbba11f7..56f8f4d3110 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -359,6 +359,9 @@ UNIV_INTERN ulong srv_flushing_avg_loops = 30; /* The tid of the cleaner thread */ UNIV_INTERN os_tid_t srv_cleaner_tid; +/* The tid of the LRU manager thread */ +UNIV_INTERN os_tid_t srv_lru_manager_tid; + /* The tids of the purge threads */ UNIV_INTERN os_tid_t srv_purge_tids[SRV_MAX_N_PURGE_THREADS]; @@ -368,7 +371,7 @@ UNIV_INTERN os_tid_t srv_io_tids[SRV_MAX_N_IO_THREADS]; /* The tid of the master thread */ UNIV_INTERN os_tid_t srv_master_tid; -/* The relative scheduling priority of the cleaner thread */ +/* The relative scheduling priority of the cleaner and LRU manager threads */ UNIV_INTERN ulint srv_sched_priority_cleaner = 19; /* The relative scheduling priority of the purge threads */ @@ -510,8 +513,8 @@ counters_pad_end[CACHE_LINE_SIZE] __attribute__((unused)) = {0}; /* Set the following to 0 if you want InnoDB to write messages on stderr on startup/shutdown. */ UNIV_INTERN ibool srv_print_verbose_log = TRUE; -UNIV_INTERN ibool srv_print_innodb_monitor = FALSE; -UNIV_INTERN ibool srv_print_innodb_lock_monitor = FALSE; +UNIV_INTERN my_bool srv_print_innodb_monitor = FALSE; +UNIV_INTERN my_bool srv_print_innodb_lock_monitor = FALSE; UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE; UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE; diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index 64d2d4cc896..c1c2f39aaa1 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -222,7 +222,8 @@ srv_file_check_mode( /* Note: stat.rw_perm is only valid of files */ - if (stat.type == OS_FILE_TYPE_FILE) { + if (stat.type == OS_FILE_TYPE_FILE + || stat.type == OS_FILE_TYPE_BLOCK) { if (!stat.rw_perm) { ib_logf(IB_LOG_LEVEL_ERROR, @@ -1570,6 +1571,16 @@ innobase_start_or_create_for_mysql(void) # endif /* F_FULLFSYNC */ #endif /* HAVE_DARWIN_THREADS */ + ib_logf(IB_LOG_LEVEL_INFO, + "Using %s to ref count buffer pool pages", +#ifdef PAGE_ATOMIC_REF_COUNT + "atomics" +#else + "mutexes" +#endif /* PAGE_ATOMIC_REF_COUNT */ + ); + + if (sizeof(ulint) != sizeof(void*)) { ut_print_timestamp(stderr); fprintf(stderr, @@ -2729,6 +2740,7 @@ files_checked: if (!srv_read_only_mode) { os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL); } + os_thread_create(buf_flush_lru_manager_thread, NULL, NULL); #ifdef UNIV_DEBUG /* buf_debug_prints = TRUE; */ diff --git a/storage/xtradb/trx/trx0trx.cc b/storage/xtradb/trx/trx0trx.cc index 8c4bc087038..f29eba0bec1 100644 --- a/storage/xtradb/trx/trx0trx.cc +++ b/storage/xtradb/trx/trx0trx.cc @@ -738,6 +738,13 @@ trx_resurrect_insert( trx->no = TRX_ID_MAX; } + /* trx_start_low() is not called with resurrect, so need to initialize + start time here.*/ + if (trx->state == TRX_STATE_ACTIVE + || trx->state == TRX_STATE_PREPARED) { + trx->start_time = ut_time(); + } + if (undo->dict_operation) { trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); trx->table_id = undo->table_id; @@ -825,6 +832,13 @@ trx_resurrect_update( trx->no = TRX_ID_MAX; } + /* trx_start_low() is not called with resurrect, so need to initialize + start time here.*/ + if (trx->state == TRX_STATE_ACTIVE + || trx->state == TRX_STATE_PREPARED) { + trx->start_time = ut_time(); + } + if (undo->dict_operation) { trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); trx->table_id = undo->table_id; |