diff options
author | Sergei Golubchik <sergii@pisem.net> | 2014-05-06 09:57:39 +0200 |
---|---|---|
committer | Sergei Golubchik <sergii@pisem.net> | 2014-05-06 09:57:39 +0200 |
commit | e2e5d07b2807706fb9187f00c049474a01ab15da (patch) | |
tree | 811b3c4d12a5a75af76fc88d793c3cdb17bf4414 | |
parent | 3792693f311a90cf195ec6d2f9b3762255a249c7 (diff) | |
parent | 83759e02dc12c8fb2576e240f307bc789e9c59cd (diff) | |
download | mariadb-git-e2e5d07b2807706fb9187f00c049474a01ab15da.tar.gz |
MDEV-6184 10.0.11 merge
InnoDB 5.6.16
70 files changed, 2733 insertions, 1072 deletions
diff --git a/mysql-test/suite/innodb/r/help_url.result b/mysql-test/suite/innodb/r/help_url.result index 9a4efa3a185..10affe78f0c 100644 --- a/mysql-test/suite/innodb/r/help_url.result +++ b/mysql-test/suite/innodb/r/help_url.result @@ -1,4 +1,4 @@ create table innodb_table_monitor (a int) engine=InnoDB; Warnings: -Warning 131 Using innodb_table_monitor is deprecated and it may be removed in future releases. Please use the InnoDB INFORMATION_SCHEMA tables instead, see http://dev.mysql.com/doc/refman/5.6/en/innodb-i_s-tables.html +Warning 131 Using the table name innodb_table_monitor to enable diagnostic output is deprecated and may be removed in future releases. Use INFORMATION_SCHEMA or PERFORMANCE_SCHEMA tables or SET GLOBAL innodb_status_output=ON. drop table innodb_table_monitor; diff --git a/storage/innobase/api/api0api.cc b/storage/innobase/api/api0api.cc index d2f1a468f25..c5299156d7a 100644 --- a/storage/innobase/api/api0api.cc +++ b/storage/innobase/api/api0api.cc @@ -3870,6 +3870,7 @@ ib_table_truncate( ib_err_t trunc_err; ib_trx_t ib_trx = NULL; ib_crsr_t ib_crsr = NULL; + ib_ulint_t memcached_sync = 0; ib_trx = ib_trx_begin(IB_TRX_SERIALIZABLE, true, false); @@ -3885,6 +3886,13 @@ ib_table_truncate( err = DB_TABLE_NOT_FOUND; } + /* Remember the memcached_sync_count and set it to 0, so the + truncate can be executed. */ + if (table != NULL && err == DB_SUCCESS) { + memcached_sync = table->memcached_sync_count; + table->memcached_sync_count = 0; + } + dict_mutex_exit_for_mysql(); if (err == DB_SUCCESS) { @@ -3910,6 +3918,15 @@ ib_table_truncate( ut_a(err == DB_SUCCESS); } + /* Set the memcached_sync_count back. */ + if (table != NULL && memcached_sync != 0) { + dict_mutex_enter_for_mysql(); + + table->memcached_sync_count = memcached_sync; + + dict_mutex_exit_for_mysql(); + } + return(trunc_err); } @@ -3972,3 +3989,51 @@ ib_cfg_get_cfg() return(cfg_status); } + +/*****************************************************************//** +Increase/decrease the memcached sync count of table to sync memcached +DML with SQL DDLs. +@return DB_SUCCESS or error number */ +UNIV_INTERN +ib_err_t +ib_cursor_set_memcached_sync( +/*=========================*/ + ib_crsr_t ib_crsr, /*!< in: cursor */ + ib_bool_t flag) /*!< in: true for increase */ +{ + const ib_cursor_t* cursor = (const ib_cursor_t*) ib_crsr; + row_prebuilt_t* prebuilt = cursor->prebuilt; + dict_table_t* table = prebuilt->table; + ib_err_t err = DB_SUCCESS; + + if (table != NULL) { + /* If memcached_sync_count is -1, means table is + doing DDL, we just return error. */ + if (table->memcached_sync_count == DICT_TABLE_IN_DDL) { + return(DB_ERROR); + } + + if (flag) { +#ifdef HAVE_ATOMIC_BUILTINS + os_atomic_increment_lint(&table->memcached_sync_count, 1); +#else + dict_mutex_enter_for_mysql(); + ++table->memcached_sync_count; + dict_mutex_exit_for_mysql(); +#endif + } else { +#ifdef HAVE_ATOMIC_BUILTINS + os_atomic_decrement_lint(&table->memcached_sync_count, 1); +#else + dict_mutex_enter_for_mysql(); + --table->memcached_sync_count; + dict_mutex_exit_for_mysql(); +#endif + ut_a(table->memcached_sync_count >= 0); + } + } else { + err = DB_TABLE_NOT_FOUND; + } + + return(err); +} diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 7e1483cfe85..e39d82a6c8b 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -2088,8 +2088,7 @@ btr_cur_optimistic_update( contain trx id and roll ptr fields */ ulint cmpl_info,/*!< in: compiler info on secondary index updates */ - que_thr_t* thr, /*!< in: query thread, or NULL if - appropriate flags are set */ + que_thr_t* thr, /*!< in: query thread */ trx_id_t trx_id, /*!< in: transaction id */ mtr_t* mtr) /*!< in/out: mini-transaction; if this is a secondary index, the caller must @@ -2382,8 +2381,7 @@ btr_cur_pessimistic_update( the values in update vector have no effect */ ulint cmpl_info,/*!< in: compiler info on secondary index updates */ - que_thr_t* thr, /*!< in: query thread, or NULL if - appropriate flags are set */ + que_thr_t* thr, /*!< in: query thread */ trx_id_t trx_id, /*!< in: transaction id */ mtr_t* mtr) /*!< in/out: mini-transaction; must be committed before latching any further pages */ diff --git a/storage/innobase/buf/buf0buddy.cc b/storage/innobase/buf/buf0buddy.cc index ee400fcdf23..fcf45b7fa1a 100644 --- a/storage/innobase/buf/buf0buddy.cc +++ b/storage/innobase/buf/buf0buddy.cc @@ -533,7 +533,6 @@ buf_buddy_relocate( { buf_page_t* bpage; const ulint size = BUF_BUDDY_LOW << i; - ib_mutex_t* mutex; ulint space; ulint offset; @@ -556,7 +555,12 @@ buf_buddy_relocate( ut_ad(space != BUF_BUDDY_STAMP_FREE); - bpage = buf_page_hash_get(buf_pool, space, offset); + ulint fold = buf_page_address_fold(space, offset); + rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold); + + rw_lock_x_lock(hash_lock); + + bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); if (!bpage || bpage->zip.data != src) { /* The block has probably been freshly @@ -564,6 +568,8 @@ buf_buddy_relocate( added to buf_pool->page_hash yet. Obviously, it cannot be relocated. */ + rw_lock_x_unlock(hash_lock); + return(false); } @@ -573,6 +579,8 @@ buf_buddy_relocate( For the sake of simplicity, give up. */ ut_ad(page_zip_get_size(&bpage->zip) < size); + rw_lock_x_unlock(hash_lock); + return(false); } @@ -580,27 +588,42 @@ buf_buddy_relocate( contain uninitialized data. */ UNIV_MEM_ASSERT_W(src, size); - mutex = buf_page_get_mutex(bpage); + ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - mutex_enter(mutex); + mutex_enter(block_mutex); if (buf_page_can_relocate(bpage)) { /* Relocate the compressed page. */ - ullint usec = ut_time_us(NULL); + ullint usec = ut_time_us(NULL); + ut_a(bpage->zip.data == src); - memcpy(dst, src, size); - bpage->zip.data = (page_zip_t*) dst; - mutex_exit(mutex); + + /* Note: This is potentially expensive, we need a better + solution here. We go with correctness for now. */ + ::memcpy(dst, src, size); + + bpage->zip.data = reinterpret_cast<page_zip_t*>(dst); + + rw_lock_x_unlock(hash_lock); + + mutex_exit(block_mutex); + buf_buddy_mem_invalid( reinterpret_cast<buf_buddy_free_t*>(src), i); buf_buddy_stat_t* buddy_stat = &buf_pool->buddy_stat[i]; - buddy_stat->relocated++; + + ++buddy_stat->relocated; + buddy_stat->relocated_usec += ut_time_us(NULL) - usec; + return(true); } - mutex_exit(mutex); + rw_lock_x_unlock(hash_lock); + + mutex_exit(block_mutex); + return(false); } diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index a8e833b5fa3..697b3f203b3 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -548,8 +548,11 @@ buf_page_is_corrupted( if (checksum_field1 == 0 && checksum_field2 == 0 && mach_read_from_4(read_buf + FIL_PAGE_LSN) == 0) { /* make sure that the page is really empty */ - ut_d(for (ulint i = 0; i < UNIV_PAGE_SIZE; i++) { - ut_a(read_buf[i] == 0); }); + for (ulint i = 0; i < UNIV_PAGE_SIZE; i++) { + if (read_buf[i] != 0) { + return(TRUE); + } + } return(FALSE); } @@ -1601,14 +1604,19 @@ buf_pool_watch_set( bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); - if (UNIV_LIKELY_NULL(bpage)) { + if (bpage != NULL) { page_found: if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) { /* The page was loaded meanwhile. */ return(bpage); } + /* Add to an existing watch. */ - bpage->buf_fix_count++; +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_increment_uint32(&bpage->buf_fix_count, 1); +#else + ++bpage->buf_fix_count; +#endif /* PAGE_ATOMIC_REF_COUNT */ return(NULL); } @@ -1736,8 +1744,7 @@ buf_pool_watch_unset( buf_page_t* bpage; buf_pool_t* buf_pool = buf_pool_get(space, offset); ulint fold = buf_page_address_fold(space, offset); - rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, - fold); + rw_lock_t* hash_lock = buf_page_hash_lock_get(buf_pool, fold); /* We only need to have buf_pool mutex in case where we end up calling buf_pool_watch_remove but to obey latching order @@ -1748,22 +1755,24 @@ buf_pool_watch_unset( rw_lock_x_lock(hash_lock); - bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); - /* The page must exist because buf_pool_watch_set() - increments buf_fix_count. */ - ut_a(bpage); + /* The page must exist because buf_pool_watch_set() increments + buf_fix_count. */ - if (UNIV_UNLIKELY(!buf_pool_watch_is_sentinel(buf_pool, bpage))) { - ib_mutex_t* mutex = buf_page_get_mutex(bpage); + bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); - mutex_enter(mutex); - ut_a(bpage->buf_fix_count > 0); - bpage->buf_fix_count--; - mutex_exit(mutex); + if (!buf_pool_watch_is_sentinel(buf_pool, bpage)) { + buf_block_unfix(reinterpret_cast<buf_block_t*>(bpage)); } else { - ut_a(bpage->buf_fix_count > 0); - if (UNIV_LIKELY(!--bpage->buf_fix_count)) { + ut_ad(bpage->buf_fix_count > 0); + +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_decrement_uint32(&bpage->buf_fix_count, 1); +#else + --bpage->buf_fix_count; +#endif /* PAGE_ATOMIC_REF_COUNT */ + + if (bpage->buf_fix_count == 0) { buf_pool_watch_remove(buf_pool, fold, bpage); } } @@ -1793,10 +1802,10 @@ buf_pool_watch_occurred( rw_lock_s_lock(hash_lock); - bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); /* The page must exist because buf_pool_watch_set() increments buf_fix_count. */ - ut_a(bpage); + bpage = buf_page_hash_get_low(buf_pool, space, offset, fold); + ret = !buf_pool_watch_is_sentinel(buf_pool, bpage); rw_lock_s_unlock(hash_lock); @@ -2034,27 +2043,32 @@ err_exit: case BUF_BLOCK_READY_FOR_USE: case BUF_BLOCK_MEMORY: case BUF_BLOCK_REMOVE_HASH: - break; + ut_error; + case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_ZIP_DIRTY: block_mutex = &buf_pool->zip_mutex; mutex_enter(block_mutex); - bpage->buf_fix_count++; +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_increment_uint32(&bpage->buf_fix_count, 1); +#else + ++bpage->buf_fix_count; +#endif /* PAGE_ATOMIC_REF_COUNT */ goto got_block; case BUF_BLOCK_FILE_PAGE: /* Discard the uncompressed page frame if possible. */ if (!discard_attempted) { rw_lock_s_unlock(hash_lock); - buf_block_try_discard_uncompressed(space, - offset); + buf_block_try_discard_uncompressed(space, offset); discard_attempted = TRUE; goto lookup; } block_mutex = &((buf_block_t*) bpage)->mutex; + mutex_enter(block_mutex); - buf_block_buf_fix_inc((buf_block_t*) bpage, - __FILE__, __LINE__); + + buf_block_buf_fix_inc((buf_block_t*) bpage, __FILE__, __LINE__); goto got_block; } @@ -2067,7 +2081,7 @@ got_block: rw_lock_s_unlock(hash_lock); #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(!bpage->file_page_was_freed); -#endif +#endif /* defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG */ buf_page_set_accessed(bpage); @@ -2378,7 +2392,7 @@ buf_block_is_uncompressed( const buf_block_t* block) /*!< in: pointer to block, not dereferenced */ { - if (UNIV_UNLIKELY((((ulint) block) % sizeof *block) != 0)) { + if ((((ulint) block) % sizeof *block) != 0) { /* The pointer should be aligned. */ return(FALSE); } @@ -2408,6 +2422,47 @@ buf_debug_execute_is_force_flush() } #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ +/** +Wait for the block to be read in. +@param block The block to check */ +static +void +buf_wait_for_read(buf_block_t* block) +{ + /* Note: For the PAGE_ATOMIC_REF_COUNT case: + + We are using the block->lock to check for IO state (and a dirty read). + We set the IO_READ state under the protection of the hash_lock + (and block->mutex). This is safe because another thread can only + access the block (and check for IO state) after the block has been + added to the page hashtable. */ + + if (buf_block_get_io_fix(block) == BUF_IO_READ) { + + /* Wait until the read operation completes */ + + ib_mutex_t* mutex = buf_page_get_mutex(&block->page); + + for (;;) { + buf_io_fix io_fix; + + mutex_enter(mutex); + + io_fix = buf_block_get_io_fix(block); + + mutex_exit(mutex); + + if (io_fix == BUF_IO_READ) { + /* Wait by temporaly s-latch */ + rw_lock_s_lock(&block->lock); + rw_lock_s_unlock(&block->lock); + } else { + break; + } + } + } +} + /********************************************************************//** This is the general function used to get access to a database page. @return pointer to the block or NULL */ @@ -2432,10 +2487,10 @@ buf_page_get_gen( ulint fold; unsigned access_time; ulint fix_type; - ibool must_read; rw_lock_t* hash_lock; - ib_mutex_t* block_mutex; ulint retries = 0; + buf_block_t* fix_block; + ib_mutex_t* fix_mutex = NULL; buf_pool_t* buf_pool = buf_pool_get(space, offset); ut_ad(mtr); @@ -2472,7 +2527,9 @@ loop: block = guess; rw_lock_s_lock(hash_lock); - if (block) { + + if (block != NULL) { + /* If the guess is a compressed page descriptor that has been allocated by buf_page_alloc_descriptor(), it may have been freed by buf_relocate(). */ @@ -2510,10 +2567,10 @@ loop: if (UNIV_LIKELY_NULL(block)) { /* We can release hash_lock after we - acquire block_mutex to make sure that - no state change takes place. */ - block_mutex = buf_page_get_mutex(&block->page); - mutex_enter(block_mutex); + increment the fix count to make + sure that no state change takes place. */ + fix_block = block; + buf_block_fix(fix_block); /* Now safe to release page_hash mutex */ rw_lock_x_unlock(hash_lock); @@ -2568,36 +2625,48 @@ loop: ut_a(++buf_dbg_counter % 5771 || buf_validate()); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ goto loop; + } else { + fix_block = block; } - - /* We can release hash_lock after we acquire block_mutex to - make sure that no state change takes place. */ - block_mutex = buf_page_get_mutex(&block->page); - mutex_enter(block_mutex); + buf_block_fix(fix_block); /* Now safe to release page_hash mutex */ rw_lock_s_unlock(hash_lock); got_block: + + fix_mutex = buf_page_get_mutex(&fix_block->page); + ut_ad(page_zip_get_size(&block->page.zip) == zip_size); - ut_ad(mutex_own(block_mutex)); - must_read = buf_block_get_io_fix(block) == BUF_IO_READ; + if (mode == BUF_GET_IF_IN_POOL || mode == BUF_PEEK_IF_IN_POOL) { - if (must_read && (mode == BUF_GET_IF_IN_POOL - || mode == BUF_PEEK_IF_IN_POOL)) { + bool must_read; - /* The page is being read to buffer pool, - but we cannot wait around for the read to - complete. */ -null_exit: - mutex_exit(block_mutex); + { + buf_page_t* fix_page = &fix_block->page; - return(NULL); + mutex_enter(fix_mutex); + + buf_io_fix io_fix = buf_page_get_io_fix(fix_page); + + must_read = (io_fix == BUF_IO_READ); + + mutex_exit(fix_mutex); + } + + if (must_read) { + /* The page is being read to buffer pool, + but we cannot wait around for the read to + complete. */ + buf_block_unfix(fix_block); + + return(NULL); + } } - switch (buf_block_get_state(block)) { + switch(buf_block_get_state(fix_block)) { buf_page_t* bpage; case BUF_BLOCK_FILE_PAGE: @@ -2610,18 +2679,22 @@ null_exit: adaptive hash index. There cannot be an adaptive hash index for a compressed-only page, so do not bother decompressing the page. */ - goto null_exit; + buf_block_unfix(fix_block); + + return(NULL); } bpage = &block->page; - if (bpage->buf_fix_count + /* Note: We have already buffer fixed this block. */ + if (bpage->buf_fix_count > 1 || buf_page_get_io_fix(bpage) != BUF_IO_NONE) { + /* This condition often occurs when the buffer is not buffer-fixed, but I/O-fixed by buf_page_init_for_read(). */ - mutex_exit(block_mutex); -wait_until_unfixed: + buf_block_unfix(fix_block); + /* The block is buffer-fixed or I/O-fixed. Try again later. */ os_thread_sleep(WAIT_FOR_READ); @@ -2632,24 +2705,32 @@ wait_until_unfixed: /* Buffer-fix the block so that it cannot be evicted or relocated while we are attempting to allocate an uncompressed page. */ - bpage->buf_fix_count++; - /* Allocate an uncompressed page. */ - mutex_exit(block_mutex); block = buf_LRU_get_free_block(buf_pool); - ut_a(block); buf_pool_mutex_enter(buf_pool); rw_lock_x_lock(hash_lock); + /* Buffer-fixing prevents the page_hash from changing. */ ut_ad(bpage == buf_page_hash_get_low( buf_pool, space, offset, fold)); - mutex_enter(&block->mutex); + buf_block_mutex_enter(block); + mutex_enter(&buf_pool->zip_mutex); - if (--bpage->buf_fix_count + ut_ad(fix_block->page.buf_fix_count > 0); + +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_decrement_uint32(&fix_block->page.buf_fix_count, 1); +#else + --fix_block->page.buf_fix_count; +#endif /* PAGE_ATOMIC_REF_COUNT */ + + fix_block = block; + + if (bpage->buf_fix_count > 0 || buf_page_get_io_fix(bpage) != BUF_IO_NONE) { mutex_exit(&buf_pool->zip_mutex); @@ -2662,23 +2743,31 @@ wait_until_unfixed: buf_LRU_block_free_non_file_page(block); buf_pool_mutex_exit(buf_pool); rw_lock_x_unlock(hash_lock); - mutex_exit(&block->mutex); + buf_block_mutex_exit(block); - goto wait_until_unfixed; + /* Try again */ + goto loop; } /* Move the compressed page from bpage to block, and uncompress it. */ + /* Note: this is the uncompressed block and it is not + accessible by other threads yet because it is not in + any list or hash table */ buf_relocate(bpage, &block->page); + buf_block_init_low(block); + + /* Set after relocate(). */ + block->page.buf_fix_count = 1; + block->lock_hash_val = lock_rec_hash(space, offset); UNIV_MEM_DESC(&block->page.zip.data, - page_zip_get_size(&block->page.zip)); + page_zip_get_size(&block->page.zip)); - if (buf_page_get_state(&block->page) - == BUF_BLOCK_ZIP_PAGE) { + if (buf_page_get_state(&block->page) == BUF_BLOCK_ZIP_PAGE) { #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG UT_LIST_REMOVE(list, buf_pool->zip_clean, &block->page); @@ -2686,8 +2775,7 @@ wait_until_unfixed: ut_ad(!block->page.in_flush_list); } else { /* Relocate buf_pool->flush_list. */ - buf_flush_relocate_on_flush_list(bpage, - &block->page); + buf_flush_relocate_on_flush_list(bpage, &block->page); } /* Buffer-fix, I/O-fix, and X-latch the block @@ -2698,7 +2786,6 @@ wait_until_unfixed: /* Insert at the front of unzip_LRU list */ buf_unzip_LRU_add_block(block, FALSE); - block->page.buf_fix_count = 1; buf_block_set_io_fix(block, BUF_IO_READ); rw_lock_x_lock_inline(&block->lock, 0, file, line); @@ -2706,11 +2793,14 @@ wait_until_unfixed: rw_lock_x_unlock(hash_lock); - buf_pool->n_pend_unzip++; + ++buf_pool->n_pend_unzip; + buf_pool_mutex_exit(buf_pool); access_time = buf_page_is_accessed(&block->page); - mutex_exit(&block->mutex); + + buf_block_mutex_exit(block); + mutex_exit(&buf_pool->zip_mutex); buf_page_free_descriptor(bpage); @@ -2721,9 +2811,12 @@ wait_until_unfixed: /* Page checksum verification is already done when the page is read from disk. Hence page checksum verification is not necessary when decompressing the page. */ - ut_a(buf_zip_decompress(block, FALSE)); + { + bool success = buf_zip_decompress(block, FALSE); + ut_a(success); + } - if (UNIV_LIKELY(!recv_no_ibuf_operations)) { + if (!recv_no_ibuf_operations) { if (access_time) { #ifdef UNIV_IBUF_COUNT_DEBUG ut_a(ibuf_count_get(space, offset) == 0); @@ -2734,13 +2827,19 @@ wait_until_unfixed: } } - /* Unfix and unlatch the block. */ buf_pool_mutex_enter(buf_pool); - mutex_enter(&block->mutex); - block->page.buf_fix_count--; - buf_block_set_io_fix(block, BUF_IO_NONE); - buf_pool->n_pend_unzip--; + + /* Unfix and unlatch the block. */ + buf_block_mutex_enter(fix_block); + + buf_block_set_io_fix(fix_block, BUF_IO_NONE); + + buf_block_mutex_exit(fix_block); + + --buf_pool->n_pend_unzip; + buf_pool_mutex_exit(buf_pool); + rw_lock_x_unlock(&block->lock); break; @@ -2754,43 +2853,40 @@ wait_until_unfixed: break; } + ut_ad(block == fix_block); + ut_ad(fix_block->page.buf_fix_count > 0); + #ifdef UNIV_SYNC_DEBUG ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)); ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED)); #endif /* UNIV_SYNC_DEBUG */ - ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + ut_ad(buf_block_get_state(fix_block) == BUF_BLOCK_FILE_PAGE); #if UNIV_WORD_SIZE == 4 /* On 32-bit systems, there is no padding in buf_page_t. On other systems, Valgrind could complain about uninitialized pad bytes. */ - UNIV_MEM_ASSERT_RW(&block->page, sizeof block->page); + UNIV_MEM_ASSERT_RW(&fix_block->page, sizeof(fix_block->page)); #endif #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG if ((mode == BUF_GET_IF_IN_POOL || mode == BUF_GET_IF_IN_POOL_OR_WATCH) && (ibuf_debug || buf_debug_execute_is_force_flush())) { + /* Try to evict the block from the buffer pool, to use the insert buffer (change buffer) as much as possible. */ - /* To obey the latching order, release the - block->mutex before acquiring buf_pool->mutex. Protect - the block from changes by temporarily buffer-fixing it - for the time we are not holding block->mutex. */ - buf_block_buf_fix_inc(block, file, line); - mutex_exit(&block->mutex); buf_pool_mutex_enter(buf_pool); - mutex_enter(&block->mutex); - buf_block_buf_fix_dec(block); - mutex_exit(&block->mutex); + + buf_block_unfix(fix_block); /* Now we are only holding the buf_pool->mutex, not block->mutex or hash_lock. Blocks cannot be relocated or enter or exit the buf_pool while we are holding the buf_pool->mutex. */ - if (buf_LRU_free_page(&block->page, true)) { + if (buf_LRU_free_page(&fix_block->page, true)) { buf_pool_mutex_exit(buf_pool); rw_lock_x_lock(hash_lock); @@ -2807,7 +2903,7 @@ wait_until_unfixed: rw_lock_x_unlock(hash_lock); - if (UNIV_LIKELY_NULL(block)) { + if (block != NULL) { /* Either the page has been read in or a watch was set on that in the window where we released the buf_pool::mutex @@ -2823,103 +2919,114 @@ wait_until_unfixed: return(NULL); } - mutex_enter(&block->mutex); + mutex_enter(&fix_block->mutex); - if (buf_flush_page_try(buf_pool, block)) { + if (buf_flush_page_try(buf_pool, fix_block)) { fprintf(stderr, "innodb_change_buffering_debug flush %u %u\n", (unsigned) space, (unsigned) offset); - guess = block; + guess = fix_block; goto loop; } + buf_block_mutex_exit(fix_block); + + buf_block_fix(fix_block); + /* Failed to evict the page; change it directly */ buf_pool_mutex_exit(buf_pool); } #endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - buf_block_buf_fix_inc(block, file, line); + ut_ad(fix_block->page.buf_fix_count > 0); + +#ifdef UNIV_SYNC_DEBUG + /* We have already buffer fixed the page, and we are committed to + returning this page to the caller. Register for debugging. */ + { + ibool ret; + ret = rw_lock_s_lock_nowait(&fix_block->debug_latch, file, line); + ut_a(ret); + } +#endif /* UNIV_SYNC_DEBUG */ + #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG ut_a(mode == BUF_GET_POSSIBLY_FREED - || !block->page.file_page_was_freed); + || !fix_block->page.file_page_was_freed); #endif /* Check if this is the first access to the page */ - access_time = buf_page_is_accessed(&block->page); + access_time = buf_page_is_accessed(&fix_block->page); - buf_page_set_accessed(&block->page); + /* This is a heuristic and we don't care about ordering issues. */ + if (access_time == 0) { + buf_block_mutex_enter(fix_block); - mutex_exit(&block->mutex); + buf_page_set_accessed(&fix_block->page); + + buf_block_mutex_exit(fix_block); + } if (mode != BUF_PEEK_IF_IN_POOL) { - buf_page_make_young_if_needed(&block->page); + buf_page_make_young_if_needed(&fix_block->page); } #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(++buf_dbg_counter % 5771 || buf_validate()); - ut_a(block->page.buf_fix_count > 0); - ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + ut_a(fix_block->page.buf_fix_count > 0); + ut_a(buf_block_get_state(fix_block) == BUF_BLOCK_FILE_PAGE); #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ +#ifdef PAGE_ATOMIC_REF_COUNT + /* We have to wait here because the IO_READ state was set + under the protection of the hash_lock and the block->mutex + but not the block->lock. */ + buf_wait_for_read(fix_block); +#endif /* PAGE_ATOMIC_REF_COUNT */ + switch (rw_latch) { case RW_NO_LATCH: - if (must_read) { - /* Let us wait until the read operation - completes */ - - for (;;) { - enum buf_io_fix io_fix; - - mutex_enter(&block->mutex); - io_fix = buf_block_get_io_fix(block); - mutex_exit(&block->mutex); - if (io_fix == BUF_IO_READ) { - /* wait by temporaly s-latch */ - rw_lock_s_lock(&(block->lock)); - rw_lock_s_unlock(&(block->lock)); - } else { - break; - } - } - } +#ifndef PAGE_ATOMIC_REF_COUNT + buf_wait_for_read(fix_block); +#endif /* !PAGE_ATOMIC_REF_COUNT */ fix_type = MTR_MEMO_BUF_FIX; break; case RW_S_LATCH: - rw_lock_s_lock_inline(&(block->lock), 0, file, line); + rw_lock_s_lock_inline(&fix_block->lock, 0, file, line); fix_type = MTR_MEMO_PAGE_S_FIX; break; default: ut_ad(rw_latch == RW_X_LATCH); - rw_lock_x_lock_inline(&(block->lock), 0, file, line); + rw_lock_x_lock_inline(&fix_block->lock, 0, file, line); fix_type = MTR_MEMO_PAGE_X_FIX; break; } - mtr_memo_push(mtr, block, fix_type); + mtr_memo_push(mtr, fix_block, fix_type); if (mode != BUF_PEEK_IF_IN_POOL && !access_time) { /* In the case of a first access, try to apply linear read-ahead */ - buf_read_ahead_linear(space, zip_size, offset, - ibuf_inside(mtr)); + buf_read_ahead_linear( + space, zip_size, offset, ibuf_inside(mtr)); } #ifdef UNIV_IBUF_COUNT_DEBUG - ut_a(ibuf_count_get(buf_block_get_space(block), - buf_block_get_page_no(block)) == 0); + ut_a(ibuf_count_get(buf_block_get_space(fix_block), + buf_block_get_page_no(fix_block)) == 0); #endif #ifdef UNIV_SYNC_DEBUG ut_ad(!rw_lock_own(hash_lock, RW_LOCK_EX)); ut_ad(!rw_lock_own(hash_lock, RW_LOCK_SHARED)); #endif /* UNIV_SYNC_DEBUG */ - return(block); + return(fix_block); } /********************************************************************//** @@ -2982,9 +3089,7 @@ buf_page_optimistic_get( } if (UNIV_UNLIKELY(!success)) { - mutex_enter(&block->mutex); buf_block_buf_fix_dec(block); - mutex_exit(&block->mutex); return(FALSE); } @@ -2998,9 +3103,7 @@ buf_page_optimistic_get( rw_lock_x_unlock(&(block->lock)); } - mutex_enter(&block->mutex); buf_block_buf_fix_dec(block); - mutex_exit(&block->mutex); return(FALSE); } @@ -3105,9 +3208,7 @@ buf_page_get_known_nowait( } if (!success) { - mutex_enter(&block->mutex); buf_block_buf_fix_dec(block); - mutex_exit(&block->mutex); return(FALSE); } @@ -3206,9 +3307,7 @@ buf_page_try_get_func( } if (!success) { - mutex_enter(&block->mutex); buf_block_buf_fix_dec(block); - mutex_exit(&block->mutex); return(NULL); } @@ -3306,13 +3405,20 @@ buf_page_init( hash_page = buf_page_hash_get_low(buf_pool, space, offset, fold); - if (UNIV_LIKELY(!hash_page)) { + if (hash_page == NULL) { + /* Block not found in the hash table */ } else if (buf_pool_watch_is_sentinel(buf_pool, hash_page)) { - /* Preserve the reference count. */ - ulint buf_fix_count = hash_page->buf_fix_count; + ib_uint32_t buf_fix_count = hash_page->buf_fix_count; + + ut_a(buf_fix_count > 0); + +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_increment_uint32( + &block->page.buf_fix_count, buf_fix_count); +#else + block->page.buf_fix_count += ulint(buf_fix_count); +#endif /* PAGE_ATOMIC_REF_COUNT */ - ut_a(buf_fix_count > 0); - block->page.buf_fix_count += buf_fix_count; buf_pool_watch_remove(buf_pool, fold, hash_page); } else { fprintf(stderr, @@ -3335,8 +3441,9 @@ buf_page_init( ut_ad(!block->page.in_zip_hash); ut_ad(!block->page.in_page_hash); ut_d(block->page.in_page_hash = TRUE); - HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, - fold, &block->page); + + HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, &block->page); + if (zip_size) { page_zip_set_size(&block->page.zip, zip_size); } @@ -3446,6 +3553,15 @@ err_exit: ut_ad(buf_pool_from_bpage(bpage) == buf_pool); buf_page_init(buf_pool, space, offset, fold, zip_size, block); + +#ifdef PAGE_ATOMIC_REF_COUNT + /* Note: We set the io state without the protection of + the block->lock. This is because other threads cannot + access this block unless it is in the hash table. */ + + buf_page_set_io_fix(bpage, BUF_IO_READ); +#endif /* PAGE_ATOMIC_REF_COUNT */ + rw_lock_x_unlock(hash_lock); /* The block must be put to the LRU list, to the old blocks */ @@ -3461,7 +3577,10 @@ err_exit: io-handler thread. */ rw_lock_x_lock_gen(&block->lock, BUF_IO_READ); + +#ifndef PAGE_ATOMIC_REF_COUNT buf_page_set_io_fix(bpage, BUF_IO_READ); +#endif /* !PAGE_ATOMIC_REF_COUNT */ if (zip_size) { /* buf_pool->mutex may be released and @@ -3549,12 +3668,22 @@ err_exit: ut_d(bpage->in_page_hash = TRUE); - if (UNIV_LIKELY_NULL(watch_page)) { + if (watch_page != NULL) { /* Preserve the reference count. */ - ulint buf_fix_count = watch_page->buf_fix_count; + ib_uint32_t buf_fix_count; + + buf_fix_count = watch_page->buf_fix_count; + ut_a(buf_fix_count > 0); + +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_increment_uint32( + &bpage->buf_fix_count, buf_fix_count); +#else bpage->buf_fix_count += buf_fix_count; +#endif /* PAGE_ATOMIC_REF_COUNT */ + ut_ad(buf_pool_watch_is_sentinel(buf_pool, watch_page)); buf_pool_watch_remove(buf_pool, fold, watch_page); } @@ -3649,8 +3778,7 @@ buf_page_create( buf_block_free(free_block); - return(buf_page_get_with_no_latch(space, zip_size, - offset, mtr)); + return(buf_page_get_with_no_latch(space, zip_size, offset, mtr)); } /* If we get here, the page was not in buf_pool: init it there */ diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index 2b2483fde6d..e1018c89e9b 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -351,13 +351,12 @@ At a database startup initializes the doublewrite buffer memory structure if we already have a doublewrite buffer created in the data files. If we are upgrading to an InnoDB version which supports multiple tablespaces, then this function performs the necessary update operations. If we are in a crash -recovery, this function uses a possible doublewrite buffer to restore -half-written pages in the data files. */ +recovery, this function loads the pages from double write buffer into memory. */ UNIV_INTERN void -buf_dblwr_init_or_restore_pages( -/*============================*/ - ibool restore_corrupt_pages) /*!< in: TRUE=restore pages */ +buf_dblwr_init_or_load_pages( +/*==========================*/ + bool load_corrupt_pages) { byte* buf; byte* read_buf; @@ -368,8 +367,8 @@ buf_dblwr_init_or_restore_pages( ibool reset_space_ids = FALSE; byte* doublewrite; ulint space_id; - ulint page_no; ulint i; + recv_dblwr_t& recv_dblwr = recv_sys->dblwr; /* We do the file i/o past the buffer pool */ @@ -431,13 +430,12 @@ buf_dblwr_init_or_restore_pages( for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) { ulint source_page_no; - page_no = mach_read_from_4(page + FIL_PAGE_OFFSET); if (reset_space_ids) { space_id = 0; mach_write_to_4(page - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0); + + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); /* We do not need to calculate new checksums for the pages because the field .._SPACE_ID does not affect them. Write the page back to where we read it from. */ @@ -449,19 +447,50 @@ buf_dblwr_init_or_restore_pages( + i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; } - fil_io(OS_FILE_WRITE, true, 0, 0, source_page_no, 0, + fil_io(OS_FILE_WRITE, true, space_id, 0, source_page_no, 0, UNIV_PAGE_SIZE, page, NULL); - } else { - space_id = mach_read_from_4( - page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + } else if (load_corrupt_pages) { + + recv_dblwr.add(page); } - if (!restore_corrupt_pages) { - /* The database was shut down gracefully: no need to - restore pages */ + page += UNIV_PAGE_SIZE; + } + + fil_flush_file_spaces(FIL_TABLESPACE); + +leave_func: + ut_free(unaligned_read_buf); +} + +/****************************************************************//** +Process the double write buffer pages. */ +void +buf_dblwr_process() +/*===============*/ +{ + ulint space_id; + ulint page_no; + ulint page_no_dblwr = 0; + byte* page; + byte* read_buf; + byte* unaligned_read_buf; + recv_dblwr_t& recv_dblwr = recv_sys->dblwr; + + unaligned_read_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); + + read_buf = static_cast<byte*>( + ut_align(unaligned_read_buf, UNIV_PAGE_SIZE)); + + for (std::list<byte*>::iterator i = recv_dblwr.pages.begin(); + i != recv_dblwr.pages.end(); ++i, ++page_no_dblwr ) { - } else if (!fil_tablespace_exists_in_mem(space_id)) { + page = *i; + page_no = mach_read_from_4(page + FIL_PAGE_OFFSET); + space_id = mach_read_from_4(page + FIL_PAGE_SPACE_ID); + + if (!fil_tablespace_exists_in_mem(space_id)) { /* Maybe we have dropped the single-table tablespace and this page once belonged to it: do nothing */ @@ -472,19 +501,8 @@ buf_dblwr_init_or_restore_pages( "within space bounds; space id %lu " "page number %lu, page %lu in " "doublewrite buf.", - (ulong) space_id, (ulong) page_no, (ulong) i); - - } else if (space_id == TRX_SYS_SPACE - && ((page_no >= block1 - && page_no - < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) - || (page_no >= block2 - && page_no - < (block2 - + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)))) { - - /* It is an unwritten doublewrite buffer page: - do nothing */ + (ulong) space_id, (ulong) page_no, + page_no_dblwr); } else { ulint zip_size = fil_space_get_zip_size(space_id); @@ -551,14 +569,11 @@ buf_dblwr_init_or_restore_pages( " the doublewrite buffer."); } } - - page += UNIV_PAGE_SIZE; } fil_flush_file_spaces(FIL_TABLESPACE); - -leave_func: ut_free(unaligned_read_buf); + recv_dblwr.pages.clear(); } /****************************************************************//** @@ -776,6 +791,7 @@ buf_dblwr_write_block_to_datafile( fil_io(flags, sync, buf_block_get_space(block), 0, buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE, (void*) block->frame, (void*) block); + } /********************************************************************//** diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 3af434b77f4..0fa5c744e51 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -502,15 +502,15 @@ buf_flush_ready_for_replace( #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); -#endif +#endif /* UNIV_DEBUG */ ut_ad(mutex_own(buf_page_get_mutex(bpage))); ut_ad(bpage->in_LRU_list); - if (UNIV_LIKELY(buf_page_in_file(bpage))) { + if (buf_page_in_file(bpage)) { return(bpage->oldest_modification == 0 - && buf_page_get_io_fix(bpage) == BUF_IO_NONE - && bpage->buf_fix_count == 0); + && bpage->buf_fix_count == 0 + && buf_page_get_io_fix(bpage) == BUF_IO_NONE); } ut_print_timestamp(stderr); @@ -553,17 +553,10 @@ buf_flush_ready_for_flush( switch (flush_type) { case BUF_FLUSH_LIST: - return(true); - case BUF_FLUSH_LRU: case BUF_FLUSH_SINGLE_PAGE: - /* Because any thread may call single page flush, even - when owning locks on pages, to avoid deadlocks, we must - make sure that the that it is not buffer fixed. - The same holds true for LRU flush because a user thread - may end up waiting for an LRU flush to end while - holding locks on other pages. */ - return(bpage->buf_fix_count == 0); + return(true); + case BUF_FLUSH_N_TYPES: break; } @@ -991,9 +984,10 @@ NOTE: in simulated aio we must call os_aio_simulated_wake_handler_threads after we have posted a batch of writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be held upon entering this function, and they will be released by this -function. */ +function if it returns true. +@return TRUE if the page was flushed */ UNIV_INTERN -void +bool buf_flush_page( /*===========*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ @@ -1001,109 +995,84 @@ buf_flush_page( buf_flush_t flush_type, /*!< in: type of flush */ bool sync) /*!< in: true if sync IO request */ { - ib_mutex_t* block_mutex; - ibool is_uncompressed; - ut_ad(flush_type < BUF_FLUSH_N_TYPES); ut_ad(buf_pool_mutex_own(buf_pool)); ut_ad(buf_page_in_file(bpage)); ut_ad(!sync || flush_type == BUF_FLUSH_SINGLE_PAGE); - block_mutex = buf_page_get_mutex(bpage); + ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); + ut_ad(mutex_own(block_mutex)); ut_ad(buf_flush_ready_for_flush(bpage, flush_type)); - buf_page_set_io_fix(bpage, BUF_IO_WRITE); + bool is_uncompressed; - buf_page_set_flush_type(bpage, flush_type); + is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); + ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex)); - if (buf_pool->n_flush[flush_type] == 0) { + ibool flush; + rw_lock_t* rw_lock; + bool no_fix_count = bpage->buf_fix_count == 0; - os_event_reset(buf_pool->no_flush[flush_type]); - } + if (!is_uncompressed) { + flush = TRUE; + rw_lock = NULL; - buf_pool->n_flush[flush_type]++; + } else if (!(no_fix_count || flush_type == BUF_FLUSH_LIST)) { + /* This is a heuristic, to avoid expensive S attempts. */ + flush = FALSE; + } else { - is_uncompressed = (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); - ut_ad(is_uncompressed == (block_mutex != &buf_pool->zip_mutex)); + rw_lock = &reinterpret_cast<buf_block_t*>(bpage)->lock; - switch (flush_type) { - ibool is_s_latched; - case BUF_FLUSH_LIST: - /* If the simulated aio thread is not running, we must - not wait for any latch, as we may end up in a deadlock: - if buf_fix_count == 0, then we know we need not wait */ - - is_s_latched = (bpage->buf_fix_count == 0); - if (is_s_latched && is_uncompressed) { - rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock, - BUF_IO_WRITE); + if (flush_type != BUF_FLUSH_LIST) { + flush = rw_lock_s_lock_gen_nowait( + rw_lock, BUF_IO_WRITE); + } else { + /* Will S lock later */ + flush = TRUE; } + } - mutex_exit(block_mutex); - buf_pool_mutex_exit(buf_pool); + if (flush) { - /* Even though bpage is not protected by any mutex at - this point, it is safe to access bpage, because it is - io_fixed and oldest_modification != 0. Thus, it - cannot be relocated in the buffer pool or removed from - flush_list or LRU_list. */ + /* We are committed to flushing by the time we get here */ - if (!is_s_latched) { - buf_dblwr_flush_buffered_writes(); + buf_page_set_io_fix(bpage, BUF_IO_WRITE); - if (is_uncompressed) { - rw_lock_s_lock_gen(&((buf_block_t*) bpage) - ->lock, BUF_IO_WRITE); - } - } + buf_page_set_flush_type(bpage, flush_type); - break; + if (buf_pool->n_flush[flush_type] == 0) { - case BUF_FLUSH_LRU: - case BUF_FLUSH_SINGLE_PAGE: - /* VERY IMPORTANT: - Because any thread may call single page flush, even when - owning locks on pages, to avoid deadlocks, we must make - sure that the s-lock is acquired on the page without - waiting: this is accomplished because - buf_flush_ready_for_flush() must hold, and that requires - the page not to be bufferfixed. - The same holds true for LRU flush because a user thread - may end up waiting for an LRU flush to end while - holding locks on other pages. */ - - if (is_uncompressed) { - rw_lock_s_lock_gen(&((buf_block_t*) bpage)->lock, - BUF_IO_WRITE); + os_event_reset(buf_pool->no_flush[flush_type]); } - /* Note that the s-latch is acquired before releasing the - buf_pool mutex: this ensures that the latch is acquired - immediately. */ + ++buf_pool->n_flush[flush_type]; mutex_exit(block_mutex); buf_pool_mutex_exit(buf_pool); - break; - default: - ut_error; - } + if (flush_type == BUF_FLUSH_LIST + && is_uncompressed + && !rw_lock_s_lock_gen_nowait(rw_lock, BUF_IO_WRITE)) { + /* avoiding deadlock possibility involves doublewrite + buffer, should flush it, because it might hold the + another block->lock. */ + buf_dblwr_flush_buffered_writes(); - /* Even though bpage is not protected by any mutex at this - point, it is safe to access bpage, because it is io_fixed and - oldest_modification != 0. Thus, it cannot be relocated in the - buffer pool or removed from flush_list or LRU_list. */ + rw_lock_s_lock_gen(rw_lock, BUF_IO_WRITE); + } -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Flushing %u space %u page %u\n", - flush_type, bpage->space, bpage->offset); - } -#endif /* UNIV_DEBUG */ - buf_flush_write_block_low(bpage, flush_type, sync); + /* Even though bpage is not protected by any mutex at this + point, it is safe to access bpage, because it is io_fixed and + oldest_modification != 0. Thus, it cannot be relocated in the + buffer pool or removed from flush_list or LRU_list. */ + + buf_flush_write_block_low(bpage, flush_type, sync); + } + + return(flush); } # if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG @@ -1130,8 +1099,8 @@ buf_flush_page_try( /* The following call will release the buffer pool and block mutex. */ - buf_flush_page(buf_pool, &block->page, BUF_FLUSH_SINGLE_PAGE, true); - return(TRUE); + return(buf_flush_page( + buf_pool, &block->page, BUF_FLUSH_SINGLE_PAGE, true)); } # endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ /***********************************************************//** @@ -1203,7 +1172,6 @@ buf_flush_try_neighbors( ulint i; ulint low; ulint high; - ulint count = 0; buf_pool_t* buf_pool = buf_pool_get(space, offset); ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); @@ -1259,9 +1227,9 @@ buf_flush_try_neighbors( high = fil_space_get_size(space); } - for (i = low; i < high; i++) { + ulint count = 0; - buf_page_t* bpage; + for (i = low; i < high; i++) { if ((count + n_flushed) >= n_to_flush) { @@ -1283,9 +1251,9 @@ buf_flush_try_neighbors( buf_pool_mutex_enter(buf_pool); /* We only want to flush pages from this buffer pool. */ - bpage = buf_page_hash_get(buf_pool, space, i); + buf_page_t* bpage = buf_page_hash_get(buf_pool, space, i); - if (!bpage) { + if (bpage == NULL) { buf_pool_mutex_exit(buf_pool); continue; @@ -1299,30 +1267,24 @@ buf_flush_try_neighbors( if (flush_type != BUF_FLUSH_LRU || i == offset || buf_page_is_old(bpage)) { + ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); mutex_enter(block_mutex); if (buf_flush_ready_for_flush(bpage, flush_type) - && (i == offset || !bpage->buf_fix_count)) { - /* We only try to flush those - neighbors != offset where the buf fix - count is zero, as we then know that we - probably can latch the page without a - semaphore wait. Semaphore waits are - expensive because we must flush the - doublewrite buffer before we start - waiting. */ - - buf_flush_page(buf_pool, bpage, flush_type, false); - ut_ad(!mutex_own(block_mutex)); - ut_ad(!buf_pool_mutex_own(buf_pool)); - count++; + && (i == offset || bpage->buf_fix_count == 0) + && buf_flush_page( + buf_pool, bpage, flush_type, false)) { + + ++count; + continue; - } else { - mutex_exit(block_mutex); } + + mutex_exit(block_mutex); } + buf_pool_mutex_exit(buf_pool); } @@ -1358,8 +1320,8 @@ buf_flush_page_and_try_neighbors( ulint* count) /*!< in/out: number of pages flushed */ { + ibool flushed; ib_mutex_t* block_mutex; - ibool flushed = FALSE; #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); #endif /* UNIV_DEBUG */ @@ -1372,8 +1334,6 @@ buf_flush_page_and_try_neighbors( ut_a(buf_page_in_file(bpage)); if (buf_flush_ready_for_flush(bpage, flush_type)) { - ulint space; - ulint offset; buf_pool_t* buf_pool; buf_pool = buf_pool_from_bpage(bpage); @@ -1382,22 +1342,22 @@ buf_flush_page_and_try_neighbors( /* These fields are protected by both the buffer pool mutex and block mutex. */ - space = buf_page_get_space(bpage); - offset = buf_page_get_page_no(bpage); + ulint space = buf_page_get_space(bpage); + ulint offset = buf_page_get_page_no(bpage); mutex_exit(block_mutex); /* Try to flush also all the neighbors */ - *count += buf_flush_try_neighbors(space, - offset, - flush_type, - *count, - n_to_flush); + *count += buf_flush_try_neighbors( + space, offset, flush_type, *count, n_to_flush); buf_pool_mutex_enter(buf_pool); + flushed = TRUE; + } else { mutex_exit(block_mutex); + flushed = FALSE; } ut_ad(buf_pool_mutex_own(buf_pool)); @@ -1480,8 +1440,8 @@ buf_flush_LRU_list_batch( blocks in the free_list */ { buf_page_t* bpage; - ulint scanned = 0; ulint count = 0; + ulint scanned = 0; ulint free_len = UT_LIST_GET_LEN(buf_pool->free); ulint lru_len = UT_LIST_GET_LEN(buf_pool->LRU); @@ -1520,15 +1480,44 @@ buf_flush_LRU_list_batch( } else { bpage = UT_LIST_GET_PREV(LRU, bpage); } - } else if (buf_flush_page_and_try_neighbors( - bpage, - BUF_FLUSH_LRU, max, &count)) { - - /* buf_pool->mutex was released. - Restart the scan. */ - bpage = UT_LIST_GET_LAST(buf_pool->LRU); } else { - bpage = UT_LIST_GET_PREV(LRU, bpage); + ulint space; + ulint offset; + buf_page_t* prev_bpage; + + prev_bpage = UT_LIST_GET_PREV(LRU, bpage); + + /* Save the previous bpage */ + + if (prev_bpage != NULL) { + space = prev_bpage->space; + offset = prev_bpage->offset; + } else { + space = ULINT_UNDEFINED; + offset = ULINT_UNDEFINED; + } + + if (!buf_flush_page_and_try_neighbors( + bpage, BUF_FLUSH_LRU, max, &count)) { + + bpage = prev_bpage; + } else { + /* buf_pool->mutex was released. + reposition the iterator. Note: the + prev block could have been repositioned + too but that should be rare. */ + + if (prev_bpage != NULL) { + + ut_ad(space != ULINT_UNDEFINED); + ut_ad(offset != ULINT_UNDEFINED); + + prev_bpage = buf_page_hash_get( + buf_pool, space, offset); + } + + bpage = prev_bpage; + } } free_len = UT_LIST_GET_LEN(buf_pool->free); @@ -1836,7 +1825,7 @@ buf_flush_wait_batch_end( } } else { thd_wait_begin(NULL, THD_WAIT_DISKIO); - os_event_wait(buf_pool->no_flush[type]); + os_event_wait(buf_pool->no_flush[type]); thd_wait_end(NULL); } } @@ -1985,9 +1974,6 @@ buf_flush_single_page_from_LRU( { ulint scanned; buf_page_t* bpage; - ib_mutex_t* block_mutex; - ibool freed; - bool evict_zip; buf_pool_mutex_enter(buf_pool); @@ -1995,14 +1981,25 @@ buf_flush_single_page_from_LRU( bpage != NULL; bpage = UT_LIST_GET_PREV(LRU, bpage), ++scanned) { - block_mutex = buf_page_get_mutex(bpage); + ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); + mutex_enter(block_mutex); - if (buf_flush_ready_for_flush(bpage, - BUF_FLUSH_SINGLE_PAGE)) { - /* buf_flush_page() will release the block - mutex */ - break; + + if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE)) { + + /* The following call will release the buffer pool + and block mutex. */ + + ibool flushed = buf_flush_page( + buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, true); + + if (flushed) { + /* buf_flush_page() will release the + block mutex */ + break; + } } + mutex_exit(block_mutex); } @@ -2012,15 +2009,14 @@ buf_flush_single_page_from_LRU( MONITOR_LRU_SINGLE_FLUSH_SCANNED_PER_CALL, scanned); - if (!bpage) { + if (bpage == NULL) { /* Can't find a single flushable page. */ buf_pool_mutex_exit(buf_pool); return(FALSE); } - /* The following call will release the buffer pool and - block mutex. */ - buf_flush_page(buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, true); + + ibool freed = FALSE; /* At this point the page has been written to the disk. As we are not holding buffer pool or block mutex therefore @@ -2035,27 +2031,25 @@ buf_flush_single_page_from_LRU( bpage != NULL; bpage = UT_LIST_GET_PREV(LRU, bpage)) { - ibool ready; + ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - block_mutex = buf_page_get_mutex(bpage); mutex_enter(block_mutex); - ready = buf_flush_ready_for_replace(bpage); + + ibool ready = buf_flush_ready_for_replace(bpage); + mutex_exit(block_mutex); + if (ready) { - break; - } + bool evict_zip; - } + evict_zip = !buf_LRU_evict_from_unzip_LRU(buf_pool);; - if (!bpage) { - /* Can't find a single replaceable page. */ - buf_pool_mutex_exit(buf_pool); - return(FALSE); - } + freed = buf_LRU_free_page(bpage, evict_zip); - evict_zip = !buf_LRU_evict_from_unzip_LRU(buf_pool);; + break; + } + } - freed = buf_LRU_free_page(bpage, evict_zip); buf_pool_mutex_exit(buf_pool); return(freed); diff --git a/storage/innobase/buf/buf0lru.cc b/storage/innobase/buf/buf0lru.cc index bc73119c227..98d0ec2d2ec 100644 --- a/storage/innobase/buf/buf0lru.cc +++ b/storage/innobase/buf/buf0lru.cc @@ -473,12 +473,8 @@ buf_flush_or_remove_page( yet; maybe the system is currently reading it in, or flushing the modifications to the file */ return(false); - } - ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); - bool processed = false; - /* We have to release the flush_list_mutex to obey the latching order. We are however guaranteed that the page will stay in the flush_list and won't be relocated because @@ -487,6 +483,9 @@ buf_flush_or_remove_page( buf_flush_list_mutex_exit(buf_pool); + bool processed; + ib_mutex_t* block_mutex = buf_page_get_mutex(bpage); + mutex_enter(block_mutex); ut_ad(bpage->oldest_modification != 0); @@ -494,18 +493,11 @@ buf_flush_or_remove_page( if (!flush) { buf_flush_remove(bpage); - - mutex_exit(block_mutex); - processed = true; - } else if (buf_flush_ready_for_flush(bpage, - BUF_FLUSH_SINGLE_PAGE)) { - - /* The following call will release the buffer pool - and block mutex. */ - buf_flush_page(buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, false); - ut_ad(!mutex_own(block_mutex)); + } else if (buf_flush_ready_for_flush(bpage, BUF_FLUSH_SINGLE_PAGE) + && buf_flush_page( + buf_pool, bpage, BUF_FLUSH_SINGLE_PAGE, false)) { /* Wake possible simulated aio thread to actually post the writes to the operating system */ @@ -513,15 +505,16 @@ buf_flush_or_remove_page( buf_pool_mutex_enter(buf_pool); - processed = true; + buf_flush_list_mutex_enter(buf_pool); + + return(true); + } else { - /* Not ready for flush. It can't be IO fixed because we - checked for that at the start of the function. It must - be buffer fixed. */ - ut_ad(bpage->buf_fix_count > 0); - mutex_exit(block_mutex); + processed = false; } + mutex_exit(block_mutex); + buf_flush_list_mutex_enter(buf_pool); ut_ad(!mutex_own(block_mutex)); @@ -1684,8 +1677,6 @@ buf_LRU_add_block_low( { buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(buf_pool); - ut_ad(bpage); ut_ad(buf_pool_mutex_own(buf_pool)); ut_a(buf_page_in_file(bpage)); @@ -1835,7 +1826,7 @@ buf_LRU_free_page( if (!buf_page_can_relocate(bpage)) { - /* Do not free buffer-fixed or I/O-fixed blocks. */ + /* Do not free buffer fixed or I/O-fixed blocks. */ goto func_exit; } @@ -1850,12 +1841,10 @@ buf_LRU_free_page( if (bpage->oldest_modification) { goto func_exit; } - } else if ((bpage->oldest_modification) - && (buf_page_get_state(bpage) - != BUF_BLOCK_FILE_PAGE)) { + } else if (bpage->oldest_modification > 0 + && buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) { - ut_ad(buf_page_get_state(bpage) - == BUF_BLOCK_ZIP_DIRTY); + ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_DIRTY); func_exit: rw_lock_x_unlock(hash_lock); @@ -1915,10 +1904,8 @@ func_exit: rw_lock_x_lock(hash_lock); mutex_enter(block_mutex); - ut_a(!buf_page_hash_get_low(buf_pool, - bpage->space, - bpage->offset, - fold)); + ut_a(!buf_page_hash_get_low( + buf_pool, b->space, b->offset, fold)); b->state = b->oldest_modification ? BUF_BLOCK_ZIP_DIRTY @@ -2333,6 +2320,11 @@ buf_LRU_block_remove_hashed( UNIV_PAGE_SIZE); buf_page_set_state(bpage, BUF_BLOCK_REMOVE_HASH); + if (buf_pool->flush_rbt == NULL) { + bpage->space = ULINT32_UNDEFINED; + bpage->offset = ULINT32_UNDEFINED; + } + /* Question: If we release bpage and hash mutex here then what protects us against: 1) Some other thread buffer fixing this page diff --git a/storage/innobase/dict/dict0crea.cc b/storage/innobase/dict/dict0crea.cc index eba5417dc76..ff892749d4f 100644 --- a/storage/innobase/dict/dict0crea.cc +++ b/storage/innobase/dict/dict0crea.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -270,6 +270,12 @@ dict_build_table_def_step( thr_get_trx(thr)->table_id = table->id; + /* Always set this bit for all new created tables */ + DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME); + DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", + DICT_TF2_FLAG_UNSET(table, + DICT_TF2_FTS_AUX_HEX_NAME);); + if (use_tablespace) { /* This table will not use the system tablespace. Get a new space id. */ diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index d16296179b0..bbae608efdb 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -121,19 +121,6 @@ UNIV_INTERN mysql_pfs_key_t dict_foreign_err_mutex_key; /** Identifies generated InnoDB foreign key names */ static char dict_ibfk[] = "_ibfk_"; -/** array of rw locks protecting -dict_table_t::stat_initialized -dict_table_t::stat_n_rows (*) -dict_table_t::stat_clustered_index_size -dict_table_t::stat_sum_of_other_index_sizes -dict_table_t::stat_modified_counter (*) -dict_table_t::indexes*::stat_n_diff_key_vals[] -dict_table_t::indexes*::stat_index_size -dict_table_t::indexes*::stat_n_leaf_pages -(*) those are not always protected for performance reasons */ -#define DICT_TABLE_STATS_LATCHES_SIZE 64 -static rw_lock_t dict_table_stats_latches[DICT_TABLE_STATS_LATCHES_SIZE]; - /*******************************************************************//** Tries to find column names for the index and sets the col field of the index. @@ -332,32 +319,31 @@ dict_mutex_exit_for_mysql(void) mutex_exit(&(dict_sys->mutex)); } -/** Get the latch that protects the stats of a given table */ -#define GET_TABLE_STATS_LATCH(table) \ - (&dict_table_stats_latches[ut_fold_ull((ib_uint64_t) table) \ - % DICT_TABLE_STATS_LATCHES_SIZE]) - /**********************************************************************//** -Lock the appropriate latch to protect a given table's statistics. -table->id is used to pick the corresponding latch from a global array of -latches. */ +Lock the appropriate latch to protect a given table's statistics. */ UNIV_INTERN void dict_table_stats_lock( /*==================*/ - const dict_table_t* table, /*!< in: table */ - ulint latch_mode) /*!< in: RW_S_LATCH or - RW_X_LATCH */ + dict_table_t* table, /*!< in: table */ + ulint latch_mode) /*!< in: RW_S_LATCH or RW_X_LATCH */ { ut_ad(table != NULL); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + if (table->stats_latch == NULL) { + /* This is a dummy table object that is private in the current + thread and is not shared between multiple threads, thus we + skip any locking. */ + return; + } + switch (latch_mode) { case RW_S_LATCH: - rw_lock_s_lock(GET_TABLE_STATS_LATCH(table)); + rw_lock_s_lock(table->stats_latch); break; case RW_X_LATCH: - rw_lock_x_lock(GET_TABLE_STATS_LATCH(table)); + rw_lock_x_lock(table->stats_latch); break; case RW_NO_LATCH: /* fall through */ @@ -372,19 +358,26 @@ UNIV_INTERN void dict_table_stats_unlock( /*====================*/ - const dict_table_t* table, /*!< in: table */ - ulint latch_mode) /*!< in: RW_S_LATCH or + dict_table_t* table, /*!< in: table */ + ulint latch_mode) /*!< in: RW_S_LATCH or RW_X_LATCH */ { ut_ad(table != NULL); ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + if (table->stats_latch == NULL) { + /* This is a dummy table object that is private in the current + thread and is not shared between multiple threads, thus we + skip any locking. */ + return; + } + switch (latch_mode) { case RW_S_LATCH: - rw_lock_s_unlock(GET_TABLE_STATS_LATCH(table)); + rw_lock_s_unlock(table->stats_latch); break; case RW_X_LATCH: - rw_lock_x_unlock(GET_TABLE_STATS_LATCH(table)); + rw_lock_x_unlock(table->stats_latch); break; case RW_NO_LATCH: /* fall through */ @@ -880,8 +873,6 @@ void dict_init(void) /*===========*/ { - int i; - dict_sys = static_cast<dict_sys_t*>(mem_zalloc(sizeof(*dict_sys))); mutex_create(dict_sys_mutex_key, &dict_sys->mutex, SYNC_DICT); @@ -902,11 +893,6 @@ dict_init(void) mutex_create(dict_foreign_err_mutex_key, &dict_foreign_err_mutex, SYNC_NO_ORDER_CHECK); } - - for (i = 0; i < DICT_TABLE_STATS_LATCHES_SIZE; i++) { - rw_lock_create(dict_table_stats_latch_key, - &dict_table_stats_latches[i], SYNC_INDEX_TREE); - } } /**********************************************************************//** @@ -5968,6 +5954,17 @@ dict_table_check_for_dup_indexes( } #endif /* UNIV_DEBUG */ +/** Auxiliary macro used inside dict_table_schema_check(). */ +#define CREATE_TYPES_NAMES() \ + dtype_sql_name((unsigned) req_schema->columns[i].mtype, \ + (unsigned) req_schema->columns[i].prtype_mask, \ + (unsigned) req_schema->columns[i].len, \ + req_type, sizeof(req_type)); \ + dtype_sql_name(table->cols[j].mtype, \ + table->cols[j].prtype, \ + table->cols[j].len, \ + actual_type, sizeof(actual_type)) + /*********************************************************************//** Checks whether a table exists and whether it has the given structure. The table must have the same number of columns with the same names and @@ -5987,6 +5984,8 @@ dict_table_schema_check( size_t errstr_sz) /*!< in: errstr size */ { char buf[MAX_FULL_NAME_LEN]; + char req_type[64]; + char actual_type[64]; dict_table_t* table; ulint i; @@ -6038,9 +6037,6 @@ dict_table_schema_check( for (i = 0; i < req_schema->n_cols; i++) { ulint j; - char req_type[64]; - char actual_type[64]; - /* check if i'th column is the same in both arrays */ if (innobase_strcasecmp(req_schema->columns[i].name, dict_table_get_col_name(table, i)) == 0) { @@ -6082,19 +6078,11 @@ dict_table_schema_check( /* we found a column with the same name on j'th position, compare column types and flags */ - dtype_sql_name(req_schema->columns[i].mtype, - req_schema->columns[i].prtype_mask, - req_schema->columns[i].len, - req_type, sizeof(req_type)); - - dtype_sql_name(table->cols[j].mtype, - table->cols[j].prtype, - table->cols[j].len, - actual_type, sizeof(actual_type)); - /* check length for exact match */ if (req_schema->columns[i].len != table->cols[j].len) { + CREATE_TYPES_NAMES(); + ut_snprintf(errstr, errstr_sz, "Column %s in table %s is %s " "but should be %s (length mismatch).", @@ -6117,6 +6105,8 @@ dict_table_schema_check( !(req_schema->columns[i].mtype == DATA_INT && table->cols[j].mtype == DATA_FIXBINARY)) { + CREATE_TYPES_NAMES(); + ut_snprintf(errstr, errstr_sz, "Column %s in table %s is %s " "but should be %s (type mismatch).", @@ -6134,6 +6124,8 @@ dict_table_schema_check( & req_schema->columns[i].prtype_mask) != req_schema->columns[i].prtype_mask) { + CREATE_TYPES_NAMES(); + ut_snprintf(errstr, errstr_sz, "Column %s in table %s is %s " "but should be %s (flags mismatch).", @@ -6292,10 +6284,6 @@ dict_close(void) mem_free(dict_sys); dict_sys = NULL; - - for (i = 0; i < DICT_TABLE_STATS_LATCHES_SIZE; i++) { - rw_lock_free(&dict_table_stats_latches[i]); - } } #ifdef UNIV_DEBUG diff --git a/storage/innobase/dict/dict0load.cc b/storage/innobase/dict/dict0load.cc index 81ab765b524..c8defc1d021 100644 --- a/storage/innobase/dict/dict0load.cc +++ b/storage/innobase/dict/dict0load.cc @@ -1092,10 +1092,34 @@ loop: case DICT_CHECK_ALL_LOADED: /* All tablespaces should have been found in fil_load_single_table_tablespaces(). */ - - fil_space_for_table_exists_in_mem( + if (fil_space_for_table_exists_in_mem( space_id, name, TRUE, !(is_temp || discarded), - false, NULL, 0); + false, NULL, 0) + && !(is_temp || discarded)) { + /* If user changes the path of .ibd files in + *.isl files before doing crash recovery , + then this leads to inconsistency in + SYS_DATAFILES system table because the + tables are loaded from the updated path + but the SYS_DATAFILES still points to the + old path.Therefore after crash recovery + update SYS_DATAFILES with the updated path.*/ + ut_ad(space_id); + ut_ad(recv_needed_recovery); + char *dict_path = dict_get_first_path(space_id, + name); + char *remote_path = fil_read_link_file(name); + if(dict_path && remote_path) { + if(strcmp(dict_path,remote_path)) { + dict_update_filepath(space_id, + remote_path); + } + } + if(dict_path) + mem_free(dict_path); + if(remote_path) + mem_free(remote_path); + } break; case DICT_CHECK_SOME_LOADED: diff --git a/storage/innobase/dict/dict0mem.cc b/storage/innobase/dict/dict0mem.cc index 565a46b1832..60daeea3a96 100644 --- a/storage/innobase/dict/dict0mem.cc +++ b/storage/innobase/dict/dict0mem.cc @@ -95,6 +95,10 @@ dict_mem_table_create( ut_d(table->magic_n = DICT_TABLE_MAGIC_N); + table->stats_latch = new rw_lock_t; + rw_lock_create(dict_table_stats_latch_key, table->stats_latch, + SYNC_INDEX_TREE); + #ifndef UNIV_HOTBACKUP table->autoinc_lock = static_cast<ib_lock_t*>( mem_heap_alloc(heap, lock_get_size())); @@ -149,6 +153,10 @@ dict_mem_table_free( #ifndef UNIV_HOTBACKUP mutex_free(&(table->autoinc_mutex)); #endif /* UNIV_HOTBACKUP */ + + rw_lock_free(table->stats_latch); + delete table->stats_latch; + ut_free(table->name); mem_heap_free(table->heap); } diff --git a/storage/innobase/dict/dict0stats.cc b/storage/innobase/dict/dict0stats.cc index 8bf02f9785c..68c02a301cd 100644 --- a/storage/innobase/dict/dict0stats.cc +++ b/storage/innobase/dict/dict0stats.cc @@ -400,6 +400,11 @@ dict_stats_table_clone_create( t->corrupted = table->corrupted; + /* This private object "t" is not shared with other threads, so + we do not need the stats_latch. The lock/unlock routines will do + nothing if stats_latch is NULL. */ + t->stats_latch = NULL; + UT_LIST_INIT(t->indexes); for (index = dict_table_get_first_index(table); @@ -731,7 +736,7 @@ static dict_table_t* dict_stats_snapshot_create( /*=======================*/ - const dict_table_t* table) /*!< in: table whose stats to copy */ + dict_table_t* table) /*!< in: table whose stats to copy */ { mutex_enter(&dict_sys->mutex); @@ -2131,8 +2136,16 @@ dict_stats_save_index_stat( ret = dict_stats_exec_sql( pinfo, - "PROCEDURE INDEX_STATS_SAVE_INSERT () IS\n" + "PROCEDURE INDEX_STATS_SAVE () IS\n" "BEGIN\n" + + "DELETE FROM \"" INDEX_STATS_NAME "\"\n" + "WHERE\n" + "database_name = :database_name AND\n" + "table_name = :table_name AND\n" + "index_name = :index_name AND\n" + "stat_name = :stat_name;\n" + "INSERT INTO \"" INDEX_STATS_NAME "\"\n" "VALUES\n" "(\n" @@ -2147,47 +2160,6 @@ dict_stats_save_index_stat( ");\n" "END;"); - if (ret == DB_DUPLICATE_KEY) { - - pinfo = pars_info_create(); - pars_info_add_str_literal(pinfo, "database_name", db_utf8); - pars_info_add_str_literal(pinfo, "table_name", table_utf8); - UNIV_MEM_ASSERT_RW_ABORT(index->name, strlen(index->name)); - pars_info_add_str_literal(pinfo, "index_name", index->name); - UNIV_MEM_ASSERT_RW_ABORT(&last_update, 4); - pars_info_add_int4_literal(pinfo, "last_update", last_update); - UNIV_MEM_ASSERT_RW_ABORT(stat_name, strlen(stat_name)); - pars_info_add_str_literal(pinfo, "stat_name", stat_name); - UNIV_MEM_ASSERT_RW_ABORT(&stat_value, 8); - pars_info_add_ull_literal(pinfo, "stat_value", stat_value); - if (sample_size != NULL) { - UNIV_MEM_ASSERT_RW_ABORT(sample_size, 8); - pars_info_add_ull_literal(pinfo, "sample_size", *sample_size); - } else { - pars_info_add_literal(pinfo, "sample_size", NULL, - UNIV_SQL_NULL, DATA_FIXBINARY, 0); - } - UNIV_MEM_ASSERT_RW_ABORT(stat_description, strlen(stat_description)); - pars_info_add_str_literal(pinfo, "stat_description", - stat_description); - - ret = dict_stats_exec_sql( - pinfo, - "PROCEDURE INDEX_STATS_SAVE_UPDATE () IS\n" - "BEGIN\n" - "UPDATE \"" INDEX_STATS_NAME "\" SET\n" - "last_update = :last_update,\n" - "stat_value = :stat_value,\n" - "sample_size = :sample_size,\n" - "stat_description = :stat_description\n" - "WHERE\n" - "database_name = :database_name AND\n" - "table_name = :table_name AND\n" - "index_name = :index_name AND\n" - "stat_name = :stat_name;\n" - "END;"); - } - if (ret != DB_SUCCESS) { char buf_table[MAX_FULL_NAME_LEN]; char buf_index[MAX_FULL_NAME_LEN]; @@ -2205,14 +2177,18 @@ dict_stats_save_index_stat( return(ret); } -/*********************************************************************//** -Save the table's statistics into the persistent statistics storage. +/** Save the table's statistics into the persistent statistics storage. +@param[in] table_orig table whose stats to save +@param[in] only_for_index if this is non-NULL, then stats for indexes +that are not equal to it will not be saved, if NULL, then all +indexes' stats are saved @return DB_SUCCESS or error code */ static dberr_t dict_stats_save( /*============*/ - dict_table_t* table_orig) /*!< in: table */ + dict_table_t* table_orig, + const index_id_t* only_for_index) { pars_info_t* pinfo; lint now; @@ -2234,26 +2210,27 @@ dict_stats_save( lint */ now = (lint) ut_time(); -#define PREPARE_PINFO_FOR_TABLE_SAVE(p, t, n) \ - do { \ - pars_info_add_str_literal((p), "database_name", db_utf8); \ - pars_info_add_str_literal((p), "table_name", table_utf8); \ - pars_info_add_int4_literal((p), "last_update", (n)); \ - pars_info_add_ull_literal((p), "n_rows", (t)->stat_n_rows); \ - pars_info_add_ull_literal((p), "clustered_index_size", \ - (t)->stat_clustered_index_size); \ - pars_info_add_ull_literal((p), "sum_of_other_index_sizes", \ - (t)->stat_sum_of_other_index_sizes); \ - } while(false); - pinfo = pars_info_create(); - PREPARE_PINFO_FOR_TABLE_SAVE(pinfo, table, now); + pars_info_add_str_literal(pinfo, "database_name", db_utf8); + pars_info_add_str_literal(pinfo, "table_name", table_utf8); + pars_info_add_int4_literal(pinfo, "last_update", now); + pars_info_add_ull_literal(pinfo, "n_rows", table->stat_n_rows); + pars_info_add_ull_literal(pinfo, "clustered_index_size", + table->stat_clustered_index_size); + pars_info_add_ull_literal(pinfo, "sum_of_other_index_sizes", + table->stat_sum_of_other_index_sizes); ret = dict_stats_exec_sql( pinfo, - "PROCEDURE TABLE_STATS_SAVE_INSERT () IS\n" + "PROCEDURE TABLE_STATS_SAVE () IS\n" "BEGIN\n" + + "DELETE FROM \"" TABLE_STATS_NAME "\"\n" + "WHERE\n" + "database_name = :database_name AND\n" + "table_name = :table_name;\n" + "INSERT INTO \"" TABLE_STATS_NAME "\"\n" "VALUES\n" "(\n" @@ -2266,27 +2243,6 @@ dict_stats_save( ");\n" "END;"); - if (ret == DB_DUPLICATE_KEY) { - pinfo = pars_info_create(); - - PREPARE_PINFO_FOR_TABLE_SAVE(pinfo, table, now); - - ret = dict_stats_exec_sql( - pinfo, - "PROCEDURE TABLE_STATS_SAVE_UPDATE () IS\n" - "BEGIN\n" - "UPDATE \"" TABLE_STATS_NAME "\" SET\n" - "last_update = :last_update,\n" - "n_rows = :n_rows,\n" - "clustered_index_size = :clustered_index_size,\n" - "sum_of_other_index_sizes = " - " :sum_of_other_index_sizes\n" - "WHERE\n" - "database_name = :database_name AND\n" - "table_name = :table_name;\n" - "END;"); - } - if (ret != DB_SUCCESS) { char buf[MAX_FULL_NAME_LEN]; ut_print_timestamp(stderr); @@ -2304,6 +2260,10 @@ dict_stats_save( index != NULL; index = dict_table_get_next_index(index)) { + if (only_for_index != NULL && index->id != *only_for_index) { + continue; + } + if (dict_stats_should_ignore_index(index)) { continue; } @@ -2860,7 +2820,7 @@ dict_stats_update_for_index( dict_table_stats_lock(index->table, RW_X_LATCH); dict_stats_analyze_index(index); dict_table_stats_unlock(index->table, RW_X_LATCH); - dict_stats_save(index->table); + dict_stats_save(index->table, &index->id); DBUG_VOID_RETURN; } /* else */ @@ -2955,7 +2915,7 @@ dict_stats_update( return(err); } - err = dict_stats_save(table); + err = dict_stats_save(table, NULL); return(err); } @@ -2988,7 +2948,7 @@ dict_stats_update( if (dict_stats_persistent_storage_check(false)) { - return(dict_stats_save(table)); + return(dict_stats_save(table, NULL)); } return(DB_STATS_DO_NOT_EXIST); @@ -3834,7 +3794,7 @@ test_dict_stats_save() index2_stat_n_sample_sizes[2] = TEST_IDX2_N_DIFF3_SAMPLE_SIZE; index2_stat_n_sample_sizes[3] = TEST_IDX2_N_DIFF4_SAMPLE_SIZE; - ret = dict_stats_save(&table); + ret = dict_stats_save(&table, NULL); ut_a(ret == DB_SUCCESS); diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 11b28d78f21..6254b3b7e9f 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -2383,27 +2383,21 @@ fil_op_log_parse_or_replay( break; case MLOG_FILE_RENAME: - /* We do the rename based on space id, not old file name; - this should guarantee that after the log replay each .ibd file - has the correct name for the latest log sequence number; the - proof is left as an exercise :) */ - - if (fil_tablespace_exists_in_mem(space_id)) { + /* In order to replay the rename, the following must hold: + * The new name is not already used. + * A tablespace is open in memory with the old name. + * The space ID for that tablepace matches this log entry. + This will prevent unintended renames during recovery. */ + + if (fil_get_space_id_for_table(new_name) == ULINT_UNDEFINED + && space_id == fil_get_space_id_for_table(name)) { /* Create the database directory for the new name, if it does not exist yet */ fil_create_directory_for_tablename(new_name); - /* Rename the table if there is not yet a tablespace - with the same name */ - - if (fil_get_space_id_for_table(new_name) - == ULINT_UNDEFINED) { - /* We do not care about the old name, that - is why we pass NULL as the first argument. */ - if (!fil_rename_tablespace(NULL, space_id, - new_name, NULL)) { - ut_error; - } + if (!fil_rename_tablespace(name, space_id, + new_name, NULL)) { + ut_error; } } @@ -4021,6 +4015,175 @@ fil_make_ibbackup_old_name( } #endif /* UNIV_HOTBACKUP */ + +/*******************************************************************//** +Determine the space id of the given file descriptor by reading a few +pages from the beginning of the .ibd file. +@return true if space id was successfully identified, or false. */ +static +bool +fil_user_tablespace_find_space_id( +/*==============================*/ + fsp_open_info* fsp) /* in/out: contains file descriptor, which is + used as input. contains space_id, which is + the output */ +{ + bool st; + os_offset_t file_size; + + file_size = os_file_get_size(fsp->file); + + if (file_size == (os_offset_t) -1) { + ib_logf(IB_LOG_LEVEL_ERROR, "Could not get file size: %s", + fsp->filepath); + return(false); + } + + /* Assuming a page size, read the space_id from each page and store it + in a map. Find out which space_id is agreed on by majority of the + pages. Choose that space_id. */ + for (ulint page_size = UNIV_ZIP_SIZE_MIN; + page_size <= UNIV_PAGE_SIZE_MAX; page_size <<= 1) { + + /* map[space_id] = count of pages */ + std::map<ulint, ulint> verify; + + ulint page_count = 64; + ulint valid_pages = 0; + + /* Adjust the number of pages to analyze based on file size */ + while ((page_count * page_size) > file_size) { + --page_count; + } + + ib_logf(IB_LOG_LEVEL_INFO, "Page size:%lu Pages to analyze:" + "%lu", page_size, page_count); + + byte* buf = static_cast<byte*>(ut_malloc(2*page_size)); + byte* page = static_cast<byte*>(ut_align(buf, page_size)); + + for (ulint j = 0; j < page_count; ++j) { + + st = os_file_read(fsp->file, page, (j* page_size), page_size); + + if (!st) { + ib_logf(IB_LOG_LEVEL_INFO, + "READ FAIL: page_no:%lu", j); + continue; + } + + bool uncompressed_ok = false; + + /* For uncompressed pages, the page size must be equal + to UNIV_PAGE_SIZE. */ + if (page_size == UNIV_PAGE_SIZE) { + uncompressed_ok = !buf_page_is_corrupted( + false, page, 0); + } + + bool compressed_ok = !buf_page_is_corrupted( + false, page, page_size); + + if (uncompressed_ok || compressed_ok) { + + ulint space_id = mach_read_from_4(page + + FIL_PAGE_SPACE_ID); + + if (space_id > 0) { + ib_logf(IB_LOG_LEVEL_INFO, + "VALID: space:%lu " + "page_no:%lu page_size:%lu", + space_id, j, page_size); + verify[space_id]++; + ++valid_pages; + } + } + } + + ut_free(buf); + + ib_logf(IB_LOG_LEVEL_INFO, "Page size: %lu, Possible space_id " + "count:%lu", page_size, verify.size()); + + const ulint pages_corrupted = 3; + for (ulint missed = 0; missed <= pages_corrupted; ++missed) { + + for (std::map<ulint, ulint>::iterator + m = verify.begin(); m != verify.end(); ++m ) { + + ib_logf(IB_LOG_LEVEL_INFO, "space_id:%lu, " + "Number of pages matched: %lu/%lu " + "(%lu)", m->first, m->second, + valid_pages, page_size); + + if (m->second == (valid_pages - missed)) { + + ib_logf(IB_LOG_LEVEL_INFO, + "Chosen space:%lu\n", m->first); + + fsp->id = m->first; + return(true); + } + } + + } + } + + return(false); +} + +/*******************************************************************//** +Finds the page 0 of the given space id from the double write buffer, and +copies it to the corresponding .ibd file. +@return true if copy was successful, or false. */ +static +bool +fil_user_tablespace_restore_page0( +/*==============================*/ + fsp_open_info* fsp) /* in: contains space id and .ibd file + information */ +{ + bool err; + ulint flags; + ulint zip_size; + ulint page_no; + ulint page_size; + ulint buflen; + byte* page; + + ib_logf(IB_LOG_LEVEL_INFO, "Restoring first page of tablespace %lu", + fsp->id); + + if (fsp->id == 0) { + err = false; + goto out; + } + + // find if double write buffer has page0 of given space id + page = recv_sys->dblwr.find_first_page(fsp->id); + + if (!page) { + err = false; + goto out; + } + + flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page); + zip_size = fsp_flags_get_zip_size(flags); + page_no = page_get_page_no(page); + page_size = fsp_flags_get_page_size(flags); + + ut_ad(page_no == 0); + + buflen = zip_size ? zip_size: page_size; + + ib_logf(IB_LOG_LEVEL_INFO, "Writing %lu bytes into file: %s", + buflen, fsp->filepath); + + err = os_file_write(fsp->filepath, fsp->file, page, 0, buflen); +out: + return(err); +} + /********************************************************************//** Opens an .ibd file and adds the associated single-table tablespace to the InnoDB fil0fil.cc data structures. @@ -4032,6 +4195,10 @@ fil_validate_single_table_tablespace( const char* tablename, /*!< in: database/tablename */ fsp_open_info* fsp) /*!< in/out: tablespace info */ { + bool restore_attempted = false; + +check_first_page: + fsp->success = TRUE; if (const char* check_msg = fil_read_first_page( fsp->file, FALSE, &fsp->flags, &fsp->id, #ifdef UNIV_LOG_ARCHIVE @@ -4042,6 +4209,19 @@ fil_validate_single_table_tablespace( "%s in tablespace %s (table %s)", check_msg, fsp->filepath, tablename); fsp->success = FALSE; + } + + if (!fsp->success) { + if (!restore_attempted) { + if (!fil_user_tablespace_find_space_id(fsp)) { + return; + } + restore_attempted = true; + if (!fil_user_tablespace_restore_page0(fsp)) { + return; + } + goto check_first_page; + } return; } @@ -4159,7 +4339,7 @@ fil_load_single_table_tablespace( /* Try to open the tablespace in the datadir. */ def.file = os_file_create_simple_no_error_handling( innodb_file_data_key, def.filepath, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &def.success); + OS_FILE_READ_WRITE, &def.success); /* Read the first page of the remote tablespace */ if (def.success) { diff --git a/storage/innobase/fts/fts0ast.cc b/storage/innobase/fts/fts0ast.cc index 3a03fc63303..d6c19c0050a 100644 --- a/storage/innobase/fts/fts0ast.cc +++ b/storage/innobase/fts/fts0ast.cc @@ -112,9 +112,11 @@ fts_ast_create_node_term( if (str.f_n_char > 0) { /* If the subsequent term (after the first one)'s size - is less than fts_min_token_size, we shall ignore - that. This is to make consistent with MyISAM behavior */ - if (first_node && (str.f_n_char < fts_min_token_size)) { + is less than fts_min_token_size or the term is greater + than fts_max_token_size, we shall ignore that. This is + to make consistent with MyISAM behavior */ + if ((first_node && (str.f_n_char < fts_min_token_size)) + || str.f_n_char > fts_max_token_size) { continue; } @@ -394,6 +396,10 @@ fts_ast_term_set_distance( ulint distance) /*!< in: the text proximity distance */ { + if (node == NULL) { + return; + } + ut_a(node->type == FTS_AST_TEXT); ut_a(node->text.distance == ULINT_UNDEFINED); @@ -551,14 +557,6 @@ fts_ast_visit( break; - case FTS_AST_SUBEXP_LIST: - if (visit_pass != FTS_PASS_FIRST) { - break; - } - - error = fts_ast_visit_sub_exp(node, visitor, arg); - break; - case FTS_AST_OPER: oper = node->oper; oper_node = node; diff --git a/storage/innobase/fts/fts0blex.cc b/storage/innobase/fts/fts0blex.cc index dccedac0212..6082261e74c 100644 --- a/storage/innobase/fts/fts0blex.cc +++ b/storage/innobase/fts/fts0blex.cc @@ -305,9 +305,9 @@ YY_BUFFER_STATE fts0b_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner YY_BUFFER_STATE fts0b_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); YY_BUFFER_STATE fts0b_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner ); -void *fts0balloc (yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); -void *fts0brealloc (void *,yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); -void fts0bfree (void * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); +void *fts0balloc (yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); +void *fts0brealloc (void *,yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); +void fts0bfree (void * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); #define yy_new_buffer fts0b_create_buffer @@ -347,7 +347,7 @@ typedef int yy_state_type; static yy_state_type yy_get_previous_state (yyscan_t yyscanner ); static yy_state_type yy_try_NUL_trans (yy_state_type current_state ,yyscan_t yyscanner); static int yy_get_next_buffer (yyscan_t yyscanner ); -static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); +static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); /* Done after the current pattern has been matched and before the * corresponding action - sets up yytext. @@ -579,11 +579,11 @@ extern int fts0bwrap (yyscan_t yyscanner ); #endif #ifndef yytext_ptr -static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))); +static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))); #endif #ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))); +static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))); #endif #ifndef YY_NO_INPUT @@ -1609,9 +1609,9 @@ YY_BUFFER_STATE fts0b_scan_bytes (yyconst char * yybytes, int _yybytes_len , y #define YY_EXIT_FAILURE 2 #endif -static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { - (void) fprintf( stderr, "%s\n", msg ); + (void) fprintf( stderr, "%s\n", msg ); exit( YY_EXIT_FAILURE ); } @@ -1910,7 +1910,7 @@ int fts0blex_destroy (yyscan_t yyscanner) */ #ifndef yytext_ptr -static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { register int i; for ( i = 0; i < n; ++i ) @@ -1919,7 +1919,7 @@ static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t #endif #ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { register int n; for ( n = 0; s[n]; ++n ) @@ -1929,12 +1929,12 @@ static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribu } #endif -void *fts0balloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +void *fts0balloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { return (void *) malloc( size ); } -void *fts0brealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +void *fts0brealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { /* The cast to (char *) in the following accommodates both * implementations that use char* generic pointers, and those @@ -1946,7 +1946,7 @@ void *fts0brealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __at return (void *) realloc( (char *) ptr, size ); } -void fts0bfree (void * ptr , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +void fts0bfree (void * ptr , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { free( (char *) ptr ); /* see fts0brealloc() for (char *) cast */ } diff --git a/storage/innobase/fts/fts0config.cc b/storage/innobase/fts/fts0config.cc index c5cf38ca7f9..5b4ae5c39f7 100644 --- a/storage/innobase/fts/fts0config.cc +++ b/storage/innobase/fts/fts0config.cc @@ -151,7 +151,9 @@ fts_config_create_index_param_name( strcpy(name, param); name[len] = '_'; - fts_write_object_id(index->id, name + len + 1); + fts_write_object_id(index->id, name + len + 1, + DICT_TF2_FLAG_IS_SET(index->table, + DICT_TF2_FTS_AUX_HEX_NAME)); return(name); } diff --git a/storage/innobase/fts/fts0fts.cc b/storage/innobase/fts/fts0fts.cc index 41f20c1dbb9..b12f3de7ff2 100644 --- a/storage/innobase/fts/fts0fts.cc +++ b/storage/innobase/fts/fts0fts.cc @@ -1608,7 +1608,8 @@ fts_rename_aux_tables( new_name, old_table_name, trx); DBUG_EXECUTE_IF("fts_rename_failure", - err = DB_DEADLOCK;); + err = DB_DEADLOCK; + fts_sql_rollback(trx);); mem_free(old_table_name); @@ -2018,7 +2019,7 @@ fts_create_index_tables_low( fts_table.index_id = index->id; fts_table.table_id = table_id; fts_table.parent = table_name; - fts_table.table = NULL; + fts_table.table = index->table; #ifdef FTS_DOC_STATS_DEBUG char* sql; @@ -4479,7 +4480,7 @@ fts_sync_table( ut_ad(table->fts); - if (table->fts->cache) { + if (!dict_table_is_discarded(table) && table->fts->cache) { err = fts_sync(table->fts->cache->sync); } @@ -4506,15 +4507,11 @@ fts_process_token( fts_string_t str; ulint offset = 0; fts_doc_t* result_doc; - byte buf[FTS_MAX_WORD_LEN + 1]; - - str.f_str = buf; /* Determine where to save the result. */ result_doc = (result) ? result : doc; /* The length of a string in characters is set here only. */ - ret = innobase_mysql_fts_get_token( doc->charset, doc->text.f_str + start_pos, doc->text.f_str + doc->text.f_len, &str, &offset); @@ -4545,6 +4542,7 @@ fts_process_token( (char*) t_str.f_str, t_str.f_len); t_str.f_len = newlen; + t_str.f_str[newlen] = 0; /* Add the word to the document statistics. If the word hasn't been seen before we create a new entry for it. */ @@ -5797,7 +5795,7 @@ fts_is_aux_table_name( my_name[len] = 0; end = my_name + len; - ptr = static_cast<const char*>(memchr(my_name, '/', len)); + ptr = static_cast<const char*>(memchr(my_name, '/', len)); if (ptr != NULL) { /* We will start the match after the '/' */ @@ -5940,6 +5938,374 @@ fts_read_tables( return(TRUE); } +/******************************************************************//** +Callback that sets a hex formatted FTS table's flags2 in +SYS_TABLES. The flags is stored in MIX_LEN column. +@return FALSE if all OK */ +static +ibool +fts_set_hex_format( +/*===============*/ + void* row, /*!< in: sel_node_t* */ + void* user_arg) /*!< in: bool set/unset flag */ +{ + sel_node_t* node = static_cast<sel_node_t*>(row); + dfield_t* dfield = que_node_get_val(node->select_list); + + ut_ad(dtype_get_mtype(dfield_get_type(dfield)) == DATA_INT); + ut_ad(dfield_get_len(dfield) == sizeof(ib_uint32_t)); + /* There should be at most one matching record. So the value + must be the default value. */ + ut_ad(mach_read_from_4(static_cast<byte*>(user_arg)) + == ULINT32_UNDEFINED); + + ulint flags2 = mach_read_from_4( + static_cast<byte*>(dfield_get_data(dfield))); + + flags2 |= DICT_TF2_FTS_AUX_HEX_NAME; + + mach_write_to_4(static_cast<byte*>(user_arg), flags2); + + return(FALSE); +} + +/*****************************************************************//** +Update the DICT_TF2_FTS_AUX_HEX_NAME flag in SYS_TABLES. +@return DB_SUCCESS or error code. */ +UNIV_INTERN +dberr_t +fts_update_hex_format_flag( +/*=======================*/ + trx_t* trx, /*!< in/out: transaction that + covers the update */ + table_id_t table_id, /*!< in: Table for which we want + to set the root table->flags2 */ + bool dict_locked) /*!< in: set to true if the + caller already owns the + dict_sys_t::mutex. */ +{ + pars_info_t* info; + ib_uint32_t flags2; + + static const char sql[] = + "PROCEDURE UPDATE_HEX_FORMAT_FLAG() IS\n" + "DECLARE FUNCTION my_func;\n" + "DECLARE CURSOR c IS\n" + " SELECT MIX_LEN " + " FROM SYS_TABLES " + " WHERE ID = :table_id FOR UPDATE;" + "\n" + "BEGIN\n" + "OPEN c;\n" + "WHILE 1 = 1 LOOP\n" + " FETCH c INTO my_func();\n" + " IF c % NOTFOUND THEN\n" + " EXIT;\n" + " END IF;\n" + "END LOOP;\n" + "UPDATE SYS_TABLES" + " SET MIX_LEN = :flags2" + " WHERE ID = :table_id;\n" + "CLOSE c;\n" + "END;\n"; + + flags2 = ULINT32_UNDEFINED; + + info = pars_info_create(); + + pars_info_add_ull_literal(info, "table_id", table_id); + pars_info_bind_int4_literal(info, "flags2", &flags2); + + pars_info_bind_function( + info, "my_func", fts_set_hex_format, &flags2); + + if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) { + trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); + } + + dberr_t err = que_eval_sql(info, sql, !dict_locked, trx); + + ut_a(flags2 != ULINT32_UNDEFINED); + + return (err); +} + +#ifdef _WIN32 + +/*********************************************************************//** +Rename an aux table to HEX format. It's called when "%016llu" is used +to format an object id in table name, which only happens in Windows. */ +static __attribute__((nonnull, warn_unused_result)) +dberr_t +fts_rename_one_aux_table_to_hex_format( +/*===================================*/ + trx_t* trx, /*!< in: transaction */ + const fts_aux_table_t* aux_table, /*!< in: table info */ + const dict_table_t* parent_table) /*!< in: parent table name */ +{ + const char* ptr; + fts_table_t fts_table; + char* new_name; + dberr_t error; + + ptr = strchr(aux_table->name, '/'); + ut_a(ptr != NULL); + ++ptr; + /* Skip "FTS_", table id and underscore */ + for (ulint i = 0; i < 2; ++i) { + ptr = strchr(ptr, '_'); + ut_a(ptr != NULL); + ++ptr; + } + + fts_table.suffix = NULL; + if (aux_table->index_id == 0) { + fts_table.type = FTS_COMMON_TABLE; + + for (ulint i = 0; fts_common_tables[i] != NULL; ++i) { + if (strcmp(ptr, fts_common_tables[i]) == 0) { + fts_table.suffix = fts_common_tables[i]; + break; + } + } + } else { + fts_table.type = FTS_INDEX_TABLE; + + /* Skip index id and underscore */ + ptr = strchr(ptr, '_'); + ut_a(ptr != NULL); + ++ptr; + + for (ulint i = 0; fts_index_selector[i].value; ++i) { + if (strcmp(ptr, fts_get_suffix(i)) == 0) { + fts_table.suffix = fts_get_suffix(i); + break; + } + } + } + + ut_a(fts_table.suffix != NULL); + + fts_table.parent = parent_table->name; + fts_table.table_id = aux_table->parent_id; + fts_table.index_id = aux_table->index_id; + fts_table.table = parent_table; + + new_name = fts_get_table_name(&fts_table); + ut_ad(strcmp(new_name, aux_table->name) != 0); + + if (trx_get_dict_operation(trx) == TRX_DICT_OP_NONE) { + trx_set_dict_operation(trx, TRX_DICT_OP_INDEX); + } + + error = row_rename_table_for_mysql(aux_table->name, new_name, trx, + FALSE); + + if (error != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_WARN, + "Failed to rename aux table \'%s\' to " + "new format \'%s\'. ", + aux_table->name, new_name); + } else { + ib_logf(IB_LOG_LEVEL_INFO, + "Renamed aux table \'%s\' to \'%s\'.", + aux_table->name, new_name); + } + + mem_free(new_name); + + return (error); +} + +/**********************************************************************//** +Rename all aux tables of a parent table to HEX format. Also set aux tables' +flags2 and parent table's flags2 with DICT_TF2_FTS_AUX_HEX_NAME. +It's called when "%016llu" is used to format an object id in table name, +which only happens in Windows. +Note the ids in tables are correct but the names are old ambiguous ones. + +This function should make sure that either all the parent table and aux tables +are set DICT_TF2_FTS_AUX_HEX_NAME with flags2 or none of them are set */ +static __attribute__((nonnull, warn_unused_result)) +dberr_t +fts_rename_aux_tables_to_hex_format( +/*================================*/ + trx_t* trx, /*!< in: transaction */ + dict_table_t* parent_table, /*!< in: parent table */ + ib_vector_t* tables) /*!< in: aux tables to rename. */ +{ + dberr_t error; + ulint count; + + ut_ad(!DICT_TF2_FLAG_IS_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME)); + ut_ad(!ib_vector_is_empty(tables)); + + error = fts_update_hex_format_flag(trx, parent_table->id, true); + + if (error != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_WARN, + "Setting parent table %s to hex format failed.", + parent_table->name); + + fts_sql_rollback(trx); + return (error); + } + + DICT_TF2_FLAG_SET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME); + + for (count = 0; count < ib_vector_size(tables); ++count) { + dict_table_t* table; + fts_aux_table_t* aux_table; + + aux_table = static_cast<fts_aux_table_t*>( + ib_vector_get(tables, count)); + + table = dict_table_open_on_id(aux_table->id, TRUE, + DICT_TABLE_OP_NORMAL); + + ut_ad(table != NULL); + ut_ad(!DICT_TF2_FLAG_IS_SET(table, DICT_TF2_FTS_AUX_HEX_NAME)); + + /* Set HEX_NAME flag here to make sure we can get correct + new table name in following function */ + DICT_TF2_FLAG_SET(table, DICT_TF2_FTS_AUX_HEX_NAME); + error = fts_rename_one_aux_table_to_hex_format(trx, + aux_table, parent_table); + /* We will rollback the trx if the error != DB_SUCCESS, + so setting the flag here is the same with setting it in + row_rename_table_for_mysql */ + DBUG_EXECUTE_IF("rename_aux_table_fail", error = DB_ERROR;); + + if (error != DB_SUCCESS) { + dict_table_close(table, TRUE, FALSE); + + ib_logf(IB_LOG_LEVEL_WARN, + "Failed to rename one aux table %s " + "Will revert all successful rename " + "operations.", aux_table->name); + + fts_sql_rollback(trx); + break; + } + + error = fts_update_hex_format_flag(trx, aux_table->id, true); + dict_table_close(table, TRUE, FALSE); + + if (error != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_WARN, + "Setting aux table %s to hex format failed.", + aux_table->name); + + fts_sql_rollback(trx); + break; + } + } + + if (error != DB_SUCCESS) { + ut_ad(count != ib_vector_size(tables)); + /* If rename fails, thr trx would be rolled back, we can't + use it any more, we'll start a new background trx to do + the reverting. */ + ut_a(trx->state == TRX_STATE_NOT_STARTED); + bool not_rename = false; + + /* Try to revert those succesful rename operations + in order to revert the ibd file rename. */ + for (ulint i = 0; i <= count; ++i) { + dict_table_t* table; + fts_aux_table_t* aux_table; + trx_t* trx_bg; + dberr_t err; + + aux_table = static_cast<fts_aux_table_t*>( + ib_vector_get(tables, i)); + + table = dict_table_open_on_id(aux_table->id, TRUE, + DICT_TABLE_OP_NORMAL); + ut_ad(table != NULL); + + if (not_rename) { + DICT_TF2_FLAG_UNSET(table, + DICT_TF2_FTS_AUX_HEX_NAME); + } + + if (!DICT_TF2_FLAG_IS_SET(table, + DICT_TF2_FTS_AUX_HEX_NAME)) { + dict_table_close(table, TRUE, FALSE); + continue; + } + + trx_bg = trx_allocate_for_background(); + trx_bg->op_info = "Revert half done rename"; + trx_bg->dict_operation_lock_mode = RW_X_LATCH; + trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE); + + DICT_TF2_FLAG_UNSET(table, DICT_TF2_FTS_AUX_HEX_NAME); + err = row_rename_table_for_mysql(table->name, + aux_table->name, + trx_bg, FALSE); + + trx_bg->dict_operation_lock_mode = 0; + dict_table_close(table, TRUE, FALSE); + + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_WARN, "Failed to revert " + "table %s. Please revert manually.", + table->name); + fts_sql_rollback(trx_bg); + /* Continue to clear aux tables' flags2 */ + not_rename = true; + continue; + } + + fts_sql_commit(trx_bg); + } + + DICT_TF2_FLAG_UNSET(parent_table, DICT_TF2_FTS_AUX_HEX_NAME); + } + + return (error); +} + +/**********************************************************************//** +Convert an id, which is actually a decimal number but was regard as a HEX +from a string, to its real value. */ +static +ib_id_t +fts_fake_hex_to_dec( +/*================*/ + ib_id_t id) /*!< in: number to convert */ +{ + ib_id_t dec_id = 0; + char tmp_id[FTS_AUX_MIN_TABLE_ID_LENGTH]; + int ret; + + ret = sprintf(tmp_id, UINT64PFx, id); + ut_ad(ret == 16); + ret = sscanf(tmp_id, "%016llu", &dec_id); + ut_ad(ret == 1); + + return dec_id; +} + +/*********************************************************************//** +Compare two fts_aux_table_t parent_ids. +@return < 0 if n1 < n2, 0 if n1 == n2, > 0 if n1 > n2 */ +UNIV_INLINE +int +fts_check_aux_table_parent_id_cmp( +/*==============================*/ + const void* p1, /*!< in: id1 */ + const void* p2) /*!< in: id2 */ +{ + const fts_aux_table_t* fa1 = static_cast<const fts_aux_table_t*>(p1); + const fts_aux_table_t* fa2 = static_cast<const fts_aux_table_t*>(p2); + + return static_cast<int>(fa1->parent_id - fa2->parent_id); +} + +#endif /* _WIN32 */ + /**********************************************************************//** Check and drop all orphaned FTS auxiliary tables, those that don't have a parent table or FTS index defined on them. @@ -5951,18 +6317,75 @@ fts_check_and_drop_orphaned_tables( trx_t* trx, /*!< in: transaction */ ib_vector_t* tables) /*!< in: tables to check */ { +#ifdef _WIN32 + mem_heap_t* heap; + ib_vector_t* aux_tables_to_rename; + ib_alloc_t* heap_alloc; + + heap = mem_heap_create(1024); + heap_alloc = ib_heap_allocator_create(heap); + + /* We store all aux tables belonging to the same parent table here, + and rename all these tables in a batch mode. */ + aux_tables_to_rename = ib_vector_create(heap_alloc, + sizeof(fts_aux_table_t), 128); + + /* Sort by parent_id first, in case rename will fail */ + ib_vector_sort(tables, fts_check_aux_table_parent_id_cmp); +#endif /* _WIN32 */ + for (ulint i = 0; i < ib_vector_size(tables); ++i) { - dict_table_t* table; + dict_table_t* parent_table; fts_aux_table_t* aux_table; bool drop = false; +#ifdef _WIN32 + dict_table_t* table; + fts_aux_table_t* next_aux_table = NULL; + ib_id_t orig_parent_id = 0; + bool rename = false; +#endif /* _WIN32 */ aux_table = static_cast<fts_aux_table_t*>( ib_vector_get(tables, i)); +#ifdef _WIN32 table = dict_table_open_on_id( + aux_table->id, TRUE, DICT_TABLE_OP_NORMAL); + orig_parent_id = aux_table->parent_id; + + if (table == NULL || strcmp(table->name, aux_table->name)) { + /* Skip these aux tables, which are common tables + with wrong table ids */ + if (table) { + dict_table_close(table, TRUE, FALSE); + } + + continue; + + } else if (!DICT_TF2_FLAG_IS_SET(table, + DICT_TF2_FTS_AUX_HEX_NAME)) { + + aux_table->parent_id = fts_fake_hex_to_dec( + aux_table->parent_id); + + if (aux_table->index_id != 0) { + aux_table->index_id = fts_fake_hex_to_dec( + aux_table->index_id); + } + + ut_ad(aux_table->id > aux_table->parent_id); + rename = true; + } + + if (table) { + dict_table_close(table, TRUE, FALSE); + } +#endif /* _WIN32 */ + + parent_table = dict_table_open_on_id( aux_table->parent_id, TRUE, DICT_TABLE_OP_NORMAL); - if (table == NULL || table->fts == NULL) { + if (parent_table == NULL || parent_table->fts == NULL) { drop = true; @@ -5971,7 +6394,7 @@ fts_check_and_drop_orphaned_tables( fts_t* fts; drop = true; - fts = table->fts; + fts = parent_table->fts; id = aux_table->index_id; /* Search for the FT index in the table's list. */ @@ -5979,33 +6402,28 @@ fts_check_and_drop_orphaned_tables( j < ib_vector_size(fts->indexes); ++j) { - const dict_index_t* index; + const dict_index_t* index; index = static_cast<const dict_index_t*>( ib_vector_getp_const(fts->indexes, j)); if (index->id == id) { - drop = false; break; } } } - if (table) { - dict_table_close(table, TRUE, FALSE); - } - if (drop) { ib_logf(IB_LOG_LEVEL_WARN, "Parent table of FTS auxiliary table %s not " "found.", aux_table->name); - dberr_t err = fts_drop_table(trx, aux_table->name); + dberr_t err = fts_drop_table(trx, aux_table->name); if (err == DB_FAIL) { - char* path; + char* path; path = fil_make_ibd_name( aux_table->name, false); @@ -6016,7 +6434,120 @@ fts_check_and_drop_orphaned_tables( mem_free(path); } } +#ifdef _WIN32 + if (!drop && rename) { + ib_vector_push(aux_tables_to_rename, aux_table); + } + + if (i + 1 < ib_vector_size(tables)) { + next_aux_table = static_cast<fts_aux_table_t*>( + ib_vector_get(tables, i + 1)); + } + + if ((next_aux_table == NULL + || orig_parent_id != next_aux_table->parent_id) + && !ib_vector_is_empty(aux_tables_to_rename)) { + /* All aux tables of parent table, whose id is + last_parent_id, have been checked, try to rename + them if necessary. We had better use a new background + trx to rename rather than the original trx, in case + any failure would cause a complete rollback. */ + dberr_t err; + trx_t* trx_rename = trx_allocate_for_background(); + trx_rename->op_info = "Rename aux tables to " + "hex format"; + trx_rename->dict_operation_lock_mode = RW_X_LATCH; + trx_start_for_ddl(trx_rename, TRX_DICT_OP_TABLE); + + err = fts_rename_aux_tables_to_hex_format(trx_rename, + parent_table, aux_tables_to_rename); + + trx_rename->dict_operation_lock_mode = 0; + + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_WARN, + "Rollback operations on all " + "aux tables of table %s. " + "Please check why renaming aux tables " + "failed, and restart the server to " + "upgrade again to " + "get the table work.", + parent_table->name); + + fts_sql_rollback(trx_rename); + } else { + fts_sql_commit(trx_rename); + } + + trx_free_for_background(trx_rename); + ib_vector_reset(aux_tables_to_rename); + } +#else /* _WIN32 */ + if (!drop) { + dict_table_t* table; + + table = dict_table_open_on_id( + aux_table->id, TRUE, DICT_TABLE_OP_NORMAL); + if (table != NULL + && strcmp(table->name, aux_table->name)) { + dict_table_close(table, TRUE, FALSE); + table = NULL; + } + + if (table != NULL + && !DICT_TF2_FLAG_IS_SET( + table, + DICT_TF2_FTS_AUX_HEX_NAME)) { + dberr_t err = fts_update_hex_format_flag( + trx, table->id, true); + + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_WARN, + "Setting aux table %s to hex " + "format failed.", table->name); + } else { + DICT_TF2_FLAG_SET(table, + DICT_TF2_FTS_AUX_HEX_NAME); + } + } + + if (table != NULL) { + dict_table_close(table, TRUE, FALSE); + } + + ut_ad(parent_table != NULL); + if (!DICT_TF2_FLAG_IS_SET(parent_table, + DICT_TF2_FTS_AUX_HEX_NAME)) { + dberr_t err = fts_update_hex_format_flag( + trx, parent_table->id, true); + + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_WARN, + "Setting parent table %s of " + "FTS auxiliary %s to hex " + "format failed.", + parent_table->name, + aux_table->name); + } else { + DICT_TF2_FLAG_SET(parent_table, + DICT_TF2_FTS_AUX_HEX_NAME); + } + } + } + +#endif /* _WIN32 */ + + if (parent_table) { + dict_table_close(parent_table, TRUE, FALSE); + } + } + +#ifdef _WIN32 + /* Free the memory allocated at the beginning */ + if (heap != NULL) { + mem_heap_free(heap); } +#endif /* _WIN32 */ } /**********************************************************************//** diff --git a/storage/innobase/fts/fts0opt.cc b/storage/innobase/fts/fts0opt.cc index 7cdad522564..2efb5d05c21 100644 --- a/storage/innobase/fts/fts0opt.cc +++ b/storage/innobase/fts/fts0opt.cc @@ -1624,10 +1624,12 @@ fts_optimize_create( optim->fts_common_table.parent = table->name; optim->fts_common_table.table_id = table->id; optim->fts_common_table.type = FTS_COMMON_TABLE; + optim->fts_common_table.table = table; optim->fts_index_table.parent = table->name; optim->fts_index_table.table_id = table->id; optim->fts_index_table.type = FTS_INDEX_TABLE; + optim->fts_index_table.table = table; /* The common prefix for all this parent table's aux tables. */ optim->name_prefix = fts_get_table_name_prefix( diff --git a/storage/innobase/fts/fts0pars.cc b/storage/innobase/fts/fts0pars.cc index a4009106c83..ef361b3c9c6 100644 --- a/storage/innobase/fts/fts0pars.cc +++ b/storage/innobase/fts/fts0pars.cc @@ -467,9 +467,9 @@ static const yytype_int8 yyrhs[] = /* YYRLINE[YYN] -- source line where rule number YYN was defined. */ static const yytype_uint8 yyrline[] = { - 0, 79, 79, 85, 89, 99, 111, 115, 124, 128, - 132, 136, 141, 147, 152, 159, 165, 169, 173, 177, - 181, 186, 191, 197, 202 + 0, 79, 79, 85, 89, 99, 111, 119, 129, 133, + 137, 141, 146, 152, 157, 164, 170, 174, 178, 182, + 186, 191, 196, 202, 207 }; #endif @@ -1458,7 +1458,7 @@ yyreduce: (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node)); if (!(yyval.node)) { - (yyval.node) = fts_ast_create_node_subexp_list(state, (yyvsp[(2) - (2)].node)); + (yyval.node) = (yyvsp[(2) - (2)].node); } else { fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node)); } @@ -1471,18 +1471,23 @@ yyreduce: #line 111 "fts0pars.y" { (yyval.node) = (yyvsp[(2) - (3)].node); + + if ((yyval.node)) { + (yyval.node) = fts_ast_create_node_subexp_list(state, (yyval.node)); + } } break; case 7: /* Line 1806 of yacc.c */ -#line 115 "fts0pars.y" +#line 119 "fts0pars.y" { - (yyval.node) = fts_ast_create_node_subexp_list(state, (yyvsp[(1) - (4)].node)); + (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (4)].node)); if ((yyvsp[(3) - (4)].node)) { - fts_ast_add_node((yyval.node), (yyvsp[(3) - (4)].node)); + fts_ast_add_node((yyval.node), + fts_ast_create_node_subexp_list(state, (yyvsp[(3) - (4)].node))); } } break; @@ -1490,7 +1495,7 @@ yyreduce: case 8: /* Line 1806 of yacc.c */ -#line 124 "fts0pars.y" +#line 129 "fts0pars.y" { (yyval.node) = (yyvsp[(1) - (1)].node); } @@ -1499,7 +1504,7 @@ yyreduce: case 9: /* Line 1806 of yacc.c */ -#line 128 "fts0pars.y" +#line 133 "fts0pars.y" { (yyval.node) = (yyvsp[(1) - (1)].node); } @@ -1508,7 +1513,7 @@ yyreduce: case 10: /* Line 1806 of yacc.c */ -#line 132 "fts0pars.y" +#line 137 "fts0pars.y" { fts_ast_term_set_wildcard((yyvsp[(1) - (2)].node)); } @@ -1517,7 +1522,7 @@ yyreduce: case 11: /* Line 1806 of yacc.c */ -#line 136 "fts0pars.y" +#line 141 "fts0pars.y" { fts_ast_term_set_distance((yyvsp[(1) - (3)].node), strtoul((yyvsp[(3) - (3)].token), NULL, 10)); free((yyvsp[(3) - (3)].token)); @@ -1527,7 +1532,7 @@ yyreduce: case 12: /* Line 1806 of yacc.c */ -#line 141 "fts0pars.y" +#line 146 "fts0pars.y" { (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (3)].node)); fts_ast_add_node((yyval.node), (yyvsp[(2) - (3)].node)); @@ -1538,7 +1543,7 @@ yyreduce: case 13: /* Line 1806 of yacc.c */ -#line 147 "fts0pars.y" +#line 152 "fts0pars.y" { (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node)); fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node)); @@ -1548,7 +1553,7 @@ yyreduce: case 14: /* Line 1806 of yacc.c */ -#line 152 "fts0pars.y" +#line 157 "fts0pars.y" { (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (4)].node)); fts_ast_add_node((yyval.node), (yyvsp[(2) - (4)].node)); @@ -1560,7 +1565,7 @@ yyreduce: case 15: /* Line 1806 of yacc.c */ -#line 159 "fts0pars.y" +#line 164 "fts0pars.y" { (yyval.node) = fts_ast_create_node_list(state, (yyvsp[(1) - (2)].node)); fts_ast_add_node((yyval.node), (yyvsp[(2) - (2)].node)); @@ -1570,7 +1575,7 @@ yyreduce: case 16: /* Line 1806 of yacc.c */ -#line 165 "fts0pars.y" +#line 170 "fts0pars.y" { (yyval.node) = fts_ast_create_node_oper(state, FTS_IGNORE); } @@ -1579,7 +1584,7 @@ yyreduce: case 17: /* Line 1806 of yacc.c */ -#line 169 "fts0pars.y" +#line 174 "fts0pars.y" { (yyval.node) = fts_ast_create_node_oper(state, FTS_EXIST); } @@ -1588,7 +1593,7 @@ yyreduce: case 18: /* Line 1806 of yacc.c */ -#line 173 "fts0pars.y" +#line 178 "fts0pars.y" { (yyval.node) = fts_ast_create_node_oper(state, FTS_NEGATE); } @@ -1597,7 +1602,7 @@ yyreduce: case 19: /* Line 1806 of yacc.c */ -#line 177 "fts0pars.y" +#line 182 "fts0pars.y" { (yyval.node) = fts_ast_create_node_oper(state, FTS_DECR_RATING); } @@ -1606,7 +1611,7 @@ yyreduce: case 20: /* Line 1806 of yacc.c */ -#line 181 "fts0pars.y" +#line 186 "fts0pars.y" { (yyval.node) = fts_ast_create_node_oper(state, FTS_INCR_RATING); } @@ -1615,7 +1620,7 @@ yyreduce: case 21: /* Line 1806 of yacc.c */ -#line 186 "fts0pars.y" +#line 191 "fts0pars.y" { (yyval.node) = fts_ast_create_node_term(state, (yyvsp[(1) - (1)].token)); free((yyvsp[(1) - (1)].token)); @@ -1625,7 +1630,7 @@ yyreduce: case 22: /* Line 1806 of yacc.c */ -#line 191 "fts0pars.y" +#line 196 "fts0pars.y" { (yyval.node) = fts_ast_create_node_term(state, (yyvsp[(1) - (1)].token)); free((yyvsp[(1) - (1)].token)); @@ -1635,7 +1640,7 @@ yyreduce: case 23: /* Line 1806 of yacc.c */ -#line 197 "fts0pars.y" +#line 202 "fts0pars.y" { (yyval.node) = (yyvsp[(2) - (2)].node); } @@ -1644,7 +1649,7 @@ yyreduce: case 24: /* Line 1806 of yacc.c */ -#line 202 "fts0pars.y" +#line 207 "fts0pars.y" { (yyval.node) = fts_ast_create_node_text(state, (yyvsp[(1) - (1)].token)); free((yyvsp[(1) - (1)].token)); @@ -1654,7 +1659,7 @@ yyreduce: /* Line 1806 of yacc.c */ -#line 1658 "fts0pars.cc" +#line 1663 "fts0pars.cc" default: break; } /* User semantic actions sometimes alter yychar, and that requires @@ -1885,7 +1890,7 @@ yyreturn: /* Line 2067 of yacc.c */ -#line 207 "fts0pars.y" +#line 212 "fts0pars.y" /******************************************************************** diff --git a/storage/innobase/fts/fts0pars.y b/storage/innobase/fts/fts0pars.y index 73d71bc87c5..ff22e9a9873 100644 --- a/storage/innobase/fts/fts0pars.y +++ b/storage/innobase/fts/fts0pars.y @@ -101,7 +101,7 @@ expr_lst: /* Empty */ { $$ = fts_ast_create_node_list(state, $1); if (!$$) { - $$ = fts_ast_create_node_subexp_list(state, $2); + $$ = $2; } else { fts_ast_add_node($$, $2); } @@ -110,13 +110,18 @@ expr_lst: /* Empty */ { sub_expr: '(' expr_lst ')' { $$ = $2; + + if ($$) { + $$ = fts_ast_create_node_subexp_list(state, $$); + } } | prefix '(' expr_lst ')' { - $$ = fts_ast_create_node_subexp_list(state, $1); + $$ = fts_ast_create_node_list(state, $1); if ($3) { - fts_ast_add_node($$, $3); + fts_ast_add_node($$, + fts_ast_create_node_subexp_list(state, $3)); } } ; diff --git a/storage/innobase/fts/fts0que.cc b/storage/innobase/fts/fts0que.cc index a70c742da0c..7c1b79ba35c 100644 --- a/storage/innobase/fts/fts0que.cc +++ b/storage/innobase/fts/fts0que.cc @@ -40,9 +40,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang #include "fts0vlc.ic" #endif -#include <string> #include <vector> -#include <map> #define FTS_ELEM(t, n, i, j) (t[(i) * n + (j)]) @@ -66,8 +64,7 @@ static const double FTS_NORMALIZE_COEFF = 0.0115F; // FIXME: Need to have a generic iterator that traverses the ilist. -typedef std::map<std::string, ulint> word_map_t; -typedef std::vector<std::string> word_vector_t; +typedef std::vector<fts_string_t> word_vector_t; struct fts_word_freq_t; @@ -92,7 +89,7 @@ struct fts_query_t { fts_ast_node_t* cur_node; /*!< Current tree node */ - word_map_t* word_map; /*!< Matched word map for + ib_rbt_t* word_map; /*!< Matched word map for searching by word*/ word_vector_t* word_vector; /*!< Matched word vector for @@ -229,7 +226,7 @@ struct fts_doc_freq_t { /** To determine the word frequency per document. */ struct fts_word_freq_t { - byte* word; /*!< Word for which we need the freq, + fts_string_t word; /*!< Word for which we need the freq, it's allocated on the query heap */ ib_rbt_t* doc_freqs; /*!< RB Tree for storing per document @@ -257,15 +254,14 @@ static dberr_t fts_query_filter_doc_ids( /*=====================*/ - fts_query_t* query, /*!< in: query instance */ - const byte* word, /*!< in: the current word */ - fts_word_freq_t*word_freq, /*!< in/out: word frequency */ - const fts_node_t* - node, /*!< in: current FTS node */ - void* data, /*!< in: doc id ilist */ - ulint len, /*!< in: doc id ilist size */ - ibool calc_doc_count);/*!< in: whether to remember doc - count */ + fts_query_t* query, /*!< in: query instance */ + const fts_string_t* word, /*!< in: the current word */ + fts_word_freq_t* word_freq, /*!< in/out: word frequency */ + const fts_node_t* node, /*!< in: current FTS node */ + void* data, /*!< in: doc id ilist */ + ulint len, /*!< in: doc id ilist size */ + ibool calc_doc_count);/*!< in: whether to remember doc + count */ #if 0 /*****************************************************************//*** @@ -575,27 +571,41 @@ static void fts_ranking_words_add( /*==================*/ - fts_query_t* query, /*!< in: query instance */ - fts_ranking_t* ranking, /*!< in: ranking instance */ - const char* word) /*!< in: term/word to add */ + fts_query_t* query, /*!< in: query instance */ + fts_ranking_t* ranking, /*!< in: ranking instance */ + const fts_string_t* word) /*!< in: term/word to add */ { ulint pos; ulint byte_offset; ulint bit_offset; - word_map_t::iterator it; - - /* Note: we suppose the word map and vector are append-only */ - /* Check if need to add it to word map */ - it = query->word_map->lower_bound(word); - if (it != query->word_map->end() - && !query->word_map->key_comp()(word, it->first)) { - pos = it->second; + ib_rbt_bound_t parent; + + /* Note: we suppose the word map and vector are append-only. */ + ut_ad(query->word_vector->size() == rbt_size(query->word_map)); + + /* We use ib_rbt to simulate a map, f_n_char means position. */ + if (rbt_search(query->word_map, &parent, word) == 0) { + fts_string_t* result_word; + + result_word = rbt_value(fts_string_t, parent.last); + pos = result_word->f_n_char; + ut_ad(pos < rbt_size(query->word_map)); } else { - pos = query->word_map->size(); - query->word_map->insert(it, - std::pair<std::string, ulint>(word, pos)); + /* Add the word to map. */ + fts_string_t new_word; - query->word_vector->push_back(word); + pos = rbt_size(query->word_map); + + new_word.f_str = static_cast<byte*>(mem_heap_alloc(query->heap, + word->f_len + 1)); + memcpy(new_word.f_str, word->f_str, word->f_len); + new_word.f_str[word->f_len] = 0; + new_word.f_len = word->f_len; + new_word.f_n_char = pos; + + rbt_add_node(query->word_map, &parent, &new_word); + ut_ad(rbt_validate(query->word_map)); + query->word_vector->push_back(new_word); } /* Check words len */ @@ -630,7 +640,7 @@ fts_ranking_words_get_next( const fts_query_t* query, /*!< in: query instance */ fts_ranking_t* ranking,/*!< in: ranking instance */ ulint* pos, /*!< in/out: word start pos */ - byte** word) /*!< in/out: term/word to add */ + fts_string_t* word) /*!< in/out: term/word to add */ { bool ret = false; ulint max_pos = ranking->words_len * CHAR_BIT; @@ -651,7 +661,7 @@ fts_ranking_words_get_next( /* Get next word from word vector */ if (ret) { ut_ad(*pos < query->word_vector->size()); - *word = (byte*)query->word_vector->at((size_t)*pos).c_str(); + *word = query->word_vector->at((size_t)*pos); *pos += 1; } @@ -666,23 +676,22 @@ static fts_word_freq_t* fts_query_add_word_freq( /*====================*/ - fts_query_t* query, /*!< in: query instance */ - const byte* word) /*!< in: term/word to add */ + fts_query_t* query, /*!< in: query instance */ + const fts_string_t* word) /*!< in: term/word to add */ { ib_rbt_bound_t parent; /* Lookup the word in our rb tree and add if it doesn't exist. */ if (rbt_search(query->word_freqs, &parent, word) != 0) { fts_word_freq_t word_freq; - ulint len = ut_strlen((char*) word) + 1; memset(&word_freq, 0, sizeof(word_freq)); - word_freq.word = static_cast<byte*>( - mem_heap_alloc(query->heap, len)); - - /* Need to copy the NUL character too. */ - memcpy(word_freq.word, word, len); + word_freq.word.f_str = static_cast<byte*>( + mem_heap_alloc(query->heap, word->f_len + 1)); + memcpy(word_freq.word.f_str, word->f_str, word->f_len); + word_freq.word.f_str[word->f_len] = 0; + word_freq.word.f_len = word->f_len; word_freq.doc_count = 0; @@ -692,7 +701,7 @@ fts_query_add_word_freq( parent.last = rbt_add_node( query->word_freqs, &parent, &word_freq); - query->total_size += len + query->total_size += word->f_len + SIZEOF_RBT_CREATE + SIZEOF_RBT_NODE_ADD + sizeof(fts_word_freq_t); @@ -956,7 +965,7 @@ fts_query_add_word_to_document( /*===========================*/ fts_query_t* query, /*!< in: query to update */ doc_id_t doc_id, /*!< in: the document to update */ - const byte* word) /*!< in: the token to add */ + const fts_string_t* word) /*!< in: the token to add */ { ib_rbt_bound_t parent; fts_ranking_t* ranking = NULL; @@ -980,7 +989,7 @@ fts_query_add_word_to_document( } if (ranking != NULL) { - fts_ranking_words_add(query, ranking, (char*)word); + fts_ranking_words_add(query, ranking, word); } } @@ -1010,13 +1019,13 @@ fts_query_check_node( fts_word_freq_t*word_freqs; /* The word must exist. */ - ret = rbt_search(query->word_freqs, &parent, token->f_str); + ret = rbt_search(query->word_freqs, &parent, token); ut_a(ret == 0); word_freqs = rbt_value(fts_word_freq_t, parent.last); query->error = fts_query_filter_doc_ids( - query, token->f_str, word_freqs, node, + query, token, word_freqs, node, node->ilist, ilist_size, TRUE); } } @@ -1073,7 +1082,7 @@ fts_cache_find_wildcard( ret = rbt_search(query->word_freqs, &freq_parent, - srch_text.f_str); + &srch_text); ut_a(ret == 0); @@ -1082,7 +1091,7 @@ fts_cache_find_wildcard( freq_parent.last); query->error = fts_query_filter_doc_ids( - query, srch_text.f_str, + query, &srch_text, word_freqs, node, node->ilist, node->ilist_size, TRUE); @@ -1542,7 +1551,7 @@ fts_merge_doc_ids( for (node = rbt_first(doc_ids); node; node = rbt_next(doc_ids, node)) { fts_ranking_t* ranking; ulint pos = 0; - byte* word = NULL; + fts_string_t word; ranking = rbt_value(fts_ranking_t, node); @@ -1557,7 +1566,7 @@ fts_merge_doc_ids( ut_a(ranking->words); while (fts_ranking_words_get_next(query, ranking, &pos, &word)) { fts_query_add_word_to_document(query, ranking->doc_id, - word); + &word); } } @@ -2472,8 +2481,7 @@ fts_query_search_phrase( token = static_cast<fts_string_t*>( ib_vector_get(tokens, z)); fts_query_add_word_to_document( - query, match->doc_id, - token->f_str); + query, match->doc_id, token); } } } @@ -2562,7 +2570,7 @@ fts_query_phrase_search( && result_str.f_n_char <= fts_max_token_size) { /* Add the word to the RB tree so that we can calculate it's frequencey within a document. */ - fts_query_add_word_freq(query, token->f_str); + fts_query_add_word_freq(query, token); } else { ib_vector_pop(tokens); } @@ -2687,7 +2695,7 @@ fts_query_phrase_search( } fts_query_add_word_to_document( - query, match->doc_id, token->f_str); + query, match->doc_id, token); } query->oper = oper; goto func_exit; @@ -2837,6 +2845,8 @@ fts_query_visitor( ut_ad(query->intersection == NULL); query->intersection = rbt_create( sizeof(fts_ranking_t), fts_ranking_doc_id_cmp); + + query->total_size += SIZEOF_RBT_CREATE; } /* Set the current proximity distance. */ @@ -2858,10 +2868,12 @@ fts_query_visitor( break; case FTS_AST_TERM: + token.f_str = node->term.ptr; + token.f_len = ut_strlen(reinterpret_cast<char*>(token.f_str)); /* Add the word to our RB tree that will be used to calculate this terms per document frequency. */ - fts_query_add_word_freq(query, node->term.ptr); + fts_query_add_word_freq(query, &token); ptr = fts_query_get_token(node, &token); query->error = fts_query_execute(query, &token); @@ -2871,6 +2883,10 @@ fts_query_visitor( } break; + case FTS_AST_SUBEXP_LIST: + query->error = fts_ast_visit_sub_exp(node, fts_query_visitor, arg); + break; + default: ut_error; } @@ -2905,13 +2921,7 @@ fts_ast_visit_sub_exp( ut_a(node->type == FTS_AST_SUBEXP_LIST); - node = node->list.head; - - if (!node || !node->next) { - return(error); - } - - cur_oper = node->oper; + cur_oper = query->oper; /* Save current result set */ parent_doc_ids = query->doc_ids; @@ -2927,26 +2937,20 @@ fts_ast_visit_sub_exp( query->multi_exist = false; /* Process nodes in current sub-expression and store its result set in query->doc_ids we created above. */ - error = fts_ast_visit(FTS_NONE, node->next, visitor, + error = fts_ast_visit(FTS_NONE, node, visitor, arg, &will_be_ignored); - /* Reinstate parent node state and prepare for merge. */ + /* Reinstate parent node state */ query->multi_exist = multi_exist; query->oper = cur_oper; - subexpr_doc_ids = query->doc_ids; - - /* Restore current result set. */ - query->doc_ids = parent_doc_ids; /* Merge the sub-expression result with the parent result set. */ + subexpr_doc_ids = query->doc_ids; + query->doc_ids = parent_doc_ids; if (error == DB_SUCCESS && !rbt_empty(subexpr_doc_ids)) { error = fts_merge_doc_ids(query, subexpr_doc_ids); } - if (query->oper == FTS_EXIST) { - query->multi_exist = true; - } - /* Free current result set. Result already merged into parent. */ fts_query_free_doc_ids(query, subexpr_doc_ids); @@ -3033,14 +3037,13 @@ static dberr_t fts_query_filter_doc_ids( /*=====================*/ - fts_query_t* query, /*!< in: query instance */ - const byte* word, /*!< in: the current word */ - fts_word_freq_t*word_freq, /*!< in/out: word frequency */ - const fts_node_t* - node, /*!< in: current FTS node */ - void* data, /*!< in: doc id ilist */ - ulint len, /*!< in: doc id ilist size */ - ibool calc_doc_count) /*!< in: whether to remember doc count */ + fts_query_t* query, /*!< in: query instance */ + const fts_string_t* word, /*!< in: the current word */ + fts_word_freq_t* word_freq, /*!< in/out: word frequency */ + const fts_node_t* node, /*!< in: current FTS node */ + void* data, /*!< in: doc id ilist */ + ulint len, /*!< in: doc id ilist size */ + ibool calc_doc_count) /*!< in: whether to remember doc count */ { byte* ptr = static_cast<byte*>(data); doc_id_t doc_id = 0; @@ -3163,13 +3166,15 @@ fts_query_read_node( ib_rbt_bound_t parent; fts_word_freq_t* word_freq; ibool skip = FALSE; - byte term[FTS_MAX_WORD_LEN + 1]; + fts_string_t term; + byte buf[FTS_MAX_WORD_LEN + 1]; dberr_t error = DB_SUCCESS; ut_a(query->cur_node->type == FTS_AST_TERM || query->cur_node->type == FTS_AST_TEXT); memset(&node, 0, sizeof(node)); + term.f_str = buf; /* Need to consider the wildcard search case, the word frequency is created on the search string not the actual word. So we need @@ -3179,15 +3184,18 @@ fts_query_read_node( /* These cast are safe since we only care about the terminating NUL character as an end of string marker. */ - ut_strcpy((char*) term, (char*) query->cur_node->term.ptr); + term.f_len = ut_strlen(reinterpret_cast<char*> + (query->cur_node->term.ptr)); + ut_ad(FTS_MAX_WORD_LEN >= term.f_len); + memcpy(term.f_str, query->cur_node->term.ptr, term.f_len); } else { - /* Need to copy the NUL character too. */ - memcpy(term, word->f_str, word->f_len); - term[word->f_len] = 0; + term.f_len = word->f_len; + ut_ad(FTS_MAX_WORD_LEN >= word->f_len); + memcpy(term.f_str, word->f_str, word->f_len); } /* Lookup the word in our rb tree, it must exist. */ - ret = rbt_search(query->word_freqs, &parent, term); + ret = rbt_search(query->word_freqs, &parent, &term); ut_a(ret == 0); @@ -3239,7 +3247,7 @@ fts_query_read_node( case 4: /* ILIST */ error = fts_query_filter_doc_ids( - query, word_freq->word, word_freq, + query, &word_freq->word, word_freq, &node, data, len, FALSE); break; @@ -3332,7 +3340,7 @@ fts_query_calculate_idf( if (fts_enable_diag_print) { fprintf(stderr,"'%s' -> " UINT64PF "/" UINT64PF " %6.5lf\n", - word_freq->word, + word_freq->word.f_str, query->total_docs, word_freq->doc_count, word_freq->idf); } @@ -3349,12 +3357,12 @@ fts_query_calculate_ranking( fts_ranking_t* ranking) /*!< in: Document to rank */ { ulint pos = 0; - byte* word = NULL; + fts_string_t word; /* At this stage, ranking->rank should not exceed the 1.0 bound */ ut_ad(ranking->rank <= 1.0 && ranking->rank >= -1.0); - ut_ad(query->word_map->size() == query->word_vector->size()); + ut_ad(rbt_size(query->word_map) == query->word_vector->size()); while (fts_ranking_words_get_next(query, ranking, &pos, &word)) { int ret; @@ -3363,8 +3371,7 @@ fts_query_calculate_ranking( fts_doc_freq_t* doc_freq; fts_word_freq_t* word_freq; - ut_ad(word != NULL); - ret = rbt_search(query->word_freqs, &parent, word); + ret = rbt_search(query->word_freqs, &parent, &word); /* It must exist. */ ut_a(ret == 0); @@ -3620,18 +3627,18 @@ fts_query_free( ut_a(!query->intersection); - if (query->heap) { - mem_heap_free(query->heap); - } - if (query->word_map) { - delete query->word_map; + rbt_free(query->word_map); } if (query->word_vector) { delete query->word_vector; } + if (query->heap) { + mem_heap_free(query->heap); + } + memset(query, 0, sizeof(*query)); } @@ -3820,6 +3827,7 @@ fts_query( query.fts_common_table.type = FTS_COMMON_TABLE; query.fts_common_table.table_id = index->table->id; query.fts_common_table.parent = index->table->name; + query.fts_common_table.table = index->table; charset = fts_index_get_charset(index); @@ -3828,15 +3836,18 @@ fts_query( query.fts_index_table.table_id = index->table->id; query.fts_index_table.parent = index->table->name; query.fts_index_table.charset = charset; + query.fts_index_table.table = index->table; - query.word_map = new word_map_t; + query.word_map = rbt_create_arg_cmp( + sizeof(fts_string_t), innobase_fts_text_cmp, + (void *) charset); query.word_vector = new word_vector_t; query.error = DB_SUCCESS; /* Setup the RB tree that will be used to collect per term statistics. */ query.word_freqs = rbt_create_arg_cmp( - sizeof(fts_word_freq_t), innobase_fts_string_cmp, + sizeof(fts_word_freq_t), innobase_fts_text_cmp, (void*) charset); query.total_size += SIZEOF_RBT_CREATE; @@ -4061,13 +4072,14 @@ fts_print_doc_id( fts_ranking_t* ranking; ranking = rbt_value(fts_ranking_t, node); - fprintf(stderr, "doc_ids info, doc_id: %ld \n", + ib_logf(IB_LOG_LEVEL_INFO, "doc_ids info, doc_id: %ld \n", (ulint) ranking->doc_id); - ulint pos = 0; - byte* value = NULL; - while (fts_ranking_words_get_next(query, ranking, &pos, &value)) { - fprintf(stderr, "doc_ids info, value: %s \n", value); + ulint pos = 0; + fts_string_t word; + + while (fts_ranking_words_get_next(query, ranking, &pos, &word)) { + ib_logf(IB_LOG_LEVEL_INFO, "doc_ids info, value: %s \n", word.f_str); } } } @@ -4123,7 +4135,7 @@ fts_expand_query( fts_ranking_t* ranking; ulint pos; - byte* word; + fts_string_t word; ulint prev_token_size; ulint estimate_size; @@ -4145,22 +4157,17 @@ fts_expand_query( /* Remove words that have already been searched in the first pass */ pos = 0; - word = NULL; while (fts_ranking_words_get_next(query, ranking, &pos, - &word)) { - fts_string_t str; + &word)) { ibool ret; - /* FIXME: We are discarding a const qualifier here. */ - str.f_str = word; - str.f_len = ut_strlen((const char*) str.f_str); - ret = rbt_delete(result_doc.tokens, &str); + ret = rbt_delete(result_doc.tokens, &word); /* The word must exist in the doc we found */ if (!ret) { - fprintf(stderr, " InnoDB: Error: Did not " + ib_logf(IB_LOG_LEVEL_ERROR, "Did not " "find word %s in doc %ld for query " - "expansion search.\n", str.f_str, + "expansion search.\n", word.f_str, (ulint) ranking->doc_id); } } @@ -4185,7 +4192,8 @@ fts_expand_query( fts_token_t* mytoken; mytoken = rbt_value(fts_token_t, token_node); - fts_query_add_word_freq(query, mytoken->text.f_str); + ut_ad(mytoken->text.f_str[mytoken->text.f_len] == 0); + fts_query_add_word_freq(query, &mytoken->text); error = fts_query_union(query, &mytoken->text); if (error != DB_SUCCESS) { @@ -4324,8 +4332,7 @@ fts_phrase_or_proximity_search( token = static_cast<fts_string_t*>( ib_vector_get(tokens, z)); fts_query_add_word_to_document( - query, match[0]->doc_id, - token->f_str); + query, match[0]->doc_id, token); } } } diff --git a/storage/innobase/fts/fts0sql.cc b/storage/innobase/fts/fts0sql.cc index 03c19d93af6..14bc3ec44c9 100644 --- a/storage/innobase/fts/fts0sql.cc +++ b/storage/innobase/fts/fts0sql.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2007, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -61,21 +61,28 @@ fts_get_table_id( long */ { int len; + bool hex_name = DICT_TF2_FLAG_IS_SET(fts_table->table, + DICT_TF2_FTS_AUX_HEX_NAME); + + ut_a(fts_table->table != NULL); switch (fts_table->type) { case FTS_COMMON_TABLE: - len = fts_write_object_id(fts_table->table_id, table_id); + len = fts_write_object_id(fts_table->table_id, table_id, + hex_name); break; case FTS_INDEX_TABLE: - len = fts_write_object_id(fts_table->table_id, table_id); + len = fts_write_object_id(fts_table->table_id, table_id, + hex_name); table_id[len] = '_'; ++len; table_id += len; - len += fts_write_object_id(fts_table->index_id, table_id); + len += fts_write_object_id(fts_table->index_id, table_id, + hex_name); break; default: @@ -191,7 +198,7 @@ fts_parse_sql( str = ut_str3cat(fts_sql_begin, str_tmp, fts_sql_end); mem_free(str_tmp); - dict_locked = (fts_table && fts_table->table + dict_locked = (fts_table && fts_table->table->fts && (fts_table->table->fts->fts_status & TABLE_DICT_LOCKED)); diff --git a/storage/innobase/fts/fts0tlex.cc b/storage/innobase/fts/fts0tlex.cc index 717ddb8a77e..f78456d8795 100644 --- a/storage/innobase/fts/fts0tlex.cc +++ b/storage/innobase/fts/fts0tlex.cc @@ -305,9 +305,9 @@ YY_BUFFER_STATE fts0t_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner YY_BUFFER_STATE fts0t_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); YY_BUFFER_STATE fts0t_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner ); -void *fts0talloc (yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); -void *fts0trealloc (void *,yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); -void fts0tfree (void * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); +void *fts0talloc (yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); +void *fts0trealloc (void *,yy_size_t , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); +void fts0tfree (void * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); #define yy_new_buffer fts0t_create_buffer @@ -347,7 +347,7 @@ typedef int yy_state_type; static yy_state_type yy_get_previous_state (yyscan_t yyscanner ); static yy_state_type yy_try_NUL_trans (yy_state_type current_state ,yyscan_t yyscanner); static int yy_get_next_buffer (yyscan_t yyscanner ); -static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); +static void yy_fatal_error (yyconst char msg[] , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) ); /* Done after the current pattern has been matched and before the * corresponding action - sets up yytext. @@ -575,11 +575,11 @@ extern int fts0twrap (yyscan_t yyscanner ); #endif #ifndef yytext_ptr -static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))); +static void yy_flex_strncpy (char *,yyconst char *,int , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))); #endif #ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))); +static int yy_flex_strlen (yyconst char * , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))); #endif #ifndef YY_NO_INPUT @@ -1601,7 +1601,7 @@ YY_BUFFER_STATE fts0t_scan_bytes (yyconst char * yybytes, int _yybytes_len , y #define YY_EXIT_FAILURE 2 #endif -static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +static void yy_fatal_error (yyconst char* msg , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { (void) fprintf( stderr, "%s\n", msg ); exit( YY_EXIT_FAILURE ); @@ -1902,7 +1902,7 @@ int fts0tlex_destroy (yyscan_t yyscanner) */ #ifndef yytext_ptr -static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { register int i; for ( i = 0; i < n; ++i ) @@ -1911,7 +1911,7 @@ static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t #endif #ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { register int n; for ( n = 0; s[n]; ++n ) @@ -1921,12 +1921,12 @@ static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner __attribu } #endif -void *fts0talloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +void *fts0talloc (yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { return (void *) malloc( size ); } -void *fts0trealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +void *fts0trealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { /* The cast to (char *) in the following accommodates both * implementations that use char* generic pointers, and those @@ -1938,7 +1938,7 @@ void *fts0trealloc (void * ptr, yy_size_t size , yyscan_t yyscanner __at return (void *) realloc( (char *) ptr, size ); } -void fts0tfree (void * ptr , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) +void fts0tfree (void * ptr , yyscan_t yyscanner __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused)) __attribute__((unused))) { free( (char *) ptr ); /* see fts0trealloc() for (char *) cast */ } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 3557c5066e8..e5bd71a100d 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -487,6 +487,7 @@ ib_cb_t innodb_api_cb[] = { (ib_cb_t) ib_cursor_open_index_using_name, (ib_cb_t) ib_close_thd, (ib_cb_t) ib_cfg_get_cfg, + (ib_cb_t) ib_cursor_set_memcached_sync, (ib_cb_t) ib_cursor_set_cluster_access, (ib_cb_t) ib_cursor_commit_trx, (ib_cb_t) ib_cfg_trx_level, @@ -760,6 +761,19 @@ innobase_rollback_to_savepoint( void* savepoint); /*!< in: savepoint data */ /*****************************************************************//** +Check whether innodb state allows to safely release MDL locks after +rollback to savepoint. +@return true if it is safe, false if its not safe. */ +static +bool +innobase_rollback_to_savepoint_can_release_mdl( +/*===========================================*/ + handlerton* hton, /*!< in/out: InnoDB handlerton */ + THD* thd); /*!< in: handle to the MySQL thread of + the user whose XA transaction should + be rolled back to savepoint */ + +/*****************************************************************//** Sets a transaction savepoint. @return always 0, that is, always succeeds */ static @@ -2840,6 +2854,8 @@ innobase_init( innobase_hton->close_connection = innobase_close_connection; innobase_hton->savepoint_set = innobase_savepoint; innobase_hton->savepoint_rollback = innobase_rollback_to_savepoint; + innobase_hton->savepoint_rollback_can_release_mdl = + innobase_rollback_to_savepoint_can_release_mdl; innobase_hton->savepoint_release = innobase_release_savepoint; innobase_hton->prepare_ordered= NULL; innobase_hton->commit_ordered= innobase_commit_ordered; @@ -4007,6 +4023,38 @@ innobase_rollback_to_savepoint( } /*****************************************************************//** +Check whether innodb state allows to safely release MDL locks after +rollback to savepoint. +When binlog is on, MDL locks acquired after savepoint unit are not +released if there are any locks held in InnoDB. +@return true if it is safe, false if its not safe. */ +static +bool +innobase_rollback_to_savepoint_can_release_mdl( +/*===========================================*/ + handlerton* hton, /*!< in: InnoDB handlerton */ + THD* thd) /*!< in: handle to the MySQL thread + of the user whose transaction should + be rolled back to savepoint */ +{ + trx_t* trx; + + DBUG_ENTER("innobase_rollback_to_savepoint_can_release_mdl"); + DBUG_ASSERT(hton == innodb_hton_ptr); + + trx = check_trx_exists(thd); + ut_ad(trx); + + /* If transaction has not acquired any locks then it is safe + to release MDL after rollback to savepoint */ + if (!(UT_LIST_GET_LEN(trx->lock.trx_locks))) { + DBUG_RETURN(true); + } + + DBUG_RETURN(false); +} + +/*****************************************************************//** Release transaction savepoint name. @return 0 if success, HA_ERR_NO_SAVEPOINT if no savepoint with the given name */ @@ -5695,23 +5743,7 @@ innobase_fts_text_cmp_prefix( to negate the result */ return(-result); } -/******************************************************************//** -compare two character string according to their charset. */ -UNIV_INTERN -int -innobase_fts_string_cmp( -/*====================*/ - const void* cs, /*!< in: Character set */ - const void* p1, /*!< in: key */ - const void* p2) /*!< in: node */ -{ - const CHARSET_INFO* charset = (const CHARSET_INFO*) cs; - uchar* s1 = (uchar*) p1; - uchar* s2 = *(uchar**) p2; - return(ha_compare_text(charset, s1, strlen((const char*) s1), - s2, strlen((const char*) s2), 0, 0)); -} /******************************************************************//** Makes all characters in a string lower case. */ UNIV_INTERN @@ -8717,12 +8749,6 @@ ha_innobase::position( } } -/* limit innodb monitor access to users with PROCESS privilege. -See http://bugs.mysql.com/32710 for expl. why we choose PROCESS. */ -#define IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name, thd) \ - (row_is_magic_monitor_table(table_name) \ - && check_global_access(thd, PROCESS_ACL)) - /*****************************************************************//** Check whether there exist a column named as "FTS_DOC_ID", which is reserved for InnoDB FTS Doc ID @@ -8837,16 +8863,6 @@ create_table_def( DBUG_RETURN(ER_TABLE_NAME); } - /* table_name must contain '/'. Later in the code we assert if it - does not */ - if (strcmp(strchr(table_name, '/') + 1, - "innodb_table_monitor") == 0) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_ERR_WRONG_COMMAND, - DEPRECATED_MSG_INNODB_TABLE_MONITOR); - } - n_cols = form->s->fields; /* Check whether there already exists a FTS_DOC_ID column */ @@ -9766,6 +9782,11 @@ index_bad: *flags2 |= DICT_TF2_USE_TABLESPACE; } + /* Set the flags2 when create table or alter tables */ + *flags2 |= DICT_TF2_FTS_AUX_HEX_NAME; + DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", + *flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;); + DBUG_RETURN(true); } @@ -9857,8 +9878,23 @@ ha_innobase::create( DBUG_RETURN(-1); } - if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(norm_name, thd)) { - DBUG_RETURN(HA_ERR_GENERIC); + if (row_is_magic_monitor_table(norm_name)) { + push_warning_printf(thd, + Sql_condition::WARN_LEVEL_WARN, + HA_ERR_WRONG_COMMAND, + "Using the table name %s to enable " + "diagnostic output is deprecated " + "and may be removed in future releases. " + "Use INFORMATION_SCHEMA or " + "PERFORMANCE_SCHEMA tables or " + "SET GLOBAL innodb_status_output=ON.", + dict_remove_db_name(norm_name)); + + /* Limit innodb monitor access to users with PROCESS privilege. + See http://bugs.mysql.com/32710 why we chose PROCESS. */ + if (check_global_access(thd, PROCESS_ACL)) { + DBUG_RETURN(HA_ERR_GENERIC); + } } /* Get the transaction associated with the current thd, or create one @@ -10302,7 +10338,8 @@ ha_innobase::delete_table( if (srv_read_only_mode) { DBUG_RETURN(HA_ERR_TABLE_READONLY); - } else if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(norm_name, thd)) { + } else if (row_is_magic_monitor_table(norm_name) + && check_global_access(thd, PROCESS_ACL)) { DBUG_RETURN(HA_ERR_GENERIC); } @@ -10538,17 +10575,7 @@ innobase_rename_table( } } - if (error != DB_SUCCESS) { - if (!srv_read_only_mode) { - FILE* ef = dict_foreign_err_file; - - fputs("InnoDB: Renaming table ", ef); - ut_print_name(ef, trx, TRUE, norm_from); - fputs(" to ", ef); - ut_print_name(ef, trx, TRUE, norm_to); - fputs(" failed!\n", ef); - } - } else { + if (error == DB_SUCCESS) { #ifndef __WIN__ sql_print_warning("Rename partition table %s " "succeeds after converting to lower " @@ -11531,7 +11558,8 @@ ha_innobase::optimize( calls to OPTIMIZE, which is undesirable. */ if (innodb_optimize_fulltext_only) { - if (prebuilt->table->fts && prebuilt->table->fts->cache) { + if (prebuilt->table->fts && prebuilt->table->fts->cache + && !dict_table_is_discarded(prebuilt->table)) { fts_sync_table(prebuilt->table); fts_optimize_table(prebuilt->table); } @@ -15567,6 +15595,7 @@ innobase_fts_find_ranking( static my_bool innodb_purge_run_now = TRUE; static my_bool innodb_purge_stop_now = TRUE; static my_bool innodb_log_checkpoint_now = TRUE; +static my_bool innodb_buf_flush_list_now = TRUE; /****************************************************************//** Set the purge state to RUN. If purge is disabled then it @@ -15640,6 +15669,29 @@ checkpoint_now_set( fil_flush_file_spaces(FIL_TABLESPACE); } } + +/****************************************************************//** +Force a dirty pages flush now. */ +static +void +buf_flush_list_now_set( +/*===================*/ + THD* thd /*!< in: thread handle */ + __attribute__((unused)), + struct st_mysql_sys_var* var /*!< in: pointer to system + variable */ + __attribute__((unused)), + void* var_ptr /*!< out: where the formal + string goes */ + __attribute__((unused)), + const void* save) /*!< in: immediate result from + check function */ +{ + if (*(my_bool*) save) { + buf_flush_list(ULINT_MAX, LSN_MAX, NULL); + buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); + } +} #endif /* UNIV_DEBUG */ /*********************************************************************** @@ -15789,6 +15841,26 @@ buffer_pool_load_abort( } } +/** Update innodb_status_output or innodb_status_output_locks, +which control InnoDB "status monitor" output to the error log. +@param[in] thd thread handle +@param[in] var system variable +@param[out] var_ptr current value +@param[in] save to-be-assigned value */ +static +void +innodb_status_output_update( + THD* thd __attribute__((unused)), + struct st_mysql_sys_var* var __attribute__((unused)), + void* var_ptr __attribute__((unused)), + const void* save __attribute__((unused))) +{ + *static_cast<my_bool*>(var_ptr) = *static_cast<const my_bool*>(save); + /* The lock timeout monitor thread also takes care of this + output. */ + os_event_set(lock_sys->timeout_event); +} + static SHOW_VAR innodb_status_variables_export[]= { {"Innodb", (char*) &show_innodb_vars, SHOW_FUNC}, {NullS, NullS, SHOW_LONG} @@ -15883,6 +15955,11 @@ static MYSQL_SYSVAR_BOOL(log_checkpoint_now, innodb_log_checkpoint_now, PLUGIN_VAR_OPCMDARG, "Force checkpoint now", NULL, checkpoint_now_set, FALSE); + +static MYSQL_SYSVAR_BOOL(buf_flush_list_now, innodb_buf_flush_list_now, + PLUGIN_VAR_OPCMDARG, + "Force dirty page flush now", + NULL, buf_flush_list_now_set, FALSE); #endif /* UNIV_DEBUG */ static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size, @@ -16559,6 +16636,15 @@ static MYSQL_SYSVAR_STR(monitor_reset_all, innobase_reset_all_monitor_counter, innodb_monitor_validate, innodb_reset_all_monitor_update, NULL); +static MYSQL_SYSVAR_BOOL(status_output, srv_print_innodb_monitor, + PLUGIN_VAR_OPCMDARG, "Enable InnoDB monitor output to the error log.", + NULL, innodb_status_output_update, FALSE); + +static MYSQL_SYSVAR_BOOL(status_output_locks, srv_print_innodb_lock_monitor, + PLUGIN_VAR_OPCMDARG, "Enable InnoDB lock monitor output to the error log." + " Requires innodb_status_output=ON.", + NULL, innodb_status_output_update, FALSE); + static MYSQL_SYSVAR_BOOL(print_all_deadlocks, srv_print_all_deadlocks, PLUGIN_VAR_OPCMDARG, "Print all deadlocks to MySQL error log (off by default)", @@ -16740,11 +16826,14 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(purge_run_now), MYSQL_SYSVAR(purge_stop_now), MYSQL_SYSVAR(log_checkpoint_now), + MYSQL_SYSVAR(buf_flush_list_now), #endif /* UNIV_DEBUG */ #if defined UNIV_DEBUG || defined UNIV_PERF_DEBUG MYSQL_SYSVAR(page_hash_locks), MYSQL_SYSVAR(doublewrite_batch_size), #endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */ + MYSQL_SYSVAR(status_output), + MYSQL_SYSVAR(status_output_locks), MYSQL_SYSVAR(print_all_deadlocks), MYSQL_SYSVAR(cmp_per_index_enabled), MYSQL_SYSVAR(undo_logs), diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index d7e16d93c6a..191050bdce2 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -46,7 +46,7 @@ Smart ALTER TABLE #include "srv0mon.h" #include "fts0priv.h" #include "pars0pars.h" - +#include "row0sel.h" #include "ha_innodb.h" /** Operations for creating secondary indexes (no rebuild needed) */ @@ -240,6 +240,7 @@ ha_innobase::check_if_supported_inplace_alter( innobase_get_err_msg(ER_READ_ONLY_MODE); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); } else if (srv_created_new_raw || srv_force_recovery) { + ha_alter_info->unsupported_reason = innobase_get_err_msg(ER_READ_ONLY_MODE); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); @@ -2531,15 +2532,16 @@ innobase_drop_fts_index_table( /** Get the new column names if any columns were renamed @param ha_alter_info Data used during in-place alter @param altered_table MySQL table that is being altered +@param table MySQL table as it is before the ALTER operation @param user_table InnoDB table as it is before the ALTER operation @param heap Memory heap for the allocation @return array of new column names in rebuilt_table, or NULL if not renamed */ static __attribute__((nonnull, warn_unused_result)) const char** innobase_get_col_names( -/*===================*/ Alter_inplace_info* ha_alter_info, const TABLE* altered_table, + const TABLE* table, const dict_table_t* user_table, mem_heap_t* heap) { @@ -2547,19 +2549,31 @@ innobase_get_col_names( uint i; DBUG_ENTER("innobase_get_col_names"); - DBUG_ASSERT(user_table->n_def > altered_table->s->fields); + DBUG_ASSERT(user_table->n_def > table->s->fields); DBUG_ASSERT(ha_alter_info->handler_flags & Alter_inplace_info::ALTER_COLUMN_NAME); cols = static_cast<const char**>( - mem_heap_alloc(heap, user_table->n_def * sizeof *cols)); + mem_heap_zalloc(heap, user_table->n_def * sizeof *cols)); + + i = 0; + List_iterator_fast<Create_field> cf_it( + ha_alter_info->alter_info->create_list); + while (const Create_field* new_field = cf_it++) { + DBUG_ASSERT(i < altered_table->s->fields); - for (i = 0; i < altered_table->s->fields; i++) { - const Field* field = altered_table->field[i]; - cols[i] = field->field_name; + for (uint old_i = 0; table->field[old_i]; old_i++) { + if (new_field->field == table->field[old_i]) { + cols[old_i] = new_field->field_name; + break; + } + } + + i++; } /* Copy the internal column names. */ + i = table->s->fields; cols[i] = dict_table_get_col_name(user_table, i); while (++i < user_table->n_def) { @@ -3337,6 +3351,9 @@ ha_innobase::prepare_inplace_alter_table( ulint fts_doc_col_no = ULINT_UNDEFINED; bool add_fts_doc_id = false; bool add_fts_doc_id_idx = false; +#ifdef _WIN32 + bool add_fts_idx = false; +#endif /* _WIN32 */ DBUG_ENTER("prepare_inplace_alter_table"); DBUG_ASSERT(!ha_alter_info->handler_ctx); @@ -3481,6 +3498,9 @@ check_if_ok_to_rename: & ~(HA_FULLTEXT | HA_PACK_KEY | HA_BINARY_PACK_KEY))); +#ifdef _WIN32 + add_fts_idx = true; +#endif /* _WIN32 */ continue; } @@ -3491,6 +3511,20 @@ check_if_ok_to_rename: } } +#ifdef _WIN32 + /* We won't be allowed to add fts index to a table with + fts indexes already but without AUX_HEX_NAME set. + This means the aux tables of the table failed to + rename to hex format but new created aux tables + shall be in hex format, which is contradictory. + It's only for Windows. */ + if (!DICT_TF2_FLAG_IS_SET(indexed_table, DICT_TF2_FTS_AUX_HEX_NAME) + && indexed_table->fts != NULL && add_fts_idx) { + my_error(ER_INNODB_FT_AUX_NOT_HEX_ID, MYF(0)); + goto err_exit_no_heap; + } +#endif /* _WIN32 */ + /* Check existing index definitions for too-long column prefixes as well, in case max_col_len shrunk. */ for (const dict_index_t* index @@ -3524,8 +3558,8 @@ check_if_ok_to_rename: if (ha_alter_info->handler_flags & Alter_inplace_info::ALTER_COLUMN_NAME) { col_names = innobase_get_col_names( - ha_alter_info, altered_table, indexed_table, - heap); + ha_alter_info, altered_table, table, + indexed_table, heap); } else { col_names = NULL; } @@ -4595,16 +4629,39 @@ commit_get_autoinc( & Alter_inplace_info::CHANGE_CREATE_OPTION) && (ha_alter_info->create_info->used_fields & HA_CREATE_USED_AUTO)) { - /* An AUTO_INCREMENT value was supplied, but the table - was not rebuilt. Get the user-supplied value or the - last value from the sequence. */ - ut_ad(old_table->found_next_number_field); + /* An AUTO_INCREMENT value was supplied, but the table was not + rebuilt. Get the user-supplied value or the last value from the + sequence. */ + ib_uint64_t max_value_table; + dberr_t err; + + Field* autoinc_field = + old_table->found_next_number_field; + + dict_index_t* index = dict_table_get_index_on_first_col( + ctx->old_table, autoinc_field->field_index); max_autoinc = ha_alter_info->create_info->auto_increment_value; dict_table_autoinc_lock(ctx->old_table); - if (max_autoinc < ctx->old_table->autoinc) { - max_autoinc = ctx->old_table->autoinc; + + err = row_search_max_autoinc( + index, autoinc_field->field_name, &max_value_table); + + if (err != DB_SUCCESS) { + ut_ad(0); + max_autoinc = 0; + } else if (max_autoinc <= max_value_table) { + ulonglong col_max_value; + ulonglong offset; + + col_max_value = innobase_get_int_col_max_value( + old_table->found_next_number_field); + + offset = ctx->prebuilt->autoinc_offset; + max_autoinc = innobase_next_autoinc( + max_value_table, 1, 1, offset, + col_max_value); } dict_table_autoinc_unlock(ctx->old_table); } else { diff --git a/storage/innobase/include/api0api.h b/storage/innobase/include/api0api.h index c294e3f34d5..d77d691becc 100644 --- a/storage/innobase/include/api0api.h +++ b/storage/innobase/include/api0api.h @@ -1257,6 +1257,16 @@ ib_cfg_get_cfg(); /*============*/ /*****************************************************************//** +Increase/decrease the memcached sync count of table to sync memcached +DML with SQL DDLs. +@return DB_SUCCESS or error number */ +ib_err_t +ib_cursor_set_memcached_sync( +/*=========================*/ + ib_crsr_t ib_crsr, /*!< in: cursor */ + ib_bool_t flag); /*!< in: true for increasing */ + +/*****************************************************************//** Check whether the table name conforms to our requirements. Currently we only do a simple check for the presence of a '/'. @return DB_SUCCESS or err code */ diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h index fc008cdd185..cfbaacf4de3 100644 --- a/storage/innobase/include/btr0pcur.h +++ b/storage/innobase/include/btr0pcur.h @@ -459,6 +459,13 @@ void btr_pcur_move_to_prev_on_page( /*==========================*/ btr_pcur_t* cursor);/*!< in/out: persistent cursor */ +/*********************************************************//** +Moves the persistent cursor to the infimum record on the same page. */ +UNIV_INLINE +void +btr_pcur_move_before_first_on_page( +/*===============================*/ + btr_pcur_t* cursor); /*!< in/out: persistent cursor */ /** Position state of persistent B-tree cursor. */ enum pcur_pos_t { diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic index 29f2fc722a2..7e355d3709d 100644 --- a/storage/innobase/include/btr0pcur.ic +++ b/storage/innobase/include/btr0pcur.ic @@ -588,3 +588,19 @@ btr_pcur_close( cursor->trx_if_known = NULL; } + +/*********************************************************//** +Moves the persistent cursor to the infimum record on the same page. */ +UNIV_INLINE +void +btr_pcur_move_before_first_on_page( +/*===============================*/ + btr_pcur_t* cursor) /*!< in/out: persistent cursor */ +{ + ut_ad(cursor->latch_mode != BTR_NO_LATCHES); + + page_cur_set_before_first(btr_pcur_get_block(cursor), + btr_pcur_get_page_cur(cursor)); + + cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; +} diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 7ad6339e63b..7e590c05209 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -607,6 +607,23 @@ buf_block_buf_fix_inc_func( # endif /* UNIV_SYNC_DEBUG */ buf_block_t* block) /*!< in/out: block to bufferfix */ __attribute__((nonnull)); + +/*******************************************************************//** +Increments the bufferfix count. */ +UNIV_INLINE +void +buf_block_fix( +/*===========*/ + buf_block_t* block); /*!< in/out: block to bufferfix */ + +/*******************************************************************//** +Increments the bufferfix count. */ +UNIV_INLINE +void +buf_block_unfix( +/*===========*/ + buf_block_t* block); /*!< in/out: block to bufferfix */ + # ifdef UNIV_SYNC_DEBUG /** Increments the bufferfix count. @param b in/out: block to bufferfix @@ -1423,38 +1440,42 @@ struct buf_page_t{ by buf_pool->mutex. */ /* @{ */ - unsigned space:32; /*!< tablespace id; also protected + ib_uint32_t space; /*!< tablespace id; also protected by buf_pool->mutex. */ - unsigned offset:32; /*!< page number; also protected + ib_uint32_t offset; /*!< page number; also protected by buf_pool->mutex. */ + /** count of how manyfold this block is currently bufferfixed */ +#ifdef PAGE_ATOMIC_REF_COUNT + ib_uint32_t buf_fix_count; + + /** type of pending I/O operation; also protected by + buf_pool->mutex for writes only @see enum buf_io_fix */ + byte io_fix; + byte state; +#else + unsigned buf_fix_count:19; + + /** type of pending I/O operation; also protected by + buf_pool->mutex for writes only @see enum buf_io_fix */ + unsigned io_fix:2; + + /*!< state of the control block; also protected by buf_pool->mutex. + State transitions from BUF_BLOCK_READY_FOR_USE to BUF_BLOCK_MEMORY + need not be protected by buf_page_get_mutex(). @see enum buf_page_state. + State changes that are relevant to page_hash are additionally protected + by the appropriate page_hash mutex i.e.: if a page is in page_hash or + is being added to/removed from page_hash then the corresponding changes + must also be protected by page_hash mutex. */ unsigned state:BUF_PAGE_STATE_BITS; - /*!< state of the control block; also - protected by buf_pool->mutex. - State transitions from - BUF_BLOCK_READY_FOR_USE to - BUF_BLOCK_MEMORY need not be - protected by buf_page_get_mutex(). - @see enum buf_page_state. - State changes that are relevant - to page_hash are additionally - protected by the appropriate - page_hash mutex i.e.: if a page - is in page_hash or is being - added to/removed from page_hash - then the corresponding changes - must also be protected by - page_hash mutex. */ + +#endif /* PAGE_ATOMIC_REF_COUNT */ + #ifndef UNIV_HOTBACKUP unsigned flush_type:2; /*!< if this block is currently being flushed to disk, this tells the flush_type. @see buf_flush_t */ - unsigned io_fix:2; /*!< type of pending I/O operation; - also protected by buf_pool->mutex - @see enum buf_io_fix */ - unsigned buf_fix_count:19;/*!< count of how manyfold this block - is currently bufferfixed */ unsigned buf_pool_index:6;/*!< index number of the buffer pool that this block belongs to */ # if MAX_BUFFER_POOLS > 64 @@ -1608,7 +1629,7 @@ struct buf_block_t{ decompressed LRU list; used in debugging */ #endif /* UNIV_DEBUG */ - ib_mutex_t mutex; /*!< mutex protecting this block: + ib_mutex_t mutex; /*!< mutex protecting this block: state (also protected by the buffer pool mutex), io_fix, buf_fix_count, and accessed; we introduce this new @@ -1794,9 +1815,9 @@ struct buf_pool_t{ /** @name General fields */ /* @{ */ - ib_mutex_t mutex; /*!< Buffer pool mutex of this + ib_mutex_t mutex; /*!< Buffer pool mutex of this instance */ - ib_mutex_t zip_mutex; /*!< Zip mutex of this buffer + ib_mutex_t zip_mutex; /*!< Zip mutex of this buffer pool instance, protects compressed only pages (of type buf_page_t, not buf_block_t */ @@ -1850,7 +1871,7 @@ struct buf_pool_t{ /* @{ */ - ib_mutex_t flush_list_mutex;/*!< mutex protecting the + ib_mutex_t flush_list_mutex;/*!< mutex protecting the flush list access. This mutex protects flush_list, flush_rbt and bpage::list pointers when @@ -1968,27 +1989,39 @@ Use these instead of accessing buf_pool->mutex directly. */ /** Test if a buffer pool mutex is owned. */ #define buf_pool_mutex_own(b) mutex_own(&b->mutex) /** Acquire a buffer pool mutex. */ -#define buf_pool_mutex_enter(b) do { \ - ut_ad(!mutex_own(&b->zip_mutex)); \ - mutex_enter(&b->mutex); \ +#define buf_pool_mutex_enter(b) do { \ + ut_ad(!mutex_own(&b->zip_mutex)); \ + mutex_enter(&b->mutex); \ } while (0) /** Test if flush list mutex is owned. */ #define buf_flush_list_mutex_own(b) mutex_own(&b->flush_list_mutex) /** Acquire the flush list mutex. */ -#define buf_flush_list_mutex_enter(b) do { \ - mutex_enter(&b->flush_list_mutex); \ +#define buf_flush_list_mutex_enter(b) do { \ + mutex_enter(&b->flush_list_mutex); \ } while (0) /** Release the flush list mutex. */ -# define buf_flush_list_mutex_exit(b) do { \ - mutex_exit(&b->flush_list_mutex); \ +# define buf_flush_list_mutex_exit(b) do { \ + mutex_exit(&b->flush_list_mutex); \ } while (0) +/** Test if block->mutex is owned. */ +#define buf_block_mutex_own(b) mutex_own(&(b)->mutex) + +/** Acquire the block->mutex. */ +#define buf_block_mutex_enter(b) do { \ + mutex_enter(&(b)->mutex); \ +} while (0) + +/** Release the trx->mutex. */ +#define buf_block_mutex_exit(b) do { \ + mutex_exit(&(b)->mutex); \ +} while (0) /** Get appropriate page_hash_lock. */ -# define buf_page_hash_lock_get(b, f) \ +# define buf_page_hash_lock_get(b, f) \ hash_get_lock(b->page_hash, f) #ifdef UNIV_SYNC_DEBUG diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic index 08b31a59da3..85868ad9f0e 100644 --- a/storage/innobase/include/buf0buf.ic +++ b/storage/innobase/include/buf0buf.ic @@ -342,15 +342,16 @@ buf_page_get_mutex( /*===============*/ const buf_page_t* bpage) /*!< in: pointer to control block */ { - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - switch (buf_page_get_state(bpage)) { case BUF_BLOCK_POOL_WATCH: ut_error; return(NULL); case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: + case BUF_BLOCK_ZIP_DIRTY: { + buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); + return(&buf_pool->zip_mutex); + } default: return(&((buf_block_t*) bpage)->mutex); } @@ -620,10 +621,11 @@ buf_page_set_accessed( buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); ut_ad(!buf_pool_mutex_own(buf_pool)); ut_ad(mutex_own(buf_page_get_mutex(bpage))); -#endif +#endif /* UNIV_DEBUG */ + ut_a(buf_page_in_file(bpage)); - if (!bpage->access_time) { + if (bpage->access_time == 0) { /* Make this the time of the first access. */ bpage->access_time = ut_time_ms(); } @@ -1000,6 +1002,25 @@ buf_block_get_modify_clock( Increments the bufferfix count. */ UNIV_INLINE void +buf_block_fix( +/*===========*/ + buf_block_t* block) /*!< in/out: block to bufferfix */ +{ +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_increment_uint32(&block->page.buf_fix_count, 1); +#else + ib_mutex_t* block_mutex = buf_page_get_mutex(&block->page); + + mutex_enter(block_mutex); + ++block->page.buf_fix_count; + mutex_exit(block_mutex); +#endif /* PAGE_ATOMIC_REF_COUNT */ +} + +/*******************************************************************//** +Increments the bufferfix count. */ +UNIV_INLINE +void buf_block_buf_fix_inc_func( /*=======================*/ #ifdef UNIV_SYNC_DEBUG @@ -1014,9 +1035,35 @@ buf_block_buf_fix_inc_func( ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line); ut_a(ret); #endif /* UNIV_SYNC_DEBUG */ + +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_increment_uint32(&block->page.buf_fix_count, 1); +#else ut_ad(mutex_own(&block->mutex)); - block->page.buf_fix_count++; + ++block->page.buf_fix_count; +#endif /* PAGE_ATOMIC_REF_COUNT */ +} + +/*******************************************************************//** +Decrements the bufferfix count. */ +UNIV_INLINE +void +buf_block_unfix( +/*============*/ + buf_block_t* block) /*!< in/out: block to bufferunfix */ +{ + ut_ad(block->page.buf_fix_count > 0); + +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_decrement_uint32(&block->page.buf_fix_count, 1); +#else + ib_mutex_t* block_mutex = buf_page_get_mutex(&block->page); + + mutex_enter(block_mutex); + --block->page.buf_fix_count; + mutex_exit(block_mutex); +#endif /* PAGE_ATOMIC_REF_COUNT */ } /*******************************************************************//** @@ -1027,9 +1074,16 @@ buf_block_buf_fix_dec( /*==================*/ buf_block_t* block) /*!< in/out: block to bufferunfix */ { - ut_ad(mutex_own(&block->mutex)); + ut_ad(block->page.buf_fix_count > 0); + +#ifdef PAGE_ATOMIC_REF_COUNT + os_atomic_decrement_uint32(&block->page.buf_fix_count, 1); +#else + mutex_enter(&block->mutex); + --block->page.buf_fix_count; + mutex_exit(&block->mutex); +#endif /* PAGE_ATOMIC_REF_COUNT */ - block->page.buf_fix_count--; #ifdef UNIV_SYNC_DEBUG rw_lock_s_unlock(&block->debug_latch); #endif @@ -1288,27 +1342,20 @@ buf_page_release_zip( buf_page_t* bpage) /*!< in: buffer block */ { buf_block_t* block; - buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); - ut_ad(bpage); - ut_a(bpage->buf_fix_count > 0); + block = (buf_block_t*) bpage; switch (buf_page_get_state(bpage)) { - case BUF_BLOCK_ZIP_PAGE: - case BUF_BLOCK_ZIP_DIRTY: - mutex_enter(&buf_pool->zip_mutex); - bpage->buf_fix_count--; - mutex_exit(&buf_pool->zip_mutex); - return; case BUF_BLOCK_FILE_PAGE: - block = (buf_block_t*) bpage; - mutex_enter(&block->mutex); #ifdef UNIV_SYNC_DEBUG rw_lock_s_unlock(&block->debug_latch); -#endif - bpage->buf_fix_count--; - mutex_exit(&block->mutex); +#endif /* UNUV_SYNC_DEBUG */ + /* Fall through */ + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_ZIP_DIRTY: + buf_block_unfix(block); return; + case BUF_BLOCK_POOL_WATCH: case BUF_BLOCK_NOT_USED: case BUF_BLOCK_READY_FOR_USE: @@ -1331,25 +1378,18 @@ buf_page_release( ulint rw_latch) /*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ { - ut_ad(block); - ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); - ut_a(block->page.buf_fix_count > 0); - - mutex_enter(&block->mutex); #ifdef UNIV_SYNC_DEBUG rw_lock_s_unlock(&(block->debug_latch)); #endif - block->page.buf_fix_count--; - - mutex_exit(&block->mutex); - if (rw_latch == RW_S_LATCH) { rw_lock_s_unlock(&(block->lock)); } else if (rw_latch == RW_X_LATCH) { rw_lock_x_unlock(&(block->lock)); } + + buf_block_unfix(block); } #ifdef UNIV_SYNC_DEBUG @@ -1367,6 +1407,7 @@ buf_block_dbg_add_level( { sync_thread_add_level(&block->lock, level, FALSE); } + #endif /* UNIV_SYNC_DEBUG */ /********************************************************************//** Acquire mutex on all buffer pool instances. */ diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h index 1b9336f4002..740286d0a82 100644 --- a/storage/innobase/include/buf0dblwr.h +++ b/storage/innobase/include/buf0dblwr.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -29,6 +29,7 @@ Created 2011/12/19 Inaam Rana #include "univ.i" #include "ut0byte.h" #include "log0log.h" +#include "log0recv.h" #ifndef UNIV_HOTBACKUP @@ -44,18 +45,25 @@ UNIV_INTERN void buf_dblwr_create(void); /*==================*/ + /****************************************************************//** At a database startup initializes the doublewrite buffer memory structure if we already have a doublewrite buffer created in the data files. If we are upgrading to an InnoDB version which supports multiple tablespaces, then this function performs the necessary update operations. If we are in a crash -recovery, this function uses a possible doublewrite buffer to restore -half-written pages in the data files. */ +recovery, this function loads the pages from double write buffer into memory. */ UNIV_INTERN void -buf_dblwr_init_or_restore_pages( -/*============================*/ - ibool restore_corrupt_pages); /*!< in: TRUE=restore pages */ +buf_dblwr_init_or_load_pages( +/*=========================*/ + bool load_corrupt_pages); + +/****************************************************************//** +Process the double write buffer pages. */ +void +buf_dblwr_process(void); +/*===================*/ + /****************************************************************//** frees doublewrite buffer. */ UNIV_INTERN diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h index 6fee9afcc91..f116720574b 100644 --- a/storage/innobase/include/buf0flu.h +++ b/storage/innobase/include/buf0flu.h @@ -234,16 +234,16 @@ NOTE: in simulated aio we must call os_aio_simulated_wake_handler_threads after we have posted a batch of writes! NOTE: buf_pool->mutex and buf_page_get_mutex(bpage) must be held upon entering this function, and they will be released by this -function. */ +function if it returns true. +@return TRUE if the page was flushed */ UNIV_INTERN -void +bool buf_flush_page( /*===========*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ buf_page_t* bpage, /*!< in: buffer control block */ buf_flush_t flush_type, /*!< in: type of flush */ - bool sync) /*!< in: true if sync IO request */ - __attribute__((nonnull)); + bool sync); /*!< in: true if sync IO request */ /********************************************************************//** Returns true if the block is modified and ready for flushing. @return true if can flush immediately */ diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h index 307ef18f0c2..11bbc9b5c8a 100644 --- a/storage/innobase/include/buf0types.h +++ b/storage/innobase/include/buf0types.h @@ -26,6 +26,10 @@ Created 11/17/1995 Heikki Tuuri #ifndef buf0types_h #define buf0types_h +#if defined(INNODB_PAGE_ATOMIC_REF_COUNT) && defined(HAVE_ATOMIC_BUILTINS) +#define PAGE_ATOMIC_REF_COUNT +#endif /* INNODB_PAGE_ATOMIC_REF_COUNT && HAVE_ATOMIC_BUILTINS */ + /** Buffer page (uncompressed or compressed) */ struct buf_page_t; /** Buffer block for which an uncompressed page exists */ diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index f740c427006..ce709a2e912 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -1443,20 +1443,16 @@ UNIV_INTERN void dict_table_stats_lock( /*==================*/ - const dict_table_t* table, /*!< in: table */ - ulint latch_mode) /*!< in: RW_S_LATCH or - RW_X_LATCH */ - __attribute__((nonnull)); + dict_table_t* table, /*!< in: table */ + ulint latch_mode); /*!< in: RW_S_LATCH or RW_X_LATCH */ /**********************************************************************//** Unlock the latch that has been locked by dict_table_stats_lock() */ UNIV_INTERN void dict_table_stats_unlock( /*====================*/ - const dict_table_t* table, /*!< in: table */ - ulint latch_mode) /*!< in: RW_S_LATCH or - RW_X_LATCH */ - __attribute__((nonnull)); + dict_table_t* table, /*!< in: table */ + ulint latch_mode); /*!< in: RW_S_LATCH or RW_X_LATCH */ /********************************************************************//** Checks if the database name in two table names is the same. @return TRUE if same db name */ @@ -1802,6 +1798,17 @@ const char* dict_tf_to_row_format_string( /*=========================*/ ulint table_flag); /*!< in: row format setting */ +/*****************************************************************//** +Get index by first field of the index +@return index which is having first field matches +with the field present in field_index position of table */ +UNIV_INLINE +dict_index_t* +dict_table_get_index_on_first_col( +/*==============================*/ + const dict_table_t* table, /*!< in: table */ + ulint col_index); /*!< in: position of column + in table */ #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 83953c9325a..066ffe47e4a 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -1403,4 +1403,31 @@ dict_table_is_temporary( return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)); } +/**********************************************************************//** +Get index by first field of the index +@return index which is having first field matches +with the field present in field_index position of table */ +UNIV_INLINE +dict_index_t* +dict_table_get_index_on_first_col( +/*==============================*/ + const dict_table_t* table, /*!< in: table */ + ulint col_index) /*!< in: position of column + in table */ +{ + ut_ad(col_index < table->n_cols); + + dict_col_t* column = dict_table_get_nth_col(table, col_index); + + for (dict_index_t* index = dict_table_get_first_index(table); + index != NULL; index = dict_table_get_next_index(index)) { + + if (index->fields[0].col == column) { + return(index); + } + } + ut_error; + return(0); +} + #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index bc90e2ddfaf..eb259020106 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -191,7 +191,7 @@ ROW_FORMAT=REDUNDANT. InnoDB engines do not check these flags for unknown bits in order to protect backward incompatibility. */ /* @{ */ /** Total number of bits in table->flags2. */ -#define DICT_TF2_BITS 6 +#define DICT_TF2_BITS 7 #define DICT_TF2_BIT_MASK ~(~0 << DICT_TF2_BITS) /** TEMPORARY; TRUE for tables from CREATE TEMPORARY TABLE. */ @@ -209,6 +209,10 @@ use its own tablespace instead of the system tablespace. */ /** Set when we discard/detach the tablespace */ #define DICT_TF2_DISCARDED 32 + +/** This bit is set if all aux table names (both common tables and +index tables) of a FTS table are in HEX format. */ +#define DICT_TF2_FTS_AUX_HEX_NAME 64 /* @} */ #define DICT_TF2_FLAG_SET(table, flag) \ @@ -717,6 +721,11 @@ a foreign key constraint is enforced, therefore RESTRICT just means no flag */ #define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32 /*!< ON UPDATE NO ACTION */ /* @} */ +/* This flag is for sync SQL DDL and memcached DML. +if table->memcached_sync_count == DICT_TABLE_IN_DDL means there's DDL running on +the table, DML from memcached will be blocked. */ +#define DICT_TABLE_IN_DDL -1 + /** Data structure for a database table. Most fields will be initialized to 0, NULL or FALSE in dict_mem_table_create(). */ struct dict_table_t{ @@ -830,9 +839,28 @@ struct dict_table_t{ initialized in dict_table_add_to_cache() */ /** Statistics for query optimization */ /* @{ */ + rw_lock_t* stats_latch; /*!< this latch protects: + dict_table_t::stat_initialized + dict_table_t::stat_n_rows (*) + dict_table_t::stat_clustered_index_size + dict_table_t::stat_sum_of_other_index_sizes + dict_table_t::stat_modified_counter (*) + dict_table_t::indexes*::stat_n_diff_key_vals[] + dict_table_t::indexes*::stat_index_size + dict_table_t::indexes*::stat_n_leaf_pages + (*) those are not always protected for + performance reasons */ unsigned stat_initialized:1; /*!< TRUE if statistics have been calculated the first time after database startup or table creation */ +#define DICT_TABLE_IN_USED -1 + lint memcached_sync_count; + /*!< count of how many handles are opened + to this table from memcached; DDL on the + table is NOT allowed until this count + goes to zero. If it's -1, means there's DDL + on the table, DML from memcached will be + blocked. */ ib_time_t stats_last_recalc; /*!< Timestamp of last recalc of the stats */ ib_uint32_t stat_persistent; diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index daeca1d8e44..6e906fa05b0 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -127,6 +127,8 @@ extern fil_addr_t fil_addr_null; at least up to this lsn */ #define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this contains the space id of the page */ +#define FIL_PAGE_SPACE_ID FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID + #define FIL_PAGE_DATA 38 /*!< start of the data on the page */ /* @} */ /** File page trailer @{ */ diff --git a/storage/innobase/include/fts0priv.h b/storage/innobase/include/fts0priv.h index c6aca27f6ec..b4d9e1d41ec 100644 --- a/storage/innobase/include/fts0priv.h +++ b/storage/innobase/include/fts0priv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -569,7 +569,10 @@ int fts_write_object_id( /*================*/ ib_id_t id, /*!< in: a table/index id */ - char* str) /*!< in: buffer to write the id to */ + char* str, /*!< in: buffer to write the id to */ + bool hex_format __attribute__((unused))) + /*!< in: true for fixed hex format, + false for old ambiguous format */ __attribute__((nonnull)); /******************************************************************//** Read the table id from the string generated by fts_write_object_id(). diff --git a/storage/innobase/include/fts0priv.ic b/storage/innobase/include/fts0priv.ic index 268bb7e2227..8ef877f267e 100644 --- a/storage/innobase/include/fts0priv.ic +++ b/storage/innobase/include/fts0priv.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -32,9 +32,24 @@ int fts_write_object_id( /*================*/ ib_id_t id, /* in: a table/index id */ - char* str) /* in: buffer to write the id to */ + char* str, /* in: buffer to write the id to */ + bool hex_format __attribute__((unused))) + /* in: true for fixed hex format, + false for old ambiguous format */ { - // FIXME: Use ut_snprintf() +#ifdef _WIN32 + /* Use this to construct old(5.6.14 and 5.7.3) ambiguous + aux table names */ + DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", + return(sprintf(str, "%016llu", id));); + + /* As above, but this is only for those tables failing to rename. */ + if (!hex_format) { + // FIXME: Use ut_snprintf(), so does following one. + return(sprintf(str, "%016llu", id)); + } +#endif /* _WIN32 */ + return(sprintf(str, UINT64PFx, id)); } @@ -48,6 +63,9 @@ fts_read_object_id( ib_id_t* id, /* out: an id */ const char* str) /* in: buffer to read from */ { + /* NOTE: this func doesn't care about whether current table + is set with HEX_NAME, the user of the id read here will check + if the id is HEX or DEC and do the right thing with it. */ return(sscanf(str, UINT64PFx, id) == 1); } diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index 6fdad6a0b89..a02b8f1893a 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -414,16 +414,6 @@ innobase_fts_text_case_cmp( const void* p1, /*!< in: key */ const void* p2); /*!< in: node */ -/******************************************************************//** -compare two character string according to their charset. */ -UNIV_INTERN -int -innobase_fts_string_cmp( -/*====================*/ - const void* cs, /*!< in: Character set */ - const void* p1, /*!< in: key */ - const void* p2); /*!< in: node */ - /****************************************************************//** Get FTS field charset info from the field's prtype @return charset info */ diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index dcdd4bdd8aa..b94f5a6fcec 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -31,6 +31,7 @@ Created 9/20/1997 Heikki Tuuri #include "buf0types.h" #include "hash0hash.h" #include "log0log.h" +#include <list> #ifdef UNIV_HOTBACKUP extern ibool recv_replay_file_ops; @@ -367,6 +368,14 @@ struct recv_addr_t{ hash_node_t addr_hash;/*!< hash node in the hash bucket chain */ }; +struct recv_dblwr_t { + void add(byte* page); + + byte* find_first_page(ulint space_id); + + std::list<byte *> pages; /* Pages from double write buffer */ +}; + /** Recovery system data structure */ struct recv_sys_t{ #ifndef UNIV_HOTBACKUP @@ -431,6 +440,8 @@ struct recv_sys_t{ hash_table_t* addr_hash;/*!< hash table of file addresses of pages */ ulint n_addrs;/*!< number of not processed hashed file addresses in the hash table */ + + recv_dblwr_t dblwr; }; /** The recovery system */ diff --git a/storage/innobase/include/mem0mem.h b/storage/innobase/include/mem0mem.h index c36ef06b554..f30034f3074 100644 --- a/storage/innobase/include/mem0mem.h +++ b/storage/innobase/include/mem0mem.h @@ -92,18 +92,35 @@ void mem_close(void); /*===========*/ +#ifdef UNIV_DEBUG /**************************************************************//** Use this macro instead of the corresponding function! Macro for memory heap creation. */ -#define mem_heap_create(N) mem_heap_create_func(\ - (N), MEM_HEAP_DYNAMIC, __FILE__, __LINE__) +# define mem_heap_create(N) mem_heap_create_func( \ + (N), __FILE__, __LINE__, MEM_HEAP_DYNAMIC) /**************************************************************//** Use this macro instead of the corresponding function! Macro for memory heap creation. */ -#define mem_heap_create_typed(N, T) mem_heap_create_func(\ - (N), (T), __FILE__, __LINE__) +# define mem_heap_create_typed(N, T) mem_heap_create_func( \ + (N), __FILE__, __LINE__, (T)) + +#else /* UNIV_DEBUG */ +/**************************************************************//** +Use this macro instead of the corresponding function! Macro for memory +heap creation. */ + +# define mem_heap_create(N) mem_heap_create_func( \ + (N), MEM_HEAP_DYNAMIC) +/**************************************************************//** +Use this macro instead of the corresponding function! Macro for memory +heap creation. */ + +# define mem_heap_create_typed(N, T) mem_heap_create_func( \ + (N), (T)) + +#endif /* UNIV_DEBUG */ /**************************************************************//** Use this macro instead of the corresponding function! Macro for memory heap freeing. */ @@ -124,9 +141,11 @@ mem_heap_create_func( this means that a single user buffer of size n will fit in the block, 0 creates a default size block */ - ulint type, /*!< in: heap type */ +#ifdef UNIV_DEBUG const char* file_name, /*!< in: file name where created */ - ulint line); /*!< in: line where created */ + ulint line, /*!< in: line where created */ +#endif /* UNIV_DEBUG */ + ulint type); /*!< in: heap type */ /*****************************************************************//** NOTE: Use the corresponding macro instead of this function. Frees the space occupied by a memory heap. In the debug version erases the heap memory @@ -218,8 +237,14 @@ Macro for memory buffer allocation */ #define mem_zalloc(N) memset(mem_alloc(N), 0, (N)) -#define mem_alloc(N) mem_alloc_func((N), NULL, __FILE__, __LINE__) -#define mem_alloc2(N,S) mem_alloc_func((N), (S), __FILE__, __LINE__) +#ifdef UNIV_DEBUG +#define mem_alloc(N) mem_alloc_func((N), __FILE__, __LINE__, NULL) +#define mem_alloc2(N,S) mem_alloc_func((N), __FILE__, __LINE__, (S)) +#else /* UNIV_DEBUG */ +#define mem_alloc(N) mem_alloc_func((N), NULL) +#define mem_alloc2(N,S) mem_alloc_func((N), (S)) +#endif /* UNIV_DEBUG */ + /***************************************************************//** NOTE: Use the corresponding macro instead of this function. Allocates a single buffer of memory from the dynamic memory of @@ -231,10 +256,12 @@ void* mem_alloc_func( /*===========*/ ulint n, /*!< in: requested size in bytes */ - ulint* size, /*!< out: allocated size in bytes, - or NULL */ +#ifdef UNIV_DEBUG const char* file_name, /*!< in: file name where created */ - ulint line); /*!< in: line where created */ + ulint line, /*!< in: line where created */ +#endif /* UNIV_DEBUG */ + ulint* size); /*!< out: allocated size in bytes, + or NULL */ /**************************************************************//** Use this macro instead of the corresponding function! @@ -343,8 +370,10 @@ mem_validate_all_blocks(void); /** The info structure stored at the beginning of a heap block */ struct mem_block_info_t { ulint magic_n;/* magic number for debugging */ +#ifdef UNIV_DEBUG char file_name[8];/* file name where the mem heap was created */ ulint line; /*!< line number where the mem heap was created */ +#endif /* UNIV_DEBUG */ UT_LIST_BASE_NODE_T(mem_block_t) base; /* In the first block in the the list this is the base node of the list of blocks; in subsequent blocks this is undefined */ diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic index 7f0e128cc40..0d983d69e1a 100644 --- a/storage/innobase/include/mem0mem.ic +++ b/storage/innobase/include/mem0mem.ic @@ -28,21 +28,34 @@ Created 6/8/1994 Heikki Tuuri # include "mem0pool.h" #endif /* !UNIV_HOTBACKUP */ +#ifdef UNIV_DEBUG +# define mem_heap_create_block(heap, n, type, file_name, line) \ + mem_heap_create_block_func(heap, n, file_name, line, type) +# define mem_heap_create_at(N, file_name, line) \ + mem_heap_create_func(N, file_name, line, MEM_HEAP_DYNAMIC) +#else /* UNIV_DEBUG */ +# define mem_heap_create_block(heap, n, type, file_name, line) \ + mem_heap_create_block_func(heap, n, type) +# define mem_heap_create_at(N, file_name, line) \ + mem_heap_create_func(N, MEM_HEAP_DYNAMIC) +#endif /* UNIV_DEBUG */ /***************************************************************//** Creates a memory heap block where data can be allocated. @return own: memory heap block, NULL if did not succeed (only possible for MEM_HEAP_BTR_SEARCH type heaps) */ UNIV_INTERN mem_block_t* -mem_heap_create_block( -/*==================*/ +mem_heap_create_block_func( +/*=======================*/ mem_heap_t* heap, /*!< in: memory heap or NULL if first block should be created */ ulint n, /*!< in: number of bytes needed for user data */ - ulint type, /*!< in: type of heap: MEM_HEAP_DYNAMIC or - MEM_HEAP_BUFFER */ +#ifdef UNIV_DEBUG const char* file_name,/*!< in: file name where created */ - ulint line); /*!< in: line where created */ + ulint line, /*!< in: line where created */ +#endif /* UNIV_DEBUG */ + ulint type); /*!< in: type of heap: MEM_HEAP_DYNAMIC or + MEM_HEAP_BUFFER */ /******************************************************************//** Frees a block from a memory heap. */ UNIV_INTERN @@ -421,9 +434,11 @@ mem_heap_create_func( this means that a single user buffer of size n will fit in the block, 0 creates a default size block */ - ulint type, /*!< in: heap type */ +#ifdef UNIV_DEBUG const char* file_name, /*!< in: file name where created */ - ulint line) /*!< in: line where created */ + ulint line, /*!< in: line where created */ +#endif /* UNIV_DEBUG */ + ulint type) /*!< in: heap type */ { mem_block_t* block; @@ -509,15 +524,17 @@ void* mem_alloc_func( /*===========*/ ulint n, /*!< in: desired number of bytes */ - ulint* size, /*!< out: allocated size in bytes, - or NULL */ +#ifdef UNIV_DEBUG const char* file_name, /*!< in: file name where created */ - ulint line) /*!< in: line where created */ + ulint line, /*!< in: line where created */ +#endif /* UNIV_DEBUG */ + ulint* size) /*!< out: allocated size in bytes, + or NULL */ { mem_heap_t* heap; void* buf; - heap = mem_heap_create_func(n, MEM_HEAP_DYNAMIC, file_name, line); + heap = mem_heap_create_at(n, file_name, line); /* Note that as we created the first block in the heap big enough for the buffer requested by the caller, the buffer will be in the diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index f19d61e0137..8e2948e2d68 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -1,6 +1,6 @@ /*********************************************************************** -Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. Portions of this file contain modifications contributed and copyrighted @@ -150,6 +150,10 @@ enum os_file_create_t { #define OS_FILE_INSUFFICIENT_RESOURCE 78 #define OS_FILE_AIO_INTERRUPTED 79 #define OS_FILE_OPERATION_ABORTED 80 + +#define OS_FILE_ACCESS_VIOLATION 81 + +#define OS_FILE_ERROR_MAX 100 /* @} */ /** Types for aio operations @{ */ @@ -381,7 +385,8 @@ enum os_file_type_t { OS_FILE_TYPE_UNKNOWN = 0, OS_FILE_TYPE_FILE, /* regular file */ OS_FILE_TYPE_DIR, /* directory */ - OS_FILE_TYPE_LINK /* symbolic link */ + OS_FILE_TYPE_LINK, /* symbolic link */ + OS_FILE_TYPE_BLOCK /* block device */ }; /* Maximum path string length in bytes when referring to tables with in the diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h index d3ce68253ec..ea3f2a06db5 100644 --- a/storage/innobase/include/os0sync.h +++ b/storage/innobase/include/os0sync.h @@ -374,6 +374,9 @@ compare to, new_val is the value to swap in. */ # define os_compare_and_swap_lint(ptr, old_val, new_val) \ os_compare_and_swap(ptr, old_val, new_val) +# define os_compare_and_swap_uint32(ptr, old_val, new_val) \ + os_compare_and_swap(ptr, old_val, new_val) + # ifdef HAVE_IB_ATOMIC_PTHREAD_T_GCC # define os_compare_and_swap_thread_id(ptr, old_val, new_val) \ os_compare_and_swap(ptr, old_val, new_val) @@ -395,6 +398,9 @@ amount of increment. */ # define os_atomic_increment_lint(ptr, amount) \ os_atomic_increment(ptr, amount) +# define os_atomic_increment_uint32(ptr, amount ) \ + os_atomic_increment(ptr, amount) + # define os_atomic_increment_ulint(ptr, amount) \ os_atomic_increment(ptr, amount) @@ -407,6 +413,9 @@ amount to decrement. */ # define os_atomic_decrement(ptr, amount) \ __sync_sub_and_fetch(ptr, amount) +# define os_atomic_decrement_uint32(ptr, amount) \ + os_atomic_decrement(ptr, amount) + # define os_atomic_decrement_lint(ptr, amount) \ os_atomic_decrement(ptr, amount) @@ -439,6 +448,9 @@ intrinsics and running on Solaris >= 10 use Solaris atomics */ Returns true if swapped, ptr is pointer to target, old_val is value to compare to, new_val is the value to swap in. */ +# define os_compare_and_swap_uint32(ptr, old_val, new_val) \ + (atomic_cas_32(ptr, old_val, new_val) == old_val) + # define os_compare_and_swap_ulint(ptr, old_val, new_val) \ (atomic_cas_ulong(ptr, old_val, new_val) == old_val) @@ -467,6 +479,9 @@ compare to, new_val is the value to swap in. */ Returns the resulting value, ptr is pointer to target, amount is the amount of increment. */ +# define os_atomic_increment_uint32(ptr, amount) \ + atomic_add_32_nv(ptr, amount) + # define os_atomic_increment_ulint(ptr, amount) \ atomic_add_long_nv(ptr, amount) @@ -479,6 +494,9 @@ amount of increment. */ /* Returns the resulting value, ptr is pointer to target, amount is the amount to decrement. */ +# define os_atomic_decrement_uint32(ptr, amount) \ + os_atomic_increment_uint32(ptr, -(amount)) + # define os_atomic_decrement_lint(ptr, amount) \ os_atomic_increment_ulint((ulong_t*) ptr, -(amount)) @@ -555,6 +573,9 @@ win_cmp_and_xchg_dword( Returns true if swapped, ptr is pointer to target, old_val is value to compare to, new_val is the value to swap in. */ +# define os_compare_and_swap_uint32(ptr, old_val, new_val) \ + (win_cmp_and_xchg_dword(ptr, new_val, old_val) == old_val) + # define os_compare_and_swap_ulint(ptr, old_val, new_val) \ (win_cmp_and_xchg_ulint(ptr, new_val, old_val) == old_val) @@ -576,6 +597,9 @@ amount of increment. */ # define os_atomic_increment_lint(ptr, amount) \ (win_xchg_and_add(ptr, amount) + amount) +# define os_atomic_increment_uint32(ptr, amount) \ + ((ulint) _InterlockedExchangeAdd((long*) ptr, amount)) + # define os_atomic_increment_ulint(ptr, amount) \ ((ulint) (win_xchg_and_add((lint*) ptr, (lint) amount) + amount)) @@ -588,6 +612,9 @@ amount of increment. */ Returns the resulting value, ptr is pointer to target, amount is the amount to decrement. There is no atomic substract function on Windows */ +# define os_atomic_decrement_uint32(ptr, amount) \ + ((ulint) _InterlockedExchangeAdd((long*) ptr, (-amount))) + # define os_atomic_decrement_lint(ptr, amount) \ (win_xchg_and_add(ptr, -(lint) amount) - amount) diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h index fb21aaec778..b572f7abb49 100644 --- a/storage/innobase/include/page0page.h +++ b/storage/innobase/include/page0page.h @@ -1102,6 +1102,14 @@ page_find_rec_with_heap_no( /*=======================*/ const page_t* page, /*!< in: index page */ ulint heap_no);/*!< in: heap number */ +/** Get the last non-delete-marked record on a page. +@param[in] page index tree leaf page +@return the last record, not delete-marked +@retval infimum record if all records are delete-marked */ + +const rec_t* +page_find_rec_max_not_deleted( + const page_t* page); #ifdef UNIV_MATERIALIZE #undef UNIV_INLINE #define UNIV_INLINE UNIV_INLINE_ORIGINAL diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic index 1410f21b670..9b81156708f 100644 --- a/storage/innobase/include/page0page.ic +++ b/storage/innobase/include/page0page.ic @@ -414,6 +414,8 @@ page_rec_is_user_rec( /*=================*/ const rec_t* rec) /*!< in: record */ { + ut_ad(page_rec_check(rec)); + return(page_rec_is_user_rec_low(page_offset(rec))); } @@ -426,6 +428,8 @@ page_rec_is_supremum( /*=================*/ const rec_t* rec) /*!< in: record */ { + ut_ad(page_rec_check(rec)); + return(page_rec_is_supremum_low(page_offset(rec))); } @@ -438,6 +442,8 @@ page_rec_is_infimum( /*================*/ const rec_t* rec) /*!< in: record */ { + ut_ad(page_rec_check(rec)); + return(page_rec_is_infimum_low(page_offset(rec))); } diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h index 2a84aee7a6f..8e7d5ff2d48 100644 --- a/storage/innobase/include/rem0rec.h +++ b/storage/innobase/include/rem0rec.h @@ -440,13 +440,24 @@ rec_get_offsets_func( ulint n_fields,/*!< in: maximum number of initialized fields (ULINT_UNDEFINED if all fields) */ - mem_heap_t** heap, /*!< in/out: memory heap */ +#ifdef UNIV_DEBUG const char* file, /*!< in: file name where called */ - ulint line) /*!< in: line number where called */ - __attribute__((nonnull(1,2,5,6),warn_unused_result)); + ulint line, /*!< in: line number where called */ +#endif /* UNIV_DEBUG */ + mem_heap_t** heap) /*!< in/out: memory heap */ +#ifdef UNIV_DEBUG + __attribute__((nonnull(1,2,5,7),warn_unused_result)); +#else /* UNIV_DEBUG */ + __attribute__((nonnull(1,2,5),warn_unused_result)); +#endif /* UNIV_DEBUG */ -#define rec_get_offsets(rec,index,offsets,n,heap) \ - rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__) +#ifdef UNIV_DEBUG +# define rec_get_offsets(rec,index,offsets,n,heap) \ + rec_get_offsets_func(rec,index,offsets,n,__FILE__,__LINE__,heap) +#else /* UNIV_DEBUG */ +# define rec_get_offsets(rec, index, offsets, n, heap) \ + rec_get_offsets_func(rec, index, offsets, n, heap) +#endif /* UNIV_DEBUG */ /******************************************************//** The following function determines the offsets to each field diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 1e98cf690d8..79eaee73752 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2012, Oracle and/or its affiliates. All rights reserved. +Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, 2009, Google Inc. Copyright (c) 2009, Percona Inc. @@ -360,8 +360,8 @@ extern ulong srv_max_purge_lag_delay; extern ulong srv_replication_delay; /*-------------------------------------------*/ -extern ibool srv_print_innodb_monitor; -extern ibool srv_print_innodb_lock_monitor; +extern my_bool srv_print_innodb_monitor; +extern my_bool srv_print_innodb_lock_monitor; extern ibool srv_print_innodb_tablespace_monitor; extern ibool srv_print_verbose_log; #define DEPRECATED_MSG_INNODB_TABLE_MONITOR \ diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h index c268098d1ea..34cd8ef4bd6 100644 --- a/storage/innobase/include/sync0rw.h +++ b/storage/innobase/include/sync0rw.h @@ -181,6 +181,9 @@ unlocking, not the corresponding function. */ # define rw_lock_s_lock_gen(M, P) \ rw_lock_s_lock_func((M), (P), __FILE__, __LINE__) +# define rw_lock_s_lock_gen_nowait(M, P) \ + rw_lock_s_lock_low((M), (P), __FILE__, __LINE__) + # define rw_lock_s_lock_nowait(M, F, L) \ rw_lock_s_lock_low((M), 0, (F), (L)) @@ -243,6 +246,9 @@ unlocking, not the corresponding function. */ # define rw_lock_s_lock_gen(M, P) \ pfs_rw_lock_s_lock_func((M), (P), __FILE__, __LINE__) +# define rw_lock_s_lock_gen_nowait(M, P) \ + pfs_rw_lock_s_lock_low((M), (P), __FILE__, __LINE__) + # define rw_lock_s_lock_nowait(M, F, L) \ pfs_rw_lock_s_lock_low((M), 0, (F), (L)) diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic index 8786ad84643..1ddae5e6c58 100644 --- a/storage/innobase/include/sync0rw.ic +++ b/storage/innobase/include/sync0rw.ic @@ -380,8 +380,6 @@ rw_lock_x_lock_func_nowait( const char* file_name,/*!< in: file name where lock requested */ ulint line) /*!< in: line where requested */ { - os_thread_id_t curr_thread = os_thread_get_curr_id(); - ibool success; #ifdef INNODB_RW_LOCKS_USE_ATOMICS @@ -401,7 +399,8 @@ rw_lock_x_lock_func_nowait( rw_lock_set_writer_id_and_recursion_flag(lock, TRUE); } else if (lock->recursive - && os_thread_eq(lock->writer_thread, curr_thread)) { + && os_thread_eq(lock->writer_thread, + os_thread_get_curr_id())) { /* Relock: this lock_word modification is safe since no other threads can modify (lock, unlock, or reserve) lock_word while there is an exclusive writer and this is the writer thread. */ diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index f580901237f..34e4c0067e2 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -836,8 +836,7 @@ struct trx_t{ when trx->in_rw_trx_list. Initially set to TRX_ID_MAX. */ - time_t start_time; /*!< time the trx object was created - or the state last time became + time_t start_time; /*!< time the trx state last time became TRX_STATE_ACTIVE */ trx_id_t id; /*!< transaction id */ XID xid; /*!< X/Open XA transaction diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index 9b5fffe81e8..9ab123aa4f2 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -44,7 +44,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 5 #define INNODB_VERSION_MINOR 6 -#define INNODB_VERSION_BUGFIX 15 +#define INNODB_VERSION_BUGFIX 16 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; @@ -171,7 +171,6 @@ command. Not tested on Windows. */ #define UNIV_COMPILE_TEST_FUNCS */ -#undef UNIV_SYNC_DEBUG #if defined(HAVE_valgrind)&& defined(HAVE_VALGRIND_MEMCHECK_H) # define UNIV_DEBUG_VALGRIND #endif /* HAVE_VALGRIND */ @@ -433,7 +432,7 @@ macro ULINTPF. */ # define UINT32PF "%I32u" # define INT64PF "%I64d" # define UINT64PF "%I64u" -# define UINT64PFx "%016I64u" +# define UINT64PFx "%016I64x" # define DBUG_LSN_PF "%llu" typedef __int64 ib_int64_t; typedef unsigned __int64 ib_uint64_t; diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index e7a643b516a..08f50f70fd4 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. This program is free software; you can redistribute it and/or modify it under @@ -24,6 +24,11 @@ Recovery Created 9/20/1997 Heikki Tuuri *******************************************************/ +// First include (the generated) my_config.h, to get correct platform defines. +#include "my_config.h" +#include <stdio.h> // Solaris/x86 header file bug + +#include <vector> #include "log0recv.h" #ifdef UNIV_NONINL @@ -52,6 +57,7 @@ Created 9/20/1997 Heikki Tuuri # include "sync0sync.h" #else /* !UNIV_HOTBACKUP */ + /** This is set to FALSE if the backup was originally taken with the ibbackup --include regexp option: then we do not want to create tables in directories which were not included */ @@ -422,6 +428,9 @@ recv_sys_init( recv_max_page_lsn = 0; + /* Call the constructor for recv_sys_t::dblwr member */ + new (&recv_sys->dblwr) recv_dblwr_t(); + mutex_exit(&(recv_sys->mutex)); } @@ -1311,13 +1320,22 @@ recv_parse_or_apply_log_rec_body( ptr = mlog_parse_string(ptr, end_ptr, page, page_zip); break; case MLOG_FILE_RENAME: - ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, - space_id, 0); + /* Do not rerun file-based log entries if this is + IO completion from a page read. */ + if (page == NULL) { + ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, + space_id, 0); + } break; case MLOG_FILE_CREATE: case MLOG_FILE_DELETE: case MLOG_FILE_CREATE2: - ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, 0, 0); + /* Do not rerun file-based log entries if this is + IO completion from a page read. */ + if (page == NULL) { + ptr = fil_op_log_parse_or_replay(ptr, end_ptr, + type, 0, 0); + } break; case MLOG_ZIP_WRITE_NODE_PTR: ut_ad(!page || page_type == FIL_PAGE_INDEX); @@ -2953,6 +2971,8 @@ recv_init_crash_recovery(void) ib_logf(IB_LOG_LEVEL_INFO, "Reading tablespace information from the .ibd files..."); + buf_dblwr_init_or_load_pages(true); + fil_load_single_table_tablespaces(); /* If we are using the doublewrite method, we will @@ -2968,7 +2988,7 @@ recv_init_crash_recovery(void) ib_logf(IB_LOG_LEVEL_INFO, "from the doublewrite buffer..."); - buf_dblwr_init_or_restore_pages(TRUE); + buf_dblwr_process(); /* Spawn the background thread to flush dirty pages from the buffer pools. */ @@ -3262,7 +3282,7 @@ recv_recovery_from_checkpoint_start_func( if (!recv_needed_recovery && !srv_read_only_mode) { /* Init the doublewrite buffer memory structure */ - buf_dblwr_init_or_restore_pages(FALSE); + buf_dblwr_init_or_load_pages(false); } } @@ -3964,3 +3984,46 @@ recv_recovery_from_archive_finish(void) recv_recovery_from_backup_on = FALSE; } #endif /* UNIV_LOG_ARCHIVE */ + + +void recv_dblwr_t::add(byte* page) +{ + pages.push_back(page); +} + +byte* recv_dblwr_t::find_first_page(ulint space_id) +{ + std::vector<byte*> matches; + byte* result = 0; + + for (std::list<byte*>::iterator i = pages.begin(); + i != pages.end(); ++i) { + + if ((page_get_space_id(*i) == space_id) + && (page_get_page_no(*i) == 0)) { + matches.push_back(*i); + } + } + + if (matches.size() == 1) { + result = matches[0]; + } else if (matches.size() > 1) { + + lsn_t max_lsn = 0; + lsn_t page_lsn = 0; + + for (std::vector<byte*>::iterator i = matches.begin(); + i != matches.end(); ++i) { + + page_lsn = mach_read_from_8(*i + FIL_PAGE_LSN); + + if (page_lsn > max_lsn) { + max_lsn = page_lsn; + result = *i; + } + } + } + + return(result); +} + diff --git a/storage/innobase/mem/mem0mem.cc b/storage/innobase/mem/mem0mem.cc index e0e6220f4d8..e066aff5b30 100644 --- a/storage/innobase/mem/mem0mem.cc +++ b/storage/innobase/mem/mem0mem.cc @@ -299,15 +299,17 @@ Creates a memory heap block where data can be allocated. for MEM_HEAP_BTR_SEARCH type heaps) */ UNIV_INTERN mem_block_t* -mem_heap_create_block( -/*==================*/ +mem_heap_create_block_func( +/*=======================*/ mem_heap_t* heap, /*!< in: memory heap or NULL if first block should be created */ ulint n, /*!< in: number of bytes needed for user data */ - ulint type, /*!< in: type of heap: MEM_HEAP_DYNAMIC or - MEM_HEAP_BUFFER */ +#ifdef UNIV_DEBUG const char* file_name,/*!< in: file name where created */ - ulint line) /*!< in: line where created */ + ulint line, /*!< in: line where created */ +#endif /* UNIV_DEBUG */ + ulint type) /*!< in: type of heap: MEM_HEAP_DYNAMIC or + MEM_HEAP_BUFFER */ { #ifndef UNIV_HOTBACKUP buf_block_t* buf_block = NULL; @@ -368,8 +370,9 @@ mem_heap_create_block( #endif /* !UNIV_HOTBACKUP */ block->magic_n = MEM_BLOCK_MAGIC_N; - ut_strlcpy_rev(block->file_name, file_name, sizeof(block->file_name)); - block->line = line; + ut_d(ut_strlcpy_rev(block->file_name, file_name, + sizeof(block->file_name))); + ut_d(block->line = line); #ifdef MEM_PERIODIC_CHECK mutex_enter(&(mem_comm_pool->mutex)); diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index d07d381fb8f..d3f1980aeed 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -465,8 +465,10 @@ os_file_get_last_error_low( return(OS_FILE_INSUFFICIENT_RESOURCE); } else if (err == ERROR_OPERATION_ABORTED) { return(OS_FILE_OPERATION_ABORTED); + } else if (err == ERROR_ACCESS_DENIED) { + return(OS_FILE_ACCESS_VIOLATION); } else { - return(100 + err); + return(OS_FILE_ERROR_MAX + err); } #else int err = errno; @@ -540,8 +542,10 @@ os_file_get_last_error_low( return(OS_FILE_AIO_INTERRUPTED); } break; + case EACCES: + return(OS_FILE_ACCESS_VIOLATION); } - return(100 + err); + return(OS_FILE_ERROR_MAX + err); #endif } @@ -619,6 +623,7 @@ os_file_handle_error_cond_exit( case OS_FILE_PATH_ERROR: case OS_FILE_ALREADY_EXISTS: + case OS_FILE_ACCESS_VIOLATION: return(FALSE); @@ -3166,30 +3171,41 @@ os_file_get_status( return(DB_FAIL); - } else if (S_ISDIR(statinfo.st_mode)) { + } + + switch (statinfo.st_mode & S_IFMT) { + case S_IFDIR: stat_info->type = OS_FILE_TYPE_DIR; - } else if (S_ISLNK(statinfo.st_mode)) { + break; + case S_IFLNK: stat_info->type = OS_FILE_TYPE_LINK; - } else if (S_ISREG(statinfo.st_mode)) { + break; + case S_IFBLK: + stat_info->type = OS_FILE_TYPE_BLOCK; + break; + case S_IFREG: stat_info->type = OS_FILE_TYPE_FILE; + break; + default: + stat_info->type = OS_FILE_TYPE_UNKNOWN; + } - if (check_rw_perm) { - int fh; - int access; - access = !srv_read_only_mode ? O_RDWR : O_RDONLY; + if (check_rw_perm && (stat_info->type == OS_FILE_TYPE_FILE + || stat_info->type == OS_FILE_TYPE_BLOCK)) { + int fh; + int access; - fh = ::open(path, access, os_innodb_umask); + access = !srv_read_only_mode ? O_RDWR : O_RDONLY; - if (fh == -1) { - stat_info->rw_perm = false; - } else { - stat_info->rw_perm = true; - close(fh); - } + fh = ::open(path, access, os_innodb_umask); + + if (fh == -1) { + stat_info->rw_perm = false; + } else { + stat_info->rw_perm = true; + close(fh); } - } else { - stat_info->type = OS_FILE_TYPE_UNKNOWN; } #endif /* _WIN_ */ diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc index efce1f10cae..f5f7e1299ce 100644 --- a/storage/innobase/page/page0cur.cc +++ b/storage/innobase/page/page0cur.cc @@ -977,7 +977,8 @@ page_cur_insert_rec_low( == (ibool) !!page_is_comp(page)); ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) - == index->id || recv_recovery_is_on() || mtr->inside_ibuf); + == index->id || recv_recovery_is_on() + || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index))); ut_ad(!page_rec_is_supremum(current_rec)); @@ -1204,7 +1205,8 @@ page_cur_insert_rec_zip( ut_ad(page_is_comp(page)); ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) - == index->id || mtr->inside_ibuf || recv_recovery_is_on()); + == index->id || recv_recovery_is_on() + || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index))); ut_ad(!page_cur_is_after_last(cursor)); #ifdef UNIV_ZIP_DEBUG @@ -1977,7 +1979,8 @@ page_cur_delete_rec( const dict_index_t* index, /*!< in: record descriptor */ const ulint* offsets,/*!< in: rec_get_offsets( cursor->rec, index) */ - mtr_t* mtr) /*!< in: mini-transaction handle */ + mtr_t* mtr) /*!< in: mini-transaction handle + or NULL */ { page_dir_slot_t* cur_dir_slot; page_dir_slot_t* prev_slot; @@ -2006,7 +2009,8 @@ page_cur_delete_rec( ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) - == index->id || mtr->inside_ibuf || recv_recovery_is_on()); + == index->id || recv_recovery_is_on() + || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index))); /* The record must not be the supremum or infimum record. */ ut_ad(page_rec_is_user_rec(current_rec)); diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc index 2faf804279c..bd5fb36af8f 100644 --- a/storage/innobase/page/page0page.cc +++ b/storage/innobase/page/page0page.cc @@ -2779,3 +2779,35 @@ page_delete_rec( return(no_compress_needed); } +/** Get the last non-delete-marked record on a page. +@param[in] page index tree leaf page +@return the last record, not delete-marked +@retval infimum record if all records are delete-marked */ + +const rec_t* +page_find_rec_max_not_deleted( + const page_t* page) +{ + const rec_t* rec = page_get_infimum_rec(page); + const rec_t* prev_rec = NULL; // remove warning + + /* Because the page infimum is never delete-marked, + prev_rec will always be assigned to it first. */ + ut_ad(!rec_get_deleted_flag(rec, page_rec_is_comp(rec))); + if (page_is_comp(page)) { + do { + if (!rec_get_deleted_flag(rec, true)) { + prev_rec = rec; + } + rec = page_rec_get_next_low(rec, true); + } while (rec != page + PAGE_NEW_SUPREMUM); + } else { + do { + if (!rec_get_deleted_flag(rec, false)) { + prev_rec = rec; + } + rec = page_rec_get_next_low(rec, false); + } while (rec != page + PAGE_OLD_SUPREMUM); + } + return(prev_rec); +} diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc index 52dcbf64183..cfe67e49b68 100644 --- a/storage/innobase/page/page0zip.cc +++ b/storage/innobase/page/page0zip.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2005, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. This program is free software; you can redistribute it and/or modify it under @@ -24,6 +24,9 @@ Compressed page interface Created June 2005 by Marko Makela *******************************************************/ +// First include (the generated) my_config.h, to get correct platform defines. +#include "my_config.h" + #include <map> using namespace std; @@ -1567,9 +1570,8 @@ page_zip_fields_free( dict_table_t* table = index->table; os_fast_mutex_free(&index->zip_pad.mutex); mem_heap_free(index->heap); - mutex_free(&(table->autoinc_mutex)); - ut_free(table->name); - mem_heap_free(table->heap); + + dict_mem_table_free(table); } } @@ -4894,8 +4896,12 @@ page_zip_verify_checksum( /* declare empty pages non-corrupted */ if (stored == 0) { /* make sure that the page is really empty */ - ut_d(ulint i; for (i = 0; i < size; i++) { - ut_a(*((const char*) data + i) == 0); }); + ulint i; + for (i = 0; i < size; i++) { + if (*((const char*) data + i) != 0) { + return(FALSE); + } + } return(TRUE); } diff --git a/storage/innobase/pars/pars0pars.cc b/storage/innobase/pars/pars0pars.cc index e0bc00fad0d..509755c76fa 100644 --- a/storage/innobase/pars/pars0pars.cc +++ b/storage/innobase/pars/pars0pars.cc @@ -1988,6 +1988,12 @@ pars_create_table( } } + /* Set the flags2 when create table or alter tables */ + flags2 |= DICT_TF2_FTS_AUX_HEX_NAME; + DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", + flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;); + + n_cols = que_node_list_get_len(column_defs); table = dict_mem_table_create( diff --git a/storage/innobase/rem/rem0rec.cc b/storage/innobase/rem/rem0rec.cc index 43072159b9e..0d7b7c16785 100644 --- a/storage/innobase/rem/rem0rec.cc +++ b/storage/innobase/rem/rem0rec.cc @@ -543,9 +543,11 @@ rec_get_offsets_func( ulint n_fields,/*!< in: maximum number of initialized fields (ULINT_UNDEFINED if all fields) */ - mem_heap_t** heap, /*!< in/out: memory heap */ +#ifdef UNIV_DEBUG const char* file, /*!< in: file name where called */ - ulint line) /*!< in: line number where called */ + ulint line, /*!< in: line number where called */ +#endif /* UNIV_DEBUG */ + mem_heap_t** heap) /*!< in/out: memory heap */ { ulint n; ulint size; @@ -590,9 +592,8 @@ rec_get_offsets_func( if (UNIV_UNLIKELY(!offsets) || UNIV_UNLIKELY(rec_offs_get_n_alloc(offsets) < size)) { if (UNIV_UNLIKELY(!*heap)) { - *heap = mem_heap_create_func(size * sizeof(ulint), - MEM_HEAP_DYNAMIC, - file, line); + *heap = mem_heap_create_at(size * sizeof(ulint), + file, line); } offsets = static_cast<ulint*>( mem_heap_alloc(*heap, size * sizeof(ulint))); diff --git a/storage/innobase/row/row0ftsort.cc b/storage/innobase/row/row0ftsort.cc index 087d2152826..d88fa93aaf9 100644 --- a/storage/innobase/row/row0ftsort.cc +++ b/storage/innobase/row/row0ftsort.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1431,11 +1431,17 @@ row_fts_merge_insert( ins_ctx.ins_graph = static_cast<que_t**>(mem_heap_alloc(heap, n_bytes)); memset(ins_ctx.ins_graph, 0x0, n_bytes); + /* We should set the flags2 with aux_table_name here, + in order to get the correct aux table names. */ + index->table->flags2 |= DICT_TF2_FTS_AUX_HEX_NAME; + DBUG_EXECUTE_IF("innodb_test_wrong_fts_aux_table_name", + index->table->flags2 &= ~DICT_TF2_FTS_AUX_HEX_NAME;); + ins_ctx.fts_table.type = FTS_INDEX_TABLE; ins_ctx.fts_table.index_id = index->id; ins_ctx.fts_table.table_id = table->id; ins_ctx.fts_table.parent = index->table->name; - ins_ctx.fts_table.table = NULL; + ins_ctx.fts_table.table = index->table; for (i = 0; i < fts_sort_pll_degree; i++) { if (psort_info[i].merge_file[id]->n_rec == 0) { diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index c18ef9ee250..e9bbeea240f 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -78,11 +78,12 @@ struct row_log_buf_t { mrec_buf_t buf; /*!< buffer for accessing a record that spans two blocks */ ulint blocks; /*!< current position in blocks */ - ulint bytes; /*!< current position within buf */ + ulint bytes; /*!< current position within block */ ulonglong total; /*!< logical position, in bytes from the start of the row_log_table log; 0 for row_log_online_op() and row_log_apply(). */ + ulint size; /*!< allocated size of block */ }; /** Tracks BLOB allocation during online ALTER TABLE */ @@ -193,9 +194,47 @@ struct row_log_t { or by index->lock X-latch only */ row_log_buf_t head; /*!< reader context; protected by MDL only; modifiable by row_log_apply_ops() */ - ulint size; /*!< allocated size */ }; + +/** Allocate the memory for the log buffer. +@param[in,out] log_buf Buffer used for log operation +@return TRUE if success, false if not */ +static __attribute__((warn_unused_result)) +bool +row_log_block_allocate( + row_log_buf_t& log_buf) +{ + DBUG_ENTER("row_log_block_allocate"); + if (log_buf.block == NULL) { + log_buf.size = srv_sort_buf_size; + log_buf.block = (byte*) os_mem_alloc_large(&log_buf.size); + DBUG_EXECUTE_IF("simulate_row_log_allocation_failure", + if (log_buf.block) + os_mem_free_large(log_buf.block, log_buf.size); + log_buf.block = NULL;); + if (!log_buf.block) { + DBUG_RETURN(false); + } + } + DBUG_RETURN(true); +} + +/** Free the log buffer. +@param[in,out] log_buf Buffer used for log operation */ +static +void +row_log_block_free( + row_log_buf_t& log_buf) +{ + DBUG_ENTER("row_log_block_free"); + if (log_buf.block != NULL) { + os_mem_free_large(log_buf.block, log_buf.size); + log_buf.block = NULL; + } + DBUG_VOID_RETURN; +} + /******************************************************//** Logs an operation to a secondary index that is (or was) being created. */ UNIV_INTERN @@ -247,6 +286,11 @@ row_log_online_op( log->max_trx = trx_id; } + if (!row_log_block_allocate(log->tail)) { + log->error = DB_OUT_OF_MEMORY; + goto err_exit; + } + UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf); ut_ad(log->tail.bytes < srv_sort_buf_size); @@ -318,6 +362,7 @@ write_failed: } UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf); +err_exit: mutex_exit(&log->mutex); } @@ -352,10 +397,16 @@ row_log_table_open( UNIV_MEM_INVALID(log->tail.buf, sizeof log->tail.buf); if (log->error != DB_SUCCESS) { +err_exit: mutex_exit(&log->mutex); return(NULL); } + if (!row_log_block_allocate(log->tail)) { + log->error = DB_OUT_OF_MEMORY; + goto err_exit; + } + ut_ad(log->tail.bytes < srv_sort_buf_size); *avail = srv_sort_buf_size - log->tail.bytes; @@ -2306,6 +2357,11 @@ all_done: ut_ad(dict_index_is_online_ddl(index)); + if (!row_log_block_allocate(index->online_log->head)) { + error = DB_OUT_OF_MEMORY; + goto func_exit; + } + success = os_file_read_no_error_handling( OS_FILE_FROM_FD(index->online_log->fd), index->online_log->head.block, ofs, @@ -2509,6 +2565,7 @@ func_exit: mem_heap_free(offsets_heap); mem_heap_free(heap); + row_log_block_free(index->online_log->head); ut_free(offsets); return(error); } @@ -2582,9 +2639,7 @@ row_log_allocate( const ulint* col_map)/*!< in: mapping of old column numbers to new ones, or NULL if !table */ { - byte* buf; row_log_t* log; - ulint size; DBUG_ENTER("row_log_allocate"); ut_ad(!dict_index_is_online_ddl(index)); @@ -2596,17 +2651,14 @@ row_log_allocate( #ifdef UNIV_SYNC_DEBUG ut_ad(rw_lock_own(dict_index_get_lock(index), RW_LOCK_EX)); #endif /* UNIV_SYNC_DEBUG */ - size = 2 * srv_sort_buf_size + sizeof *log; - buf = (byte*) os_mem_alloc_large(&size); - if (!buf) { + log = (row_log_t*) ut_malloc(sizeof *log); + if (!log) { DBUG_RETURN(false); } - log = (row_log_t*) &buf[2 * srv_sort_buf_size]; - log->size = size; log->fd = row_merge_file_create_low(); if (log->fd < 0) { - os_mem_free_large(buf, size); + ut_free(log); DBUG_RETURN(false); } mutex_create(index_online_log_key, &log->mutex, @@ -2618,10 +2670,9 @@ row_log_allocate( log->col_map = col_map; log->error = DB_SUCCESS; log->max_trx = 0; - log->head.block = buf; - log->tail.block = buf + srv_sort_buf_size; log->tail.blocks = log->tail.bytes = 0; log->tail.total = 0; + log->tail.block = log->head.block = NULL; log->head.blocks = log->head.bytes = 0; log->head.total = 0; dict_index_set_online_status(index, ONLINE_INDEX_CREATION); @@ -2646,9 +2697,11 @@ row_log_free( MONITOR_ATOMIC_DEC(MONITOR_ONLINE_CREATE_INDEX); delete log->blobs; + row_log_block_free(log->tail); + row_log_block_free(log->head); row_merge_file_destroy_low(log->fd); mutex_free(&log->mutex); - os_mem_free_large(log->head.block, log->size); + ut_free(log); log = 0; } @@ -3074,6 +3127,11 @@ next_block: goto interrupted; } + error = index->online_log->error; + if (error != DB_SUCCESS) { + goto func_exit; + } + if (dict_index_is_corrupted(index)) { error = DB_INDEX_CORRUPT; goto func_exit; @@ -3130,6 +3188,11 @@ all_done: log_free_check(); + if (!row_log_block_allocate(index->online_log->head)) { + error = DB_OUT_OF_MEMORY; + goto func_exit; + } + success = os_file_read_no_error_handling( OS_FILE_FROM_FD(index->online_log->fd), index->online_log->head.block, ofs, @@ -3330,6 +3393,7 @@ func_exit: mem_heap_free(heap); mem_heap_free(offsets_heap); + row_log_block_free(index->online_log->head); ut_free(offsets); return(error); } diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc index 2a60790f29c..93d13ea49ee 100644 --- a/storage/innobase/row/row0mysql.cc +++ b/storage/innobase/row/row0mysql.cc @@ -1277,7 +1277,9 @@ row_insert_for_mysql( " newraw is replaced\n" "InnoDB: with raw, and innodb_force_... is removed.\n", stderr); - + if(srv_force_recovery) { + return(DB_READ_ONLY); + } return(DB_ERROR); } @@ -1662,7 +1664,9 @@ row_update_for_mysql( " is replaced\n" "InnoDB: with raw, and innodb_force_... is removed.\n", stderr); - + if(srv_force_recovery) { + return(DB_READ_ONLY); + } return(DB_ERROR); } @@ -3240,7 +3244,6 @@ row_truncate_table_for_mysql( ut_a(trx->dict_operation_lock_mode == 0); /* Prevent foreign key checks etc. while we are truncating the table */ - row_mysql_lock_data_dictionary(trx); ut_ad(mutex_own(&(dict_sys->mutex))); @@ -3304,6 +3307,25 @@ row_truncate_table_for_mysql( goto funct_exit; } + /* Check if memcached plugin is running on this table. if is, we don't + allow truncate this table. */ + if (table->memcached_sync_count != 0) { + ut_print_timestamp(stderr); + fputs(" InnoDB: Cannot truncate table ", stderr); + ut_print_name(stderr, trx, TRUE, table->name); + fputs(" by DROP+CREATE\n" + "InnoDB: because there are memcached operations" + " running on it.\n", + stderr); + err = DB_ERROR; + + goto funct_exit; + } else { + /* We need to set this counter to -1 for blocking + memcached operations. */ + table->memcached_sync_count = DICT_TABLE_IN_DDL; + } + /* Remove all locks except the table-level X lock. */ lock_remove_all_on_table(table, FALSE); @@ -3487,6 +3509,7 @@ next_rec: fts_table.name = table->name; fts_table.id = new_id; + fts_table.flags2 = table->flags2; err = fts_create_common_tables( trx, &fts_table, table->name, TRUE); @@ -3631,6 +3654,12 @@ next_rec: funct_exit: + if (table->memcached_sync_count == DICT_TABLE_IN_DDL) { + /* We need to set the memcached sync back to 0, unblock + memcached operationse. */ + table->memcached_sync_count = 0; + } + row_mysql_unlock_data_dictionary(trx); dict_stats_update(table, DICT_STATS_EMPTY_TABLE); @@ -4702,6 +4731,9 @@ row_rename_table_for_mysql( " is replaced\n" "InnoDB: with raw, and innodb_force_... is removed.\n", stderr); + if(srv_force_recovery) { + err = DB_READ_ONLY; + } goto funct_exit; } else if (row_mysql_is_system_table(new_name)) { @@ -4975,15 +5007,31 @@ row_rename_table_for_mysql( if (err != DB_SUCCESS && (table->space != 0)) { char* orig_name = table->name; + trx_t* trx_bg = trx_allocate_for_background(); + + /* If the first fts_rename fails, the trx would + be rolled back and committed, we can't use it any more, + so we have to start a new background trx here. */ + ut_a(trx_state_eq(trx, TRX_STATE_NOT_STARTED)); + trx_bg->op_info = "Revert the failing rename " + "for fts aux tables"; + trx_bg->dict_operation_lock_mode = RW_X_LATCH; + trx_start_for_ddl(trx_bg, TRX_DICT_OP_TABLE); /* If rename fails and table has its own tablespace, we need to call fts_rename_aux_tables again to revert the ibd file rename, which is not under the control of trx. Also notice the parent table name - in cache is not changed yet. */ + in cache is not changed yet. If the reverting fails, + the ibd data may be left in the new database, which + can be fixed only manually. */ table->name = const_cast<char*>(new_name); - fts_rename_aux_tables(table, old_name, trx); + fts_rename_aux_tables(table, old_name, trx_bg); table->name = orig_name; + + trx_bg->dict_operation_lock_mode = 0; + trx_commit_for_mysql(trx_bg); + trx_free_for_background(trx_bg); } } diff --git a/storage/innobase/row/row0sel.cc b/storage/innobase/row/row0sel.cc index 09cf75c1050..359ae3f2c21 100644 --- a/storage/innobase/row/row0sel.cc +++ b/storage/innobase/row/row0sel.cc @@ -5320,25 +5320,40 @@ func_exit: return(value); } -/*******************************************************************//** -Get the last row. -@return current rec or NULL */ +/** Get the maximum and non-delete-marked record in an index. +@param[in] index index tree +@param[in,out] mtr mini-transaction (may be committed and restarted) +@return maximum record, page s-latched in mtr +@retval NULL if there are no records, or if all of them are delete-marked */ static const rec_t* -row_search_autoinc_get_rec( -/*=======================*/ - btr_pcur_t* pcur, /*!< in: the current cursor */ - mtr_t* mtr) /*!< in: mini transaction */ +row_search_get_max_rec( + dict_index_t* index, + mtr_t* mtr) { + btr_pcur_t pcur; + const rec_t* rec; + /* Open at the high/right end (false), and init cursor */ + btr_pcur_open_at_index_side( + false, index, BTR_SEARCH_LEAF, &pcur, true, 0, mtr); + do { - const rec_t* rec = btr_pcur_get_rec(pcur); + const page_t* page; + + page = btr_pcur_get_page(&pcur); + rec = page_find_rec_max_not_deleted(page); if (page_rec_is_user_rec(rec)) { - return(rec); + break; + } else { + rec = NULL; } - } while (btr_pcur_move_to_prev(pcur, mtr)); + btr_pcur_move_before_first_on_page(&pcur); + } while (btr_pcur_move_to_prev(&pcur, mtr)); - return(NULL); + btr_pcur_close(&pcur); + + return(rec); } /*******************************************************************//** @@ -5353,55 +5368,30 @@ row_search_max_autoinc( const char* col_name, /*!< in: name of autoinc column */ ib_uint64_t* value) /*!< out: AUTOINC value read */ { - ulint i; - ulint n_cols; - dict_field_t* dfield = NULL; + dict_field_t* dfield = dict_index_get_nth_field(index, 0); dberr_t error = DB_SUCCESS; - - n_cols = dict_index_get_n_ordering_defined_by_user(index); - - /* Search the index for the AUTOINC column name */ - for (i = 0; i < n_cols; ++i) { - dfield = dict_index_get_nth_field(index, i); - - if (strcmp(col_name, dfield->name) == 0) { - break; - } - } - *value = 0; - /* Must find the AUTOINC column name */ - if (i < n_cols && dfield) { + if (strcmp(col_name, dfield->name) != 0) { + error = DB_RECORD_NOT_FOUND; + } else { mtr_t mtr; - btr_pcur_t pcur; + const rec_t* rec; mtr_start(&mtr); - /* Open at the high/right end (false), and init cursor */ - btr_pcur_open_at_index_side( - false, index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr); - - if (!page_is_empty(btr_pcur_get_page(&pcur))) { - const rec_t* rec; - - rec = row_search_autoinc_get_rec(&pcur, &mtr); + rec = row_search_get_max_rec(index, &mtr); - if (rec != NULL) { - ibool unsigned_type = ( - dfield->col->prtype & DATA_UNSIGNED); + if (rec != NULL) { + ibool unsigned_type = ( + dfield->col->prtype & DATA_UNSIGNED); - *value = row_search_autoinc_read_column( - index, rec, i, - dfield->col->mtype, unsigned_type); - } + *value = row_search_autoinc_read_column( + index, rec, 0, + dfield->col->mtype, unsigned_type); } - btr_pcur_close(&pcur); - mtr_commit(&mtr); - } else { - error = DB_RECORD_NOT_FOUND; } return(error); diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index a25469f35b7..5910666bf2e 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -378,8 +378,8 @@ UNIV_INTERN ulint srv_available_undo_logs = 0; /* Set the following to 0 if you want InnoDB to write messages on stderr on startup/shutdown. */ UNIV_INTERN ibool srv_print_verbose_log = TRUE; -UNIV_INTERN ibool srv_print_innodb_monitor = FALSE; -UNIV_INTERN ibool srv_print_innodb_lock_monitor = FALSE; +UNIV_INTERN my_bool srv_print_innodb_monitor = FALSE; +UNIV_INTERN my_bool srv_print_innodb_lock_monitor = FALSE; UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE; UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE; diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 00604a896ca..387f793a763 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -218,7 +218,8 @@ srv_file_check_mode( /* Note: stat.rw_perm is only valid of files */ - if (stat.type == OS_FILE_TYPE_FILE) { + if (stat.type == OS_FILE_TYPE_FILE + || stat.type == OS_FILE_TYPE_BLOCK) { if (!stat.rw_perm) { ib_logf(IB_LOG_LEVEL_ERROR, @@ -1526,6 +1527,16 @@ innobase_start_or_create_for_mysql(void) # endif /* F_FULLFSYNC */ #endif /* HAVE_DARWIN_THREADS */ + ib_logf(IB_LOG_LEVEL_INFO, + "Using %s to ref count buffer pool pages", +#ifdef PAGE_ATOMIC_REF_COUNT + "atomics" +#else + "mutexes" +#endif /* PAGE_ATOMIC_REF_COUNT */ + ); + + if (sizeof(ulint) != sizeof(void*)) { ut_print_timestamp(stderr); fprintf(stderr, diff --git a/storage/innobase/sync/sync0sync.cc b/storage/innobase/sync/sync0sync.cc index 90f16719e20..5ef8a02fb3f 100644 --- a/storage/innobase/sync/sync0sync.cc +++ b/storage/innobase/sync/sync0sync.cc @@ -1137,6 +1137,7 @@ sync_thread_add_level( case SYNC_RECV: case SYNC_FTS_BG_THREADS: case SYNC_WORK_QUEUE: + case SYNC_FTS_TOKENIZE: case SYNC_FTS_OPTIMIZE: case SYNC_FTS_CACHE: case SYNC_FTS_CACHE_INIT: diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 7a75c7b573a..43c1308b44d 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -562,6 +562,13 @@ trx_resurrect_insert( trx->no = TRX_ID_MAX; } + /* trx_start_low() is not called with resurrect, so need to initialize + start time here.*/ + if (trx->state == TRX_STATE_ACTIVE + || trx->state == TRX_STATE_PREPARED) { + trx->start_time = ut_time(); + } + if (undo->dict_operation) { trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); trx->table_id = undo->table_id; @@ -649,6 +656,13 @@ trx_resurrect_update( trx->no = TRX_ID_MAX; } + /* trx_start_low() is not called with resurrect, so need to initialize + start time here.*/ + if (trx->state == TRX_STATE_ACTIVE + || trx->state == TRX_STATE_PREPARED) { + trx->start_time = ut_time(); + } + if (undo->dict_operation) { trx_set_dict_operation(trx, TRX_DICT_OP_TABLE); trx->table_id = undo->table_id; |