diff options
author | Alexander Barkov <bar@mariadb.org> | 2017-06-15 15:27:11 +0400 |
---|---|---|
committer | Alexander Barkov <bar@mariadb.org> | 2017-06-15 15:27:11 +0400 |
commit | 765347384af7fd3393ad37567a612d93ed8b3d92 (patch) | |
tree | a2c0a08596142312ec38f33e4e02f353a2730fe1 /storage/xtradb | |
parent | 3b1921c714fcb4415cea9058408fb5a626e93b62 (diff) | |
parent | e813fe862226554cfe31754b3dfeafbb2b9a7159 (diff) | |
download | mariadb-git-765347384af7fd3393ad37567a612d93ed8b3d92.tar.gz |
Merge remote-tracking branch 'origin/10.2' into bb-10.2-ext
Diffstat (limited to 'storage/xtradb')
79 files changed, 2289 insertions, 1785 deletions
diff --git a/storage/xtradb/btr/btr0btr.cc b/storage/xtradb/btr/btr0btr.cc index d84c93f8b3e..e66599e206d 100644 --- a/storage/xtradb/btr/btr0btr.cc +++ b/storage/xtradb/btr/btr0btr.cc @@ -2,7 +2,7 @@ Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2014, 2017, MariaDB Corporation +Copyright (c) 2014, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1703,9 +1703,7 @@ btr_create( dict_index_t* index, /*!< in: index */ mtr_t* mtr) /*!< in: mini-transaction handle */ { - ulint page_no; buf_block_t* block; - buf_frame_t* frame; page_t* page; page_zip_des_t* page_zip; @@ -1720,6 +1718,10 @@ btr_create( space, 0, IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr); + if (ibuf_hdr_block == NULL) { + return(FIL_NULL); + } + buf_block_dbg_add_level( ibuf_hdr_block, SYNC_IBUF_TREE_NODE_NEW); @@ -1733,7 +1735,17 @@ btr_create( + IBUF_HEADER + IBUF_TREE_SEG_HEADER, IBUF_TREE_ROOT_PAGE_NO, FSP_UP, mtr); + + if (block == NULL) { + return(FIL_NULL); + } + ut_ad(buf_block_get_page_no(block) == IBUF_TREE_ROOT_PAGE_NO); + + buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW); + + flst_init(block->frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, + mtr); } else { #ifdef UNIV_BLOB_DEBUG if ((type & DICT_CLUSTERED) && !index->blobs) { @@ -1745,34 +1757,19 @@ btr_create( #endif /* UNIV_BLOB_DEBUG */ block = fseg_create(space, 0, PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr); - } - if (block == NULL) { - - return(FIL_NULL); - } - - page_no = buf_block_get_page_no(block); - frame = buf_block_get_frame(block); - - if (type & DICT_IBUF) { - /* It is an insert buffer tree: initialize the free list */ - buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW); - - ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO); + if (block == NULL) { + return(FIL_NULL); + } - flst_init(frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr); - } else { - /* It is a non-ibuf tree: create a file segment for leaf - pages */ buf_block_dbg_add_level(block, SYNC_TREE_NODE_NEW); - if (!fseg_create(space, page_no, + if (!fseg_create(space, buf_block_get_page_no(block), PAGE_HEADER + PAGE_BTR_SEG_LEAF, mtr)) { /* Not enough space for new segment, free root segment before return. */ - btr_free_root(space, zip_size, page_no, mtr); - + btr_free_root(space, zip_size, + buf_block_get_page_no(block), mtr); return(FIL_NULL); } @@ -1816,7 +1813,7 @@ btr_create( ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE); - return(page_no); + return(buf_block_get_page_no(block)); } /************************************************************//** diff --git a/storage/xtradb/btr/btr0cur.cc b/storage/xtradb/btr/btr0cur.cc index 1ea1ec0696b..ffd7ebc7504 100644 --- a/storage/xtradb/btr/btr0cur.cc +++ b/storage/xtradb/btr/btr0cur.cc @@ -1329,18 +1329,21 @@ btr_cur_ins_lock_and_undo( index, thr, mtr, inherit); if (err != DB_SUCCESS + || !(~flags | (BTR_NO_UNDO_LOG_FLAG | BTR_KEEP_SYS_FLAG)) || !dict_index_is_clust(index) || dict_index_is_ibuf(index)) { return(err); } - err = trx_undo_report_row_operation(flags, TRX_UNDO_INSERT_OP, - thr, index, entry, - NULL, 0, NULL, NULL, - &roll_ptr); - if (err != DB_SUCCESS) { - - return(err); + if (flags & BTR_NO_UNDO_LOG_FLAG) { + roll_ptr = 0; + } else { + err = trx_undo_report_row_operation(thr, index, entry, + NULL, 0, NULL, NULL, + &roll_ptr); + if (err != DB_SUCCESS) { + return(err); + } } /* Now we can fill in the roll ptr field in entry */ @@ -1389,15 +1392,17 @@ btr_cur_optimistic_insert( btr_cur_t* cursor, /*!< in: cursor on page after which to insert; cursor stays valid */ ulint** offsets,/*!< out: offsets on *rec */ - mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ + mem_heap_t** heap, /*!< in/out: pointer to memory heap */ dtuple_t* entry, /*!< in/out: entry to insert */ rec_t** rec, /*!< out: pointer to inserted record if succeed */ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to - be stored externally by the caller, or - NULL */ + be stored externally by the caller */ ulint n_ext, /*!< in: number of externally stored columns */ - que_thr_t* thr, /*!< in: query thread or NULL */ + que_thr_t* thr, /*!< in/out: query thread; can be NULL if + !(~flags + & (BTR_NO_LOCKING_FLAG + | BTR_NO_UNDO_LOG_FLAG)) */ mtr_t* mtr) /*!< in/out: mini-transaction; if this function returns DB_SUCCESS on a leaf page of a secondary index in a @@ -1418,6 +1423,7 @@ btr_cur_optimistic_insert( ulint rec_size; dberr_t err; + ut_ad(thr || !(~flags & (BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG))); *big_rec = NULL; block = btr_cur_get_block(cursor); @@ -1427,7 +1433,10 @@ btr_cur_optimistic_insert( page = buf_block_get_frame(block); index = cursor->index; - ut_ad((thr && thr_get_trx(thr)->fake_changes) + const bool fake_changes = (~flags & (BTR_NO_LOCKING_FLAG + | BTR_NO_UNDO_LOG_FLAG)) + && thr_get_trx(thr)->fake_changes; + ut_ad(fake_changes || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); ut_ad(!dict_index_is_online_ddl(index) || dict_index_is_clust(index) @@ -1568,7 +1577,7 @@ fail_err: goto fail_err; } - if (UNIV_UNLIKELY(thr && thr_get_trx(thr)->fake_changes)) { + if (UNIV_UNLIKELY(fake_changes)) { /* skip CHANGE, LOG */ *big_rec = big_rec_vec; return(err); /* == DB_SUCCESS */ @@ -1686,15 +1695,17 @@ btr_cur_pessimistic_insert( cursor stays valid */ ulint** offsets,/*!< out: offsets on *rec */ mem_heap_t** heap, /*!< in/out: pointer to memory heap - that can be emptied, or NULL */ + that can be emptied */ dtuple_t* entry, /*!< in/out: entry to insert */ rec_t** rec, /*!< out: pointer to inserted record if succeed */ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to - be stored externally by the caller, or - NULL */ + be stored externally by the caller */ ulint n_ext, /*!< in: number of externally stored columns */ - que_thr_t* thr, /*!< in: query thread or NULL */ + que_thr_t* thr, /*!< in/out: query thread; can be NULL if + !(~flags + & (BTR_NO_LOCKING_FLAG + | BTR_NO_UNDO_LOG_FLAG)) */ mtr_t* mtr) /*!< in/out: mini-transaction */ { dict_index_t* index = cursor->index; @@ -1706,13 +1717,17 @@ btr_cur_pessimistic_insert( ulint n_reserved = 0; ut_ad(dtuple_check_typed(entry)); + ut_ad(thr || !(~flags & (BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG))); *big_rec = NULL; - ut_ad((thr && thr_get_trx(thr)->fake_changes) || mtr_memo_contains(mtr, + const bool fake_changes = (~flags & (BTR_NO_LOCKING_FLAG + | BTR_NO_UNDO_LOG_FLAG)) + && thr_get_trx(thr)->fake_changes; + ut_ad(fake_changes || mtr_memo_contains(mtr, dict_index_get_lock(btr_cur_get_index(cursor)), MTR_MEMO_X_LOCK)); - ut_ad((thr && thr_get_trx(thr)->fake_changes) || mtr_memo_contains(mtr, btr_cur_get_block(cursor), + ut_ad(fake_changes || mtr_memo_contains(mtr, btr_cur_get_block(cursor), MTR_MEMO_PAGE_X_FIX)); ut_ad(!dict_index_is_online_ddl(index) || dict_index_is_clust(index) @@ -1773,7 +1788,7 @@ btr_cur_pessimistic_insert( } } - if (UNIV_UNLIKELY(thr && thr_get_trx(thr)->fake_changes)) { + if (UNIV_UNLIKELY(fake_changes)) { /* skip CHANGE, LOG */ if (n_reserved > 0) { fil_space_release_free_extents(index->space, @@ -1871,7 +1886,9 @@ btr_cur_upd_lock_and_undo( const rec_t* rec; dberr_t err; - if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) { + ut_ad((thr != NULL) || (flags & BTR_NO_LOCKING_FLAG)); + + if (!(flags & BTR_NO_LOCKING_FLAG) && thr_get_trx(thr)->fake_changes) { /* skip LOCK, UNDO */ return(DB_SUCCESS); } @@ -1906,9 +1923,10 @@ btr_cur_upd_lock_and_undo( /* Append the info about the update in the undo log */ - return(trx_undo_report_row_operation( - flags, TRX_UNDO_MODIFY_OP, thr, - index, NULL, update, + return((flags & BTR_NO_UNDO_LOG_FLAG) + ? DB_SUCCESS + : trx_undo_report_row_operation( + thr, index, NULL, update, cmpl_info, rec, offsets, roll_ptr)); } @@ -2659,12 +2677,12 @@ btr_cur_pessimistic_update( ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */ mem_heap_t** offsets_heap, /*!< in/out: pointer to memory heap - that can be emptied, or NULL */ + that can be emptied */ mem_heap_t* entry_heap, /*!< in/out: memory heap for allocating big_rec and the index tuple */ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to - be stored externally by the caller, or NULL */ + be stored externally by the caller */ const upd_t* update, /*!< in: update vector; this is allowed also contain trx id and roll ptr fields, but the values in update vector have no effect */ @@ -3239,7 +3257,7 @@ btr_cur_del_mark_set_clust_rec( return(err); } - err = trx_undo_report_row_operation(0, TRX_UNDO_MODIFY_OP, thr, + err = trx_undo_report_row_operation(thr, index, NULL, NULL, 0, rec, offsets, &roll_ptr); if (err != DB_SUCCESS) { diff --git a/storage/xtradb/btr/btr0defragment.cc b/storage/xtradb/btr/btr0defragment.cc index 64dc077d582..c2f58a8e1cf 100644 --- a/storage/xtradb/btr/btr0defragment.cc +++ b/storage/xtradb/btr/btr0defragment.cc @@ -154,7 +154,6 @@ btr_defragment_init() (ulonglong) (1000000.0 / srv_defragment_frequency)); mutex_create(btr_defragment_mutex_key, &btr_defragment_mutex, SYNC_ANY_LATCH); - os_thread_create(btr_defragment_thread, NULL, NULL); } /******************************************************************//** @@ -735,14 +734,13 @@ btr_defragment_n_pages( return current_block; } -/******************************************************************//** -Thread that merges consecutive b-tree pages into fewer pages to defragment -the index. */ +/** Whether btr_defragment_thread is active */ +bool btr_defragment_thread_active; + +/** Merge consecutive b-tree pages into fewer pages to defragment indexes */ extern "C" UNIV_INTERN os_thread_ret_t -DECLARE_THREAD(btr_defragment_thread)( -/*==========================================*/ - void* arg) /*!< in: work queue */ +DECLARE_THREAD(btr_defragment_thread)(void*) { btr_pcur_t* pcur; btr_cur_t* cursor; @@ -752,6 +750,8 @@ DECLARE_THREAD(btr_defragment_thread)( buf_block_t* last_block; while (srv_shutdown_state == SRV_SHUTDOWN_NONE) { + ut_ad(btr_defragment_thread_active); + /* If defragmentation is disabled, sleep before checking whether it's enabled. */ if (!srv_defragment) { @@ -825,9 +825,9 @@ DECLARE_THREAD(btr_defragment_thread)( btr_defragment_remove_item(item); } } - btr_defragment_shutdown(); + + btr_defragment_thread_active = false; os_thread_exit(NULL); OS_THREAD_DUMMY_RETURN; } - #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/btr/btr0sea.cc b/storage/xtradb/btr/btr0sea.cc index 68dbcdf1fa7..2f0428747d5 100644 --- a/storage/xtradb/btr/btr0sea.cc +++ b/storage/xtradb/btr/btr0sea.cc @@ -192,7 +192,7 @@ btr_search_sys_create( &btr_search_latch_arr[i], SYNC_SEARCH_SYS); btr_search_sys->hash_tables[i] - = ha_create(hash_size, 0, MEM_HEAP_FOR_BTR_SEARCH, 0); + = ib_create(hash_size, 0, MEM_HEAP_FOR_BTR_SEARCH, 0); #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG btr_search_sys->hash_tables[i]->adaptive = TRUE; diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc index c57dab79ef7..01bec11d2ed 100644 --- a/storage/xtradb/buf/buf0buf.cc +++ b/storage/xtradb/buf/buf0buf.cc @@ -2,7 +2,7 @@ Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. -Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved. +Copyright (c) 2013, 2017, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -65,6 +65,18 @@ Created 11/5/1995 Heikki Tuuri #include "fil0pagecompress.h" #include "ha_prototypes.h" +#ifdef UNIV_LINUX +#include <stdlib.h> +#endif + +#ifdef HAVE_LZO +#include "lzo/lzo1x.h" +#endif + +#ifdef HAVE_SNAPPY +#include "snappy-c.h" +#endif + /** Decrypt a page. @param[in,out] bpage Page control block @param[in,out] space tablespace @@ -74,9 +86,38 @@ bool buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space) MY_ATTRIBUTE((nonnull)); +/********************************************************************//** +Mark a table with the specified space pointed by bpage->space corrupted. +Also remove the bpage from LRU list. +@param[in,out] bpage Block */ +static +void +buf_mark_space_corrupt( + buf_page_t* bpage); + /* prototypes for new functions added to ha_innodb.cc */ trx_t* innobase_get_trx(); +inline void* aligned_malloc(size_t size, size_t align) { + void *result; +#ifdef _MSC_VER + result = _aligned_malloc(size, align); +#else + if(posix_memalign(&result, align, size)) { + result = 0; + } +#endif + return result; +} + +inline void aligned_free(void *ptr) { +#ifdef _MSC_VER + _aligned_free(ptr); +#else + free(ptr); +#endif +} + static inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx) @@ -108,10 +149,6 @@ _increment_page_get_statistics(buf_block_t* block, trx_t* trx) return; } -#ifdef HAVE_LZO -#include "lzo/lzo1x.h" -#endif - /* IMPLEMENTATION OF THE BUFFER POOL ================================= @@ -1510,8 +1547,6 @@ buf_pool_init_instance( buf_pool->chunks = chunk = (buf_chunk_t*) mem_zalloc(sizeof *chunk); - UT_LIST_INIT(buf_pool->free); - if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) { mem_free(chunk); mem_free(buf_pool); @@ -1533,7 +1568,7 @@ buf_pool_init_instance( ut_a(srv_n_page_hash_locks != 0); ut_a(srv_n_page_hash_locks <= MAX_PAGE_HASH_LOCKS); - buf_pool->page_hash = ha_create(2 * buf_pool->curr_size, + buf_pool->page_hash = ib_create(2 * buf_pool->curr_size, srv_n_page_hash_locks, MEM_HEAP_FOR_PAGE_HASH, SYNC_BUF_PAGE_HASH); @@ -1642,20 +1677,14 @@ buf_pool_free_instance( if (buf_pool->tmp_arr) { for(ulint i = 0; i < buf_pool->tmp_arr->n_slots; i++) { buf_tmp_buffer_t* slot = &(buf_pool->tmp_arr->slots[i]); -#ifdef HAVE_LZO - if (slot && slot->lzo_mem) { - ut_free(slot->lzo_mem); - slot->lzo_mem = NULL; - } -#endif - if (slot && slot->crypt_buf_free) { - ut_free(slot->crypt_buf_free); - slot->crypt_buf_free = NULL; + if (slot && slot->crypt_buf) { + aligned_free(slot->crypt_buf); + slot->crypt_buf = NULL; } - if (slot && slot->comp_buf_free) { - ut_free(slot->comp_buf_free); - slot->comp_buf_free = NULL; + if (slot && slot->comp_buf) { + aligned_free(slot->comp_buf); + slot->comp_buf = NULL; } } } @@ -2521,17 +2550,26 @@ buf_zip_decompress( { const byte* frame = block->page.zip.data; ulint size = page_zip_get_size(&block->page.zip); + /* Space is not found if this function is called during IMPORT */ + fil_space_t* space = fil_space_acquire_for_io(block->page.space); + const unsigned key_version = mach_read_from_4(frame + + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + fil_space_crypt_t* crypt_data = space ? space->crypt_data : NULL; + const bool encrypted = crypt_data + && crypt_data->type != CRYPT_SCHEME_UNENCRYPTED + && (!crypt_data->is_default_encryption() + || srv_encrypt_tables); ut_ad(buf_block_get_zip_size(block)); ut_a(buf_block_get_space(block) != 0); if (UNIV_UNLIKELY(check && !page_zip_verify_checksum(frame, size))) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: compressed page checksum mismatch" - " (space %u page %u): stored: %lu, crc32: %lu " - "innodb: %lu, none: %lu\n", + ib_logf(IB_LOG_LEVEL_ERROR, + "Compressed page checksum mismatch" + " for %s [%u:%u]: stored: " ULINTPF ", crc32: " ULINTPF + " innodb: " ULINTPF ", none: " ULINTPF ".", + space ? space->chain.start->name : "N/A", block->page.space, block->page.offset, mach_read_from_4(frame + FIL_PAGE_SPACE_OR_CHKSUM), page_zip_calc_checksum(frame, size, @@ -2540,22 +2578,28 @@ buf_zip_decompress( SRV_CHECKSUM_ALGORITHM_INNODB), page_zip_calc_checksum(frame, size, SRV_CHECKSUM_ALGORITHM_NONE)); - return(FALSE); + goto err_exit; } switch (fil_page_get_type(frame)) { - case FIL_PAGE_INDEX: + case FIL_PAGE_INDEX: { + if (page_zip_decompress(&block->page.zip, block->frame, TRUE)) { + if (space) { + fil_space_release_for_io(space); + } return(TRUE); } - fprintf(stderr, - "InnoDB: unable to decompress space %u page %u\n", + ib_logf(IB_LOG_LEVEL_ERROR, + "Unable to decompress space %s [%u:%u]", + space ? space->chain.start->name : "N/A", block->page.space, block->page.offset); - return(FALSE); + goto err_exit; + } case FIL_PAGE_TYPE_ALLOCATED: case FIL_PAGE_INODE: case FIL_PAGE_IBUF_BITMAP: @@ -2566,14 +2610,36 @@ buf_zip_decompress( /* Copy to uncompressed storage. */ memcpy(block->frame, frame, buf_block_get_zip_size(block)); + + if (space) { + fil_space_release_for_io(space); + } + return(TRUE); } - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: unknown compressed page" - " type %lu\n", - fil_page_get_type(frame)); + ib_logf(IB_LOG_LEVEL_ERROR, + "Unknown compressed page in %s [%u:%u]" + " type %s [" ULINTPF "].", + space ? space->chain.start->name : "N/A", + block->page.space, block->page.offset, + fil_get_page_type_name(fil_page_get_type(frame)), fil_page_get_type(frame)); + +err_exit: + if (encrypted) { + ib_logf(IB_LOG_LEVEL_INFO, + "Row compressed page could be encrypted with key_version %u.", + key_version); + block->page.encrypted = true; + dict_set_encrypted_by_space(block->page.space); + } else { + dict_set_corrupted_by_space(block->page.space); + } + + if (space) { + fil_space_release_for_io(space); + } + return(FALSE); } @@ -3056,9 +3122,9 @@ loop: } ib_logf(IB_LOG_LEVEL_FATAL, "Unable" - " to read tablespace %lu page no" - " %lu into the buffer pool after" - " %lu attempts" + " to read tablespace " ULINTPF " page no " + ULINTPF " into the buffer pool after " + ULINTPF " attempts." " The most probable cause" " of this error may be that the" " table has been corrupted." @@ -3271,12 +3337,21 @@ got_block: /* Decompress the page while not holding any buf_pool or block->mutex. */ - /* Page checksum verification is already done when - the page is read from disk. Hence page checksum - verification is not necessary when decompressing the page. */ { - bool success = buf_zip_decompress(block, FALSE); - ut_a(success); + bool success = buf_zip_decompress(block, TRUE); + + if (!success) { + buf_block_mutex_enter(fix_block); + buf_block_set_io_fix(fix_block, BUF_IO_NONE); + buf_block_mutex_exit(fix_block); + + os_atomic_decrement_ulint(&buf_pool->n_pend_unzip, 1); + rw_lock_x_unlock(&fix_block->lock); + mutex_enter(&buf_pool->LRU_list_mutex); + buf_block_unfix(fix_block); + mutex_exit(&buf_pool->LRU_list_mutex); + return NULL; + } } if (!recv_no_ibuf_operations) { @@ -3374,16 +3449,10 @@ got_block: goto loop; } - fprintf(stderr, - "innodb_change_buffering_debug evict %u %u\n", - (unsigned) space, (unsigned) offset); return(NULL); } if (buf_flush_page_try(buf_pool, fix_block)) { - fprintf(stderr, - "innodb_change_buffering_debug flush %u %u\n", - (unsigned) space, (unsigned) offset); guess = fix_block; goto loop; } @@ -4354,11 +4423,11 @@ buf_page_create( memset(frame + FIL_PAGE_NEXT, 0xff, 4); mach_write_to_2(frame + FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED); - /* Reset to zero the file flush lsn field in the page; if the first - page of an ibdata file is 'created' in this function into the buffer - pool then we lose the original contents of the file flush lsn stamp. - Then InnoDB could in a crash recovery print a big, false, corruption - warning if the stamp contains an lsn bigger than the ib_logfile lsn. */ + /* FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION is only used on the + following pages: + (1) The first page of the InnoDB system tablespace (page 0:0) + (2) FIL_RTREE_SPLIT_SEQ_NUM on R-tree pages + (3) key_version on encrypted pages (not page 0:0) */ memset(frame + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, 0, 8); @@ -4570,6 +4639,7 @@ buf_page_check_corrupt(buf_page_t* bpage, fil_space_t* space) !bpage->encrypted && fil_space_verify_crypt_checksum(dst_frame, zip_size, space, bpage->offset)); + if (!still_encrypted) { /* If traditional checksums match, we assume that page is not anymore encrypted. */ @@ -6176,22 +6246,27 @@ buf_pool_reserve_tmp_slot( buf_pool_mutex_exit(buf_pool); /* Allocate temporary memory for encryption/decryption */ - if (free_slot->crypt_buf_free == NULL) { - free_slot->crypt_buf_free = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*2)); - free_slot->crypt_buf = static_cast<byte *>(ut_align(free_slot->crypt_buf_free, UNIV_PAGE_SIZE)); - memset(free_slot->crypt_buf_free, 0, UNIV_PAGE_SIZE *2); + if (free_slot->crypt_buf == NULL) { + free_slot->crypt_buf = static_cast<byte*>(aligned_malloc(UNIV_PAGE_SIZE, UNIV_PAGE_SIZE)); + memset(free_slot->crypt_buf, 0, UNIV_PAGE_SIZE); } /* For page compressed tables allocate temporary memory for compression/decompression */ - if (compressed && free_slot->comp_buf_free == NULL) { - free_slot->comp_buf_free = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*2)); - free_slot->comp_buf = static_cast<byte *>(ut_align(free_slot->comp_buf_free, UNIV_PAGE_SIZE)); - memset(free_slot->comp_buf_free, 0, UNIV_PAGE_SIZE *2); -#ifdef HAVE_LZO - free_slot->lzo_mem = static_cast<byte *>(ut_malloc(LZO1X_1_15_MEM_COMPRESS)); - memset(free_slot->lzo_mem, 0, LZO1X_1_15_MEM_COMPRESS); + if (compressed && free_slot->comp_buf == NULL) { + ulint size = UNIV_PAGE_SIZE; + + /* Both snappy and lzo compression methods require that + output buffer used for compression is bigger than input + buffer. Increase the allocated buffer size accordingly. */ +#if HAVE_SNAPPY + size = snappy_max_compressed_length(size); +#endif +#if HAVE_LZO + size += LZO1X_1_15_MEM_COMPRESS; #endif + free_slot->comp_buf = static_cast<byte*>(aligned_malloc(size, UNIV_PAGE_SIZE)); + memset(free_slot->comp_buf, 0, size); } return (free_slot); @@ -6279,8 +6354,7 @@ buf_page_encrypt_before_write( fsp_flags_get_page_compression_level(space->flags), fil_space_get_block_size(space, bpage->offset), encrypted, - &out_len, - IF_LZO(slot->lzo_mem, NULL)); + &out_len); bpage->real_size = out_len; diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc index 1c9646c0bd6..49371f9a6f1 100644 --- a/storage/xtradb/buf/buf0dblwr.cc +++ b/storage/xtradb/buf/buf0dblwr.cc @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -175,13 +175,14 @@ buf_dblwr_init( mem_zalloc(buf_size * sizeof(void*))); } -/****************************************************************//** -Creates the doublewrite buffer to a new InnoDB installation. The header of the -doublewrite buffer is placed on the trx system header page. */ +/** Create the doublewrite buffer if the doublewrite buffer header +is not present in the TRX_SYS page. +@return whether the operation succeeded +@retval true if the doublewrite buffer exists or was created +@retval false if the creation failed (too small first data file) */ UNIV_INTERN -void -buf_dblwr_create(void) -/*==================*/ +bool +buf_dblwr_create() { buf_block_t* block2; buf_block_t* new_block; @@ -194,8 +195,7 @@ buf_dblwr_create(void) if (buf_dblwr) { /* Already inited */ - - return; + return(true); } start_again: @@ -213,39 +213,59 @@ start_again: mtr_commit(&mtr); buf_dblwr_being_created = FALSE; - return; + return(true); } - ib_logf(IB_LOG_LEVEL_INFO, - "Doublewrite buffer not found: creating new"); - if (buf_pool_get_curr_size() < ((TRX_SYS_DOUBLEWRITE_BLOCKS * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE + FSP_EXTENT_SIZE / 2 + 100) * UNIV_PAGE_SIZE)) { - ib_logf(IB_LOG_LEVEL_FATAL, - "Cannot create doublewrite buffer: you must " - "increase your buffer pool size. Cannot continue " - "operation."); + ib_logf(IB_LOG_LEVEL_ERROR, + "Cannot create doublewrite buffer: " + "innodb_buffer_pool_size is too small."); + mtr_commit(&mtr); + return(false); + } else { + fil_space_t* space = fil_space_acquire(TRX_SYS_SPACE); + const bool fail = UT_LIST_GET_FIRST(space->chain)->size + < 3 * FSP_EXTENT_SIZE; + fil_space_release(space); + + if (fail) { + goto too_small; + } } block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG, &mtr); + if (block2 == NULL) { +too_small: + ib_logf(IB_LOG_LEVEL_ERROR, + "Cannot create doublewrite buffer: " + "the first file in innodb_data_file_path" + " must be at least %luM.", + 3 * (FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) >> 20); + mtr_commit(&mtr); + return(false); + } + + ib_logf(IB_LOG_LEVEL_INFO, + "Doublewrite buffer not found: creating new"); + + /* FIXME: After this point, the doublewrite buffer creation + is not atomic. The doublewrite buffer should not exist in + the InnoDB system tablespace file in the first place. + It could be located in separate optional file(s) in a + user-specified location. */ + /* fseg_create acquires a second latch on the page, therefore we must declare it: */ buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK); - if (block2 == NULL) { - ib_logf(IB_LOG_LEVEL_FATAL, - "Cannot create doublewrite buffer: you must " - "increase your tablespace size. " - "Cannot continue operation."); - } - fseg_header = doublewrite + TRX_SYS_DOUBLEWRITE_FSEG; prev_page_no = 0; @@ -351,7 +371,7 @@ recovery, this function loads the pages from double write buffer into memory. */ void buf_dblwr_init_or_load_pages( /*=========================*/ - os_file_t file, + pfs_os_file_t file, char* path, bool load_corrupt_pages) { @@ -482,6 +502,14 @@ buf_dblwr_process() byte* unaligned_read_buf; recv_dblwr_t& recv_dblwr = recv_sys->dblwr; + if (!buf_dblwr) { + return; + } + + ib_logf(IB_LOG_LEVEL_INFO, + "Restoring possible half-written data pages " + "from the doublewrite buffer..."); + unaligned_read_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); read_buf = static_cast<byte*>( diff --git a/storage/xtradb/buf/buf0dump.cc b/storage/xtradb/buf/buf0dump.cc index e728636042b..71b97b770e1 100644 --- a/storage/xtradb/buf/buf0dump.cc +++ b/storage/xtradb/buf/buf0dump.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under @@ -612,6 +612,7 @@ buf_load() if (dump_n == 0) { ut_free(dump); + ut_free(dump_tmp); ut_sprintf_timestamp(now); buf_load_status(STATUS_NOTICE, "Buffer pool(s) load completed at %s " @@ -685,6 +686,7 @@ extern "C" UNIV_INTERN os_thread_ret_t DECLARE_THREAD(buf_dump_thread)(void*) { + my_thread_init(); ut_ad(!srv_read_only_mode); buf_dump_status(STATUS_INFO, "Dumping buffer pool(s) not yet started"); @@ -721,6 +723,7 @@ DECLARE_THREAD(buf_dump_thread)(void*) srv_buf_dump_thread_active = false; + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ os_thread_exit(NULL); diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index 1f5c3993be7..84eea3bc692 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2017, MariaDB Corporation. Copyright (c) 2013, 2014, Fusion-io This program is free software; you can redistribute it and/or modify it under @@ -62,10 +62,10 @@ is set to TRUE by the page_cleaner thread when it is spawned and is set back to FALSE at shutdown by the page_cleaner as well. Therefore no need to protect it by a mutex. It is only ever read by the thread doing the shutdown */ -UNIV_INTERN ibool buf_page_cleaner_is_active = FALSE; +UNIV_INTERN bool buf_page_cleaner_is_active; /** Flag indicating if the lru_manager is in active state. */ -UNIV_INTERN bool buf_lru_manager_is_active = false; +UNIV_INTERN bool buf_lru_manager_is_active; #ifdef UNIV_PFS_THREAD UNIV_INTERN mysql_pfs_key_t buf_page_cleaner_thread_key; @@ -352,6 +352,7 @@ buf_flush_insert_into_flush_list( buf_block_t* block, /*!< in/out: block which is modified */ lsn_t lsn) /*!< in: oldest modification */ { + ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE); ut_ad(log_flush_order_mutex_own()); ut_ad(mutex_own(&block->mutex)); @@ -410,6 +411,7 @@ buf_flush_insert_sorted_into_flush_list( buf_page_t* prev_b; buf_page_t* b; + ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE); ut_ad(log_flush_order_mutex_own()); ut_ad(mutex_own(&block->mutex)); ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); @@ -715,6 +717,7 @@ buf_flush_write_complete( buf_page_set_io_fix(bpage, BUF_IO_NONE); buf_pool->n_flush[flush_type]--; + ut_ad(buf_pool->n_flush[flush_type] != ULINT_MAX); #ifdef UNIV_MTFLUSH_DEBUG fprintf(stderr, "n pending flush %lu\n", @@ -1099,6 +1102,7 @@ buf_flush_page( } ++buf_pool->n_flush[flush_type]; + ut_ad(buf_pool->n_flush[flush_type] != 0); mutex_exit(&buf_pool->flush_state_mutex); @@ -2264,13 +2268,14 @@ Clears up tail of the LRU lists: * Flush dirty pages at the tail of LRU to the disk The depth to which we scan each buffer pool is controlled by dynamic config parameter innodb_LRU_scan_depth. -@return number of pages flushed */ +@return number of flushed and evicted pages */ UNIV_INTERN ulint buf_flush_LRU_tail(void) /*====================*/ { ulint total_flushed = 0; + ulint total_evicted = 0; ulint start_time = ut_time_ms(); ulint scan_depth[MAX_BUFFER_POOLS]; ulint requested_pages[MAX_BUFFER_POOLS]; @@ -2341,6 +2346,7 @@ buf_flush_LRU_tail(void) limited_scan[i] = (previous_evicted[i] > n.evicted); previous_evicted[i] = n.evicted; + total_evicted += n.evicted; requested_pages[i] += lru_chunk_size; @@ -2381,7 +2387,7 @@ buf_flush_LRU_tail(void) } } - return(total_flushed); + return(total_flushed + total_evicted); } /*********************************************************************//** @@ -2682,6 +2688,23 @@ buf_get_total_free_list_length(void) return result; } +/** Returns the aggregate LRU list length over all buffer pool instances. +@return total LRU list length. */ +MY_ATTRIBUTE((warn_unused_result)) +static +ulint +buf_get_total_LRU_list_length(void) +{ + ulint result = 0; + + for (ulint i = 0; i < srv_buf_pool_instances; i++) { + + result += UT_LIST_GET_LEN(buf_pool_from_array(i)->LRU); + } + + return result; +} + /*********************************************************************//** Adjust the desired page cleaner thread sleep time for LRU flushes. */ MY_ATTRIBUTE((nonnull)) @@ -2694,8 +2717,9 @@ page_cleaner_adapt_lru_sleep_time( ulint lru_n_flushed) /*!< in: number of flushed in previous batch */ { - ulint free_len = buf_get_total_free_list_length(); - ulint max_free_len = srv_LRU_scan_depth * srv_buf_pool_instances; + ulint free_len = buf_get_total_free_list_length(); + ulint max_free_len = ut_min(buf_get_total_LRU_list_length(), + srv_LRU_scan_depth * srv_buf_pool_instances); if (free_len < max_free_len / 100 && lru_n_flushed) { @@ -2707,7 +2731,7 @@ page_cleaner_adapt_lru_sleep_time( /* Free lists filled more than 20% or no pages flushed in previous batch, sleep a bit more */ - *lru_sleep_time += 50; + *lru_sleep_time += 1; if (*lru_sleep_time > srv_cleaner_max_lru_time) *lru_sleep_time = srv_cleaner_max_lru_time; } else if (free_len < max_free_len / 20 && *lru_sleep_time >= 50) { @@ -2754,6 +2778,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); ulint next_loop_time = ut_time_ms() + 1000; ulint n_flushed = 0; ulint last_activity = srv_get_activity_count(); @@ -2774,8 +2799,6 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( os_thread_pf(os_thread_get_curr_id())); #endif /* UNIV_DEBUG_THREAD_CREATION */ - buf_page_cleaner_is_active = TRUE; - while (srv_shutdown_state == SRV_SHUTDOWN_NONE) { ulint page_cleaner_sleep_time; @@ -2884,8 +2907,9 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( /* We have lived our life. Time to die. */ thread_exit: - buf_page_cleaner_is_active = FALSE; + buf_page_cleaner_is_active = false; + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ os_thread_exit(NULL); @@ -2924,8 +2948,6 @@ DECLARE_THREAD(buf_flush_lru_manager_thread)( os_thread_pf(os_thread_get_curr_id())); #endif /* UNIV_DEBUG_THREAD_CREATION */ - buf_lru_manager_is_active = true; - /* On server shutdown, the LRU manager thread runs through cleanup phase to provide free pages for the master and purge threads. */ while (srv_shutdown_state == SRV_SHUTDOWN_NONE diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc index bc46bcab63b..a1cfeb3860f 100644 --- a/storage/xtradb/dict/dict0dict.cc +++ b/storage/xtradb/dict/dict0dict.cc @@ -888,6 +888,12 @@ dict_index_get_nth_col_or_prefix_pos( ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + ut_ad((inc_prefix && !prefix_col_pos) || (!inc_prefix)); + + if (!prefix_col_pos) { + prefix_col_pos = &prefixed_pos_dummy; + } + *prefix_col_pos = ULINT_UNDEFINED; if (!prefix_col_pos) { prefix_col_pos = &prefixed_pos_dummy; diff --git a/storage/xtradb/dict/dict0mem.cc b/storage/xtradb/dict/dict0mem.cc index fa7177c5137..125d7d78a1f 100644 --- a/storage/xtradb/dict/dict0mem.cc +++ b/storage/xtradb/dict/dict0mem.cc @@ -320,8 +320,8 @@ dict_mem_table_col_rename_low( ut_ad(from_len <= NAME_LEN); ut_ad(to_len <= NAME_LEN); - char from[NAME_LEN]; - strncpy(from, s, NAME_LEN); + char from[NAME_LEN + 1]; + strncpy(from, s, NAME_LEN + 1); if (from_len == to_len) { /* The easy case: simply replace the column name in diff --git a/storage/xtradb/dict/dict0stats.cc b/storage/xtradb/dict/dict0stats.cc index d2e9a2618c0..c1463e98ce0 100644 --- a/storage/xtradb/dict/dict0stats.cc +++ b/storage/xtradb/dict/dict0stats.cc @@ -1168,8 +1168,9 @@ dict_stats_analyze_index_level( leaf-level delete marks because delete marks on non-leaf level do not make sense. */ - if (level == 0 && srv_stats_include_delete_marked? 0: - rec_get_deleted_flag( + if (level == 0 + && !srv_stats_include_delete_marked + && rec_get_deleted_flag( rec, page_is_comp(btr_pcur_get_page(&pcur)))) { diff --git a/storage/xtradb/dict/dict0stats_bg.cc b/storage/xtradb/dict/dict0stats_bg.cc index 55d34ff6ae1..ba6fd115551 100644 --- a/storage/xtradb/dict/dict0stats_bg.cc +++ b/storage/xtradb/dict/dict0stats_bg.cc @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. +Copyright (c) 2012, 2017, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -39,12 +39,18 @@ Created Apr 25, 2012 Vasil Dimov /** Minimum time interval between stats recalc for a given table */ #define MIN_RECALC_INTERVAL 10 /* seconds */ -#define SHUTTING_DOWN() (srv_shutdown_state != SRV_SHUTDOWN_NONE) - /** Event to wake up dict_stats_thread on dict_stats_recalc_pool_add() or shutdown. Not protected by any mutex. */ UNIV_INTERN os_event_t dict_stats_event; +/** Variable to initiate shutdown the dict stats thread. Note we don't +use 'srv_shutdown_state' because we want to shutdown dict stats thread +before purge thread. */ +static bool dict_stats_start_shutdown; + +/** Event to wait for shutdown of the dict stats thread */ +static os_event_t dict_stats_shutdown_event; + /** This mutex protects the "recalc_pool" variable. */ static ib_mutex_t recalc_pool_mutex; static ib_mutex_t defrag_pool_mutex; @@ -341,11 +347,11 @@ Must be called before dict_stats_thread() is started. */ UNIV_INTERN void dict_stats_thread_init() -/*====================*/ { ut_a(!srv_read_only_mode); dict_stats_event = os_event_create(); + dict_stats_shutdown_event = os_event_create(); /* The recalc_pool_mutex is acquired from: 1) the background stats gathering thread before any other latch @@ -390,6 +396,9 @@ dict_stats_thread_deinit() os_event_free(dict_stats_event); dict_stats_event = NULL; + os_event_free(dict_stats_shutdown_event); + dict_stats_shutdown_event = NULL; + dict_stats_start_shutdown = false; } /*****************************************************************//** @@ -530,9 +539,10 @@ extern "C" UNIV_INTERN os_thread_ret_t DECLARE_THREAD(dict_stats_thread)(void*) { + my_thread_init(); ut_a(!srv_read_only_mode); - while (!SHUTTING_DOWN()) { + while (!dict_stats_start_shutdown) { /* Wake up periodically even if not signaled. This is because we may lose an event - if the below call to @@ -542,7 +552,7 @@ DECLARE_THREAD(dict_stats_thread)(void*) os_event_wait_time( dict_stats_event, MIN_RECALC_INTERVAL * 1000000); - if (SHUTTING_DOWN()) { + if (dict_stats_start_shutdown) { break; } @@ -556,9 +566,20 @@ DECLARE_THREAD(dict_stats_thread)(void*) srv_dict_stats_thread_active = false; + os_event_set(dict_stats_shutdown_event); + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit instead of return(). */ os_thread_exit(NULL); OS_THREAD_DUMMY_RETURN; } + +/** Shut down the dict_stats_thread. */ +void +dict_stats_shutdown() +{ + dict_stats_start_shutdown = true; + os_event_set(dict_stats_event); + os_event_wait(dict_stats_shutdown_event); +} diff --git a/storage/xtradb/fil/fil0crypt.cc b/storage/xtradb/fil/fil0crypt.cc index e27e93244ae..e73d600d2ca 100644 --- a/storage/xtradb/fil/fil0crypt.cc +++ b/storage/xtradb/fil/fil0crypt.cc @@ -887,7 +887,7 @@ fil_space_decrypt( Calculate post encryption checksum @param[in] zip_size zip_size or 0 @param[in] dst_frame Block where checksum is calculated -@return page checksum or BUF_NO_CHECKSUM_MAGIC +@return page checksum not needed. */ UNIV_INTERN ulint @@ -896,30 +896,13 @@ fil_crypt_calculate_checksum( const byte* dst_frame) { ib_uint32_t checksum = 0; - srv_checksum_algorithm_t algorithm = - static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm); + /* For encrypted tables we use only crc32 and strict_crc32 */ if (zip_size == 0) { - switch (algorithm) { - case SRV_CHECKSUM_ALGORITHM_CRC32: - case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: - checksum = buf_calc_page_crc32(dst_frame); - break; - case SRV_CHECKSUM_ALGORITHM_INNODB: - case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: - checksum = (ib_uint32_t) buf_calc_page_new_checksum( - dst_frame); - break; - case SRV_CHECKSUM_ALGORITHM_NONE: - case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: - checksum = BUF_NO_CHECKSUM_MAGIC; - break; - /* no default so the compiler will emit a warning - * if new enum is added and not handled here */ - } + checksum = buf_calc_page_crc32(dst_frame); } else { checksum = page_zip_calc_checksum(dst_frame, zip_size, - algorithm); + SRV_CHECKSUM_ALGORITHM_CRC32); } return checksum; @@ -953,14 +936,6 @@ fil_space_verify_crypt_checksum( return(false); } - srv_checksum_algorithm_t algorithm = - static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm); - - /* If no checksum is used, can't continue checking. */ - if (algorithm == SRV_CHECKSUM_ALGORITHM_NONE) { - return(true); - } - /* Read stored post encryption checksum. */ ib_uint32_t checksum = mach_read_from_4( page + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION + 4); @@ -1044,7 +1019,6 @@ fil_space_verify_crypt_checksum( checksum1 = mach_read_from_4( page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM); valid = (buf_page_is_checksum_valid_crc32(page,checksum1,checksum2) - || buf_page_is_checksum_valid_none(page,checksum1,checksum2) || buf_page_is_checksum_valid_innodb(page,checksum1, checksum2)); } @@ -1141,6 +1115,36 @@ fil_crypt_needs_rotation( return false; } +/** Read page 0 and possible crypt data from there. +@param[in,out] space Tablespace */ +static inline +void +fil_crypt_read_crypt_data(fil_space_t* space) +{ + if (space->crypt_data || space->size) { + /* The encryption metadata has already been read, or + the tablespace is not encrypted and the file has been + opened already. */ + return; + } + + mtr_t mtr; + mtr_start(&mtr); + ulint zip_size = fsp_flags_get_zip_size(space->flags); + ulint offset = fsp_header_get_crypt_offset(zip_size); + if (buf_block_t* block = buf_page_get(space->id, zip_size, 0, + RW_S_LATCH, &mtr)) { + mutex_enter(&fil_system->mutex); + if (!space->crypt_data) { + space->crypt_data = fil_space_read_crypt_data( + space->id, block->frame, offset); + } + mutex_exit(&fil_system->mutex); + } + + mtr_commit(&mtr); +} + /*********************************************************************** Start encrypting a space @param[in,out] space Tablespace @@ -1151,6 +1155,7 @@ fil_crypt_start_encrypting_space( fil_space_t* space) { bool recheck = false; + mutex_enter(&fil_crypt_threads_mutex); fil_space_crypt_t *crypt_data = space->crypt_data; @@ -1217,8 +1222,6 @@ fil_crypt_start_encrypting_space( byte* frame = buf_block_get_frame(block); crypt_data->type = CRYPT_SCHEME_1; crypt_data->write_page0(frame, &mtr); - - mtr_commit(&mtr); /* record lsn of update */ @@ -1294,10 +1297,10 @@ struct rotate_thread_t { bool should_shutdown() const { switch (srv_shutdown_state) { case SRV_SHUTDOWN_NONE: - case SRV_SHUTDOWN_CLEANUP: return thread_no >= srv_n_fil_crypt_threads; - case SRV_SHUTDOWN_FLUSH_PHASE: + case SRV_SHUTDOWN_CLEANUP: return true; + case SRV_SHUTDOWN_FLUSH_PHASE: case SRV_SHUTDOWN_LAST_PHASE: case SRV_SHUTDOWN_EXIT_THREADS: break; @@ -1646,6 +1649,8 @@ fil_crypt_find_space_to_rotate( } while (!state->should_shutdown() && state->space) { + fil_crypt_read_crypt_data(state->space); + if (fil_crypt_space_needs_rotation(state, key_state, recheck)) { ut_ad(key_state->key_id); /* init state->min_key_version_found before @@ -2340,8 +2345,10 @@ DECLARE_THREAD(fil_crypt_thread)( while (!thr.should_shutdown() && fil_crypt_find_page_to_rotate(&new_state, &thr)) { - /* rotate a (set) of pages */ - fil_crypt_rotate_pages(&new_state, &thr); + if (!thr.space->is_stopping()) { + /* rotate a (set) of pages */ + fil_crypt_rotate_pages(&new_state, &thr); + } /* If space is marked as stopping, release space and stop rotation. */ @@ -2571,10 +2578,10 @@ fil_space_crypt_get_status( memset(status, 0, sizeof(*status)); ut_ad(space->n_pending_ops > 0); - fil_space_crypt_t* crypt_data = space->crypt_data; + fil_crypt_read_crypt_data(const_cast<fil_space_t*>(space)); status->space = space->id; - if (crypt_data != NULL) { + if (fil_space_crypt_t* crypt_data = space->crypt_data) { mutex_enter(&crypt_data->mutex); status->scheme = crypt_data->type; status->keyserver_requests = crypt_data->keyserver_requests; diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index e39be46840c..12048bc479f 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2014, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -166,7 +166,8 @@ UNIV_INTERN extern uint srv_fil_crypt_rotate_key_age; UNIV_INTERN extern ib_mutex_t fil_crypt_threads_mutex; /** Determine if (i) is a user tablespace id or not. */ -# define fil_is_user_tablespace_id(i) ((i) > srv_undo_tablespaces_open) +# define fil_is_user_tablespace_id(i) (i != 0 \ + && !srv_is_undo_tablespace(i)) /** Determine if user has explicitly disabled fsync(). */ #ifndef __WIN__ @@ -621,7 +622,8 @@ fil_node_open_file( size_bytes = os_file_get_size(node->handle); ut_a(size_bytes != (os_offset_t) -1); - node->file_block_size = os_file_get_block_size(node->handle, node->name); + node->file_block_size = os_file_get_block_size( + node->handle, node->name); space->file_block_size = node->file_block_size; #ifdef UNIV_HOTBACKUP @@ -659,12 +661,10 @@ fil_node_open_file( /* Try to read crypt_data from page 0 if it is not yet read. */ - if (!node->space->page_0_crypt_read) { - ulint offset = fsp_header_get_crypt_offset( - fsp_flags_get_zip_size(flags)); - ut_ad(node->space->crypt_data == NULL); + if (!node->space->crypt_data) { + const ulint offset = fsp_header_get_crypt_offset( + fsp_flags_get_zip_size(flags)); node->space->crypt_data = fil_space_read_crypt_data(space_id, page, offset); - node->space->page_0_crypt_read = true; } ut_free(buf2); @@ -731,7 +731,8 @@ add_size: } if (node->file_block_size == 0) { - node->file_block_size = os_file_get_block_size(node->handle, node->name); + node->file_block_size = os_file_get_block_size( + node->handle, node->name); space->file_block_size = node->file_block_size; } @@ -1597,22 +1598,6 @@ fil_space_create( space->magic_n = FIL_SPACE_MAGIC_N; space->crypt_data = crypt_data; - /* In create table we write page 0 so we have already - "read" it and for system tablespaces we have read - crypt data at startup. */ - if (create_table || crypt_data != NULL) { - space->page_0_crypt_read = true; - } - -#ifdef UNIV_DEBUG - ib_logf(IB_LOG_LEVEL_INFO, - "Created tablespace for space %lu name %s key_id %u encryption %d.", - space->id, - space->name, - space->crypt_data ? space->crypt_data->key_id : 0, - space->crypt_data ? space->crypt_data->encryption : 0); -#endif - rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP); HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space); @@ -2063,8 +2048,6 @@ fil_init( fil_system->spaces = hash_create(hash_size); fil_system->name_hash = hash_create(hash_size); - UT_LIST_INIT(fil_system->LRU); - fil_system->max_n_open = max_n_open; fil_space_crypt_init(); @@ -2264,99 +2247,70 @@ fil_set_max_space_id_if_bigger( mutex_exit(&fil_system->mutex); } -/****************************************************************//** -Writes the flushed lsn and the latest archived log number to the page header -of the first page of a data file of the system tablespace (space 0), -which is uncompressed. */ -static MY_ATTRIBUTE((warn_unused_result)) +/** Write the flushed LSN to the page header of the first page in the +system tablespace. +@param[in] lsn flushed LSN +@return DB_SUCCESS or error number */ dberr_t -fil_write_lsn_and_arch_no_to_file( -/*==============================*/ - ulint space, /*!< in: space to write to */ - ulint sum_of_sizes, /*!< in: combined size of previous files - in space, in database pages */ - lsn_t lsn, /*!< in: lsn to write */ - ulint arch_log_no MY_ATTRIBUTE((unused))) - /*!< in: archived log number to write */ +fil_write_flushed_lsn( + lsn_t lsn) { byte* buf1; byte* buf; dberr_t err; - buf1 = static_cast<byte*>(mem_alloc(2 * UNIV_PAGE_SIZE)); + buf1 = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); buf = static_cast<byte*>(ut_align(buf1, UNIV_PAGE_SIZE)); - err = fil_read(TRUE, space, 0, sum_of_sizes, 0, - UNIV_PAGE_SIZE, buf, NULL, 0); - if (err == DB_SUCCESS) { - mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, - lsn); - - err = fil_write(TRUE, space, 0, sum_of_sizes, 0, - UNIV_PAGE_SIZE, buf, NULL, 0); - } - - mem_free(buf1); - - return(err); -} - -/****************************************************************//** -Writes the flushed lsn and the latest archived log number to the page -header of the first page of each data file in the system tablespace. -@return DB_SUCCESS or error number */ -UNIV_INTERN -dberr_t -fil_write_flushed_lsn_to_data_files( -/*================================*/ - lsn_t lsn, /*!< in: lsn to write */ - ulint arch_log_no) /*!< in: latest archived log file number */ -{ - fil_space_t* space; - fil_node_t* node; - dberr_t err; - - mutex_enter(&fil_system->mutex); - - for (space = UT_LIST_GET_FIRST(fil_system->space_list); - space != NULL; - space = UT_LIST_GET_NEXT(space_list, space)) { - - /* We only write the lsn to all existing data files which have - been open during the lifetime of the mysqld process; they are - represented by the space objects in the tablespace memory - cache. Note that all data files in the system tablespace 0 - and the UNDO log tablespaces (if separate) are always open. */ - - if (space->purpose == FIL_TABLESPACE - && !fil_is_user_tablespace_id(space->id)) { - ulint sum_of_sizes = 0; + /* Acquire system tablespace */ + fil_space_t* space = fil_space_acquire(0); - for (node = UT_LIST_GET_FIRST(space->chain); - node != NULL; - node = UT_LIST_GET_NEXT(chain, node)) { + /* If tablespace is not encrypted, stamp flush_lsn to + first page of all system tablespace datafiles to avoid + unnecessary error messages on possible downgrade. */ + if (space->crypt_data->min_key_version == 0) { + fil_node_t* node; + ulint sum_of_sizes = 0; - mutex_exit(&fil_system->mutex); - - err = fil_write_lsn_and_arch_no_to_file( - space->id, sum_of_sizes, lsn, - arch_log_no); + for (node = UT_LIST_GET_FIRST(space->chain); + node != NULL; + node = UT_LIST_GET_NEXT(chain, node)) { - if (err != DB_SUCCESS) { + err = fil_read(TRUE, 0, 0, sum_of_sizes, 0, + UNIV_PAGE_SIZE, buf, NULL, 0); - return(err); - } + if (err == DB_SUCCESS) { + mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, + lsn); - mutex_enter(&fil_system->mutex); + err = fil_write(TRUE, 0, 0, sum_of_sizes, 0, + UNIV_PAGE_SIZE, buf, NULL, 0); sum_of_sizes += node->size; } } + } else { + /* When system tablespace is encrypted stamp flush_lsn to + only the first page of the first datafile (rest of pages + are encrypted). */ + err = fil_read(TRUE, 0, 0, 0, 0, + UNIV_PAGE_SIZE, buf, NULL, 0); + + if (err == DB_SUCCESS) { + mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION, + lsn); + + err = fil_write(TRUE, 0, 0, 0, 0, + UNIV_PAGE_SIZE, buf, NULL, 0); + } } - mutex_exit(&fil_system->mutex); + fil_flush_file_spaces(FIL_TABLESPACE); + fil_space_release(space); - return(DB_SUCCESS); + ut_free(buf1); + + return(err); } /** Check the consistency of the first data page of a tablespace @@ -2409,30 +2363,29 @@ fil_check_first_page(const page_t* page, ulint space_id, ulint flags) return("inconsistent data in space header"); } -/*******************************************************************//** -Reads the flushed lsn, arch no, space_id and tablespace flag fields from -the first page of a data file at database startup. +/** Reads the flushed lsn, arch no, space_id and tablespace flag fields from +the first page of a first data file at database startup. +@param[in] data_file open data file +@param[in] one_read_only true if first datafile is already + read +@param[out] flags FSP_SPACE_FLAGS +@param[out] space_id tablepspace ID +@param[out] flushed_lsn flushed lsn value +@param[out] crypt_data encryption crypt data @retval NULL on success, or if innodb_force_recovery is set @return pointer to an error message string */ UNIV_INTERN const char* fil_read_first_page( -/*================*/ - os_file_t data_file, /*!< in: open data file */ - ibool one_read_already, /*!< in: TRUE if min and max - parameters below already - contain sensible data */ - ulint* flags, /*!< out: FSP_SPACE_FLAGS */ - ulint* space_id, /*!< out: tablespace ID */ - lsn_t* min_flushed_lsn, /*!< out: min of flushed - lsn values in data files */ - lsn_t* max_flushed_lsn, /*!< out: max of flushed - lsn values in data files */ - fil_space_crypt_t** crypt_data) /*< out: crypt data */ + pfs_os_file_t data_file, + ibool one_read_already, + ulint* flags, + ulint* space_id, + lsn_t* flushed_lsn, + fil_space_crypt_t** crypt_data) { byte* buf; byte* page; - lsn_t flushed_lsn; const char* check_msg = NULL; fil_space_crypt_t* cdata; @@ -2449,6 +2402,7 @@ fil_read_first_page( return "File size is less than minimum"; } } + buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); /* Align the memory for a possible read from a raw device */ @@ -2467,6 +2421,11 @@ fil_read_first_page( *space_id = fsp_header_get_space_id(page); *flags = fsp_header_get_flags(page); + if (flushed_lsn) { + *flushed_lsn = mach_read_from_8(page + + FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + } + if (!fsp_flags_is_valid(*flags)) { ulint cflags = fsp_flags_convert_from_101(*flags); if (cflags == ULINT_UNDEFINED) { @@ -2479,37 +2438,36 @@ fil_read_first_page( } } - if (!(IS_XTRABACKUP() && srv_backup_mode)) { - check_msg = fil_check_first_page(page, *space_id, *flags); + if (!(IS_XTRABACKUP() && srv_backup_mode)) { + check_msg = fil_check_first_page(page, *space_id, *flags); } - } - flushed_lsn = mach_read_from_8(page + - FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION); + /* Possible encryption crypt data is also stored only to first page + of the first datafile. */ + const ulint offset = fsp_header_get_crypt_offset( + fsp_flags_get_zip_size(*flags)); - ulint space = fsp_header_get_space_id(page); - ulint offset = fsp_header_get_crypt_offset( - fsp_flags_get_zip_size(*flags)); + cdata = fil_space_read_crypt_data(*space_id, page, offset); - cdata = fil_space_read_crypt_data(space, page, offset); - - if (crypt_data) { - *crypt_data = cdata; - } - - /* If file space is encrypted we need to have at least some - encryption service available where to get keys */ - if (cdata && cdata->should_encrypt()) { + if (crypt_data) { + *crypt_data = cdata; + } - if (!encryption_key_id_exists(cdata->key_id)) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Tablespace id %ld is encrypted but encryption service" - " or used key_id %u is not available. Can't continue opening tablespace.", - space, cdata->key_id); + /* If file space is encrypted we need to have at least some + encryption service available where to get keys */ + if (cdata && cdata->should_encrypt()) { - return ("table encrypted but encryption service not available."); + if (!encryption_key_id_exists(cdata->key_id)) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Tablespace id " ULINTPF + " is encrypted but encryption service" + " or used key_id %u is not available. " + "Can't continue opening tablespace.", + *space_id, cdata->key_id); + return ("table encrypted but encryption service not available."); + } } } @@ -2519,20 +2477,6 @@ fil_read_first_page( return(check_msg); } - if (!one_read_already) { - *min_flushed_lsn = flushed_lsn; - *max_flushed_lsn = flushed_lsn; - - return(NULL); - } - - if (*min_flushed_lsn > flushed_lsn) { - *min_flushed_lsn = flushed_lsn; - } - if (*max_flushed_lsn < flushed_lsn) { - *max_flushed_lsn = flushed_lsn; - } - return(NULL); } @@ -2791,14 +2735,12 @@ fil_op_log_parse_or_replay( } else if (log_flags & MLOG_FILE_FLAG_TEMP) { /* Temporary table, do nothing */ } else { - const char* path = NULL; - /* Create the database directory for name, if it does not exist yet */ fil_create_directory_for_tablename(name); if (fil_create_new_single_table_tablespace( - space_id, name, path, flags, + space_id, name, NULL, flags, DICT_TF2_USE_TABLESPACE, FIL_IBD_FILE_INITIAL_SIZE, FIL_ENCRYPTION_DEFAULT, @@ -3744,7 +3686,7 @@ fil_open_linked_file( /*===============*/ const char* tablename, /*!< in: database/tablename */ char** remote_filepath,/*!< out: remote filepath */ - os_file_t* remote_file, /*!< out: remote file handle */ + pfs_os_file_t* remote_file, /*!< out: remote file handle */ ulint atomic_writes) /*!< in: atomic writes table option value */ { @@ -3807,7 +3749,8 @@ fil_create_new_single_table_tablespace( fil_encryption_t mode, /*!< in: encryption mode */ ulint key_id) /*!< in: encryption key_id */ { - os_file_t file; + pfs_os_file_t file; + ibool ret; dberr_t err; byte* buf2; @@ -4250,6 +4193,7 @@ fsp_flags_try_adjust(ulint space_id, ulint flags) flags, MLOG_4BYTES, &mtr); } } + mtr_commit(&mtr); } @@ -4377,6 +4321,7 @@ fil_open_single_table_tablespace( def.file = os_file_create_simple_no_error_handling( innodb_file_data_key, def.filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &def.success, atomic_writes); + if (def.success) { tablespaces_found++; } @@ -4391,8 +4336,8 @@ fil_open_single_table_tablespace( /* Read the first page of the datadir tablespace, if found. */ if (def.success) { def.check_msg = fil_read_first_page( - def.file, FALSE, &def.flags, &def.id, - &def.lsn, &def.lsn, &def.crypt_data); + def.file, false, &def.flags, &def.id, + NULL, &def.crypt_data); if (table) { table->crypt_data = def.crypt_data; @@ -4401,6 +4346,7 @@ fil_open_single_table_tablespace( def.valid = !def.check_msg && def.id == id && fsp_flags_match(flags, def.flags); + if (def.valid) { valid_tablespaces_found++; } else { @@ -4414,8 +4360,8 @@ fil_open_single_table_tablespace( /* Read the first page of the remote tablespace */ if (remote.success) { remote.check_msg = fil_read_first_page( - remote.file, FALSE, &remote.flags, &remote.id, - &remote.lsn, &remote.lsn, &remote.crypt_data); + remote.file, false, &remote.flags, &remote.id, + NULL, &remote.crypt_data); if (table) { table->crypt_data = remote.crypt_data; @@ -4425,6 +4371,7 @@ fil_open_single_table_tablespace( /* Validate this single-table-tablespace with SYS_TABLES. */ remote.valid = !remote.check_msg && remote.id == id && fsp_flags_match(flags, remote.flags); + if (remote.valid) { valid_tablespaces_found++; } else { @@ -4439,8 +4386,8 @@ fil_open_single_table_tablespace( /* Read the first page of the datadir tablespace, if found. */ if (dict.success) { dict.check_msg = fil_read_first_page( - dict.file, FALSE, &dict.flags, &dict.id, - &dict.lsn, &dict.lsn, &dict.crypt_data); + dict.file, false, &dict.flags, &dict.id, + NULL, &dict.crypt_data); if (table) { table->crypt_data = dict.crypt_data; @@ -4472,14 +4419,16 @@ fil_open_single_table_tablespace( "See " REFMAN "innodb-troubleshooting-datadict.html " "for how to resolve the issue.", tablename); + if (IS_XTRABACKUP() && fix_dict) { ib_logf(IB_LOG_LEVEL_WARN, - "It will be removed from the data dictionary."); + "It will be removed from the data dictionary."); if (purge_sys) { fil_remove_invalid_table_from_data_dict(tablename); } } + err = DB_CORRUPTION; goto cleanup_and_exit; @@ -4491,26 +4440,32 @@ fil_open_single_table_tablespace( ib_logf(IB_LOG_LEVEL_ERROR, "A tablespace for %s has been found in " "multiple places;", tablename); + if (def.success) { ib_logf(IB_LOG_LEVEL_ERROR, - "Default location; %s, LSN=" LSN_PF - ", Space ID=%lu, Flags=%lu", - def.filepath, def.lsn, - (ulong) def.id, (ulong) def.flags); + "Default location; %s" + ", Space ID=" ULINTPF " , Flags=" ULINTPF " .", + def.filepath, + def.id, + def.flags); } + if (remote.success) { ib_logf(IB_LOG_LEVEL_ERROR, - "Remote location; %s, LSN=" LSN_PF - ", Space ID=%lu, Flags=%lu", - remote.filepath, remote.lsn, - (ulong) remote.id, (ulong) remote.flags); + "Remote location; %s" + ", Space ID=" ULINTPF " , Flags=" ULINTPF " .", + remote.filepath, + remote.id, + remote.flags); } + if (dict.success) { ib_logf(IB_LOG_LEVEL_ERROR, - "Dictionary location; %s, LSN=" LSN_PF - ", Space ID=%lu, Flags=%lu", - dict.filepath, dict.lsn, - (ulong) dict.id, (ulong) dict.flags); + "Dictionary location; %s" + ", Space ID=" ULINTPF " , Flags=" ULINTPF " .", + dict.filepath, + dict.id, + dict.flags); } /* Force-recovery will allow some tablespaces to be @@ -4543,6 +4498,7 @@ fil_open_single_table_tablespace( os_file_close(def.file); tablespaces_found--; } + if (dict.success && !dict.valid) { dict.success = false; os_file_close(dict.file); @@ -4658,7 +4614,17 @@ cleanup_and_exit: mem_free(def.filepath); - if (err == DB_SUCCESS && !srv_read_only_mode) { + /* We need to check fsp flags when no errors has happened and + server was not started on read only mode and tablespace validation + was requested or flags contain other table options except + low order bits to FSP_FLAGS_POS_PAGE_SSIZE position. + Note that flag comparison is pessimistic. Adjust is required + only when flags contain buggy MariaDB 10.1.0 - + MariaDB 10.1.20 flags. */ + if (err == DB_SUCCESS + && !srv_read_only_mode + && (validate + || flags >= (1U << FSP_FLAGS_POS_PAGE_SSIZE))) { fsp_flags_try_adjust(id, flags & ~FSP_FLAGS_MEM_MASK); } @@ -4895,8 +4861,8 @@ fil_validate_single_table_tablespace( check_first_page: fsp->success = TRUE; if (const char* check_msg = fil_read_first_page( - fsp->file, FALSE, &fsp->flags, &fsp->id, - &fsp->lsn, &fsp->lsn, &fsp->crypt_data)) { + fsp->file, false, &fsp->flags, &fsp->id, + NULL, &fsp->crypt_data)) { ib_logf(IB_LOG_LEVEL_ERROR, "%s in tablespace %s (table %s)", check_msg, fsp->filepath, tablename); @@ -4909,6 +4875,7 @@ check_first_page: in Xtrabackup, this does not work.*/ return; } + if (!restore_attempted) { if (!fil_user_tablespace_find_space_id(fsp)) { return; @@ -5152,11 +5119,11 @@ will_not_choose: if (def.success && remote.success) { ib_logf(IB_LOG_LEVEL_ERROR, "Tablespaces for %s have been found in two places;\n" - "Location 1: SpaceID: %lu LSN: %lu File: %s\n" - "Location 2: SpaceID: %lu LSN: %lu File: %s\n" + "Location 1: SpaceID: " ULINTPF " File: %s\n" + "Location 2: SpaceID: " ULINTPF " File: %s\n" "You must delete one of them.", - tablename, (ulong) def.id, (ulong) def.lsn, - def.filepath, (ulong) remote.id, (ulong) remote.lsn, + tablename, def.id, + def.filepath, remote.id, remote.filepath); def.success = FALSE; @@ -6141,19 +6108,19 @@ fil_report_invalid_page_access( ulint len, /*!< in: I/O length */ ulint type) /*!< in: I/O type */ { - fprintf(stderr, - "InnoDB: Error: trying to access page number %lu" - " in space %lu,\n" - "InnoDB: space name %s,\n" - "InnoDB: which is outside the tablespace bounds.\n" - "InnoDB: Byte offset %lu, len %lu, i/o type %lu.\n" - "InnoDB: If you get this error at mysqld startup," - " please check that\n" - "InnoDB: your my.cnf matches the ibdata files" - " that you have in the\n" - "InnoDB: MySQL server.\n", - (ulong) block_offset, (ulong) space_id, space_name, - (ulong) byte_offset, (ulong) len, (ulong) type); + ib_logf(IB_LOG_LEVEL_FATAL, + "Trying to access page number " ULINTPF + " in space " ULINTPF + " space name %s," + " which is outside the tablespace bounds." + " Byte offset " ULINTPF ", len " ULINTPF + " i/o type " ULINTPF ".%s", + block_offset, space_id, space_name, + byte_offset, len, type, + space_id == 0 && !srv_was_started + ? "Please check that the configuration matches" + " the InnoDB system tablespace location (ibdata files)" + : ""); } /********************************************************************//** @@ -6373,11 +6340,10 @@ _fil_io( mutex_exit(&fil_system->mutex); return(DB_ERROR); } + fil_report_invalid_page_access( block_offset, space_id, space->name, byte_offset, len, type); - - ut_error; } /* Open file if closed */ @@ -6389,10 +6355,11 @@ _fil_io( ib_logf(IB_LOG_LEVEL_ERROR, "Trying to do i/o to a tablespace which " "exists without .ibd data file. " - "i/o type %lu, space id %lu, page no %lu, " - "i/o length %lu bytes", - (ulong) type, (ulong) space_id, - (ulong) block_offset, (ulong) len); + "i/o type " ULINTPF ", space id " + ULINTPF ", page no " ULINTPF ", " + "i/o length " ULINTPF " bytes", + type, space_id, + block_offset, len); return(DB_TABLESPACE_DELETED); } @@ -6412,8 +6379,6 @@ _fil_io( fil_report_invalid_page_access( block_offset, space_id, space->name, byte_offset, len, type); - - ut_error; } /* Now we have made the changes in the data structures of fil_system */ @@ -6886,7 +6851,7 @@ fil_buf_block_init( } struct fil_iterator_t { - os_file_t file; /*!< File handle */ + pfs_os_file_t file; /*!< File handle */ const char* filepath; /*!< File path name */ os_offset_t start; /*!< From where to start */ os_offset_t end; /*!< Where to stop */ @@ -6929,15 +6894,15 @@ fil_iterate( /* TODO: For compressed tables we do a lot of useless copying for non-index pages. Unfortunately, it is required by buf_zip_decompress() */ + const bool row_compressed = callback.get_zip_size() > 0; for (offset = iter.start; offset < iter.end; offset += n_bytes) { byte* io_buffer = iter.io_buffer; - bool row_compressed = false; block->frame = io_buffer; - if (callback.get_zip_size() > 0) { + if (row_compressed) { page_zip_des_init(&block->page.zip); page_zip_set_size(&block->page.zip, iter.page_size); block->page.zip.data = block->frame + UNIV_PAGE_SIZE; @@ -6946,9 +6911,6 @@ fil_iterate( /* Zip IO is done in the compressed page buffer. */ io_buffer = block->page.zip.data; - row_compressed = true; - } else { - io_buffer = iter.io_buffer; } /* We have to read the exact number of bytes. Otherwise the @@ -6961,16 +6923,12 @@ fil_iterate( ut_ad(n_bytes > 0); ut_ad(!(n_bytes % iter.page_size)); - byte* readptr = io_buffer; - byte* writeptr = io_buffer; - bool encrypted = false; - + const bool encrypted = iter.crypt_data != NULL + && iter.crypt_data->should_encrypt(); /* Use additional crypt io buffer if tablespace is encrypted */ - if (iter.crypt_data != NULL && iter.crypt_data->should_encrypt()) { - encrypted = true; - readptr = iter.crypt_io_buffer; - writeptr = iter.crypt_io_buffer; - } + byte* const readptr = encrypted + ? iter.crypt_io_buffer : io_buffer; + byte* const writeptr = readptr; if (!os_file_read(iter.file, readptr, offset, (ulint) n_bytes)) { @@ -6993,8 +6951,9 @@ fil_iterate( ulint page_type = mach_read_from_2(src+FIL_PAGE_TYPE); - bool page_compressed = (page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED || - page_type == FIL_PAGE_PAGE_COMPRESSED); + const bool page_compressed + = page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED + || page_type == FIL_PAGE_PAGE_COMPRESSED; /* If tablespace is encrypted, we need to decrypt the page. Note that tablespaces are not in @@ -7096,8 +7055,7 @@ fil_iterate( 0,/* FIXME: compression level */ 512,/* FIXME: use proper block size */ encrypted, - &len, - NULL); + &len); updated = true; } @@ -7165,7 +7123,7 @@ fil_tablespace_iterate( PageCallback& callback) { dberr_t err; - os_file_t file; + pfs_os_file_t file; char* filepath; ut_a(n_io_buffers > 0); diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc index 8b2449983df..2b6ae95640f 100644 --- a/storage/xtradb/fil/fil0pagecompress.cc +++ b/storage/xtradb/fil/fil0pagecompress.cc @@ -99,17 +99,16 @@ fil_compress_page( ulint level, /* in: compression level */ ulint block_size, /*!< in: block size */ bool encrypted, /*!< in: is page also encrypted */ - ulint* out_len, /*!< out: actual length of compressed + ulint* out_len) /*!< out: actual length of compressed page */ - byte* lzo_mem) /*!< in: temporal memory used by LZO */ { int err = Z_OK; int comp_level = level; ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE; - ulint write_size=0; + ulint write_size = 0; /* Cache to avoid change during function execution */ ulint comp_method = innodb_compression_algorithm; - bool allocated=false; + bool allocated = false; /* page_compression does not apply to tables or tablespaces that use ROW_FORMAT=COMPRESSED */ @@ -121,13 +120,23 @@ fil_compress_page( if (!out_buf) { allocated = true; - out_buf = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE)); -#ifdef HAVE_LZO + ulint size = UNIV_PAGE_SIZE; + + /* Both snappy and lzo compression methods require that + output buffer used for compression is bigger than input + buffer. Increase the allocated buffer size accordingly. */ +#if HAVE_SNAPPY + if (comp_method == PAGE_SNAPPY_ALGORITHM) { + size = snappy_max_compressed_length(size); + } +#endif +#if HAVE_LZO if (comp_method == PAGE_LZO_ALGORITHM) { - lzo_mem = static_cast<byte *>(ut_malloc(LZO1X_1_15_MEM_COMPRESS)); - memset(lzo_mem, 0, LZO1X_1_15_MEM_COMPRESS); + size += LZO1X_1_15_MEM_COMPRESS; } #endif + + out_buf = static_cast<byte *>(ut_malloc(size)); } ut_ad(buf); @@ -163,8 +172,14 @@ fil_compress_page( switch(comp_method) { #ifdef HAVE_LZ4 case PAGE_LZ4_ALGORITHM: + +#ifdef HAVE_LZ4_COMPRESS_DEFAULT + err = LZ4_compress_default((const char *)buf, + (char *)out_buf+header_len, len, write_size); +#else err = LZ4_compress_limitedOutput((const char *)buf, (char *)out_buf+header_len, len, write_size); +#endif /* HAVE_LZ4_COMPRESS_DEFAULT */ write_size = err; if (err == 0) { @@ -192,7 +207,7 @@ fil_compress_page( #ifdef HAVE_LZO case PAGE_LZO_ALGORITHM: err = lzo1x_1_15_compress( - buf, len, out_buf+header_len, &write_size, lzo_mem); + buf, len, out_buf+header_len, &write_size, out_buf+UNIV_PAGE_SIZE); if (err != LZO_E_OK || write_size > UNIV_PAGE_SIZE-header_len) { if (space && !space->printed_compression_failure) { @@ -283,6 +298,7 @@ fil_compress_page( case PAGE_SNAPPY_ALGORITHM: { snappy_status cstatus; + write_size = snappy_max_compressed_length(UNIV_PAGE_SIZE); cstatus = snappy_compress( (const char *)buf, @@ -438,11 +454,6 @@ fil_compress_page( err_exit: if (allocated) { ut_free(out_buf); -#ifdef HAVE_LZO - if (comp_method == PAGE_LZO_ALGORITHM) { - ut_free(lzo_mem); - } -#endif } return (buf); @@ -504,7 +515,7 @@ fil_decompress_page( ptype != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED)) { ib_logf(IB_LOG_LEVEL_ERROR, "Corruption: We try to uncompress corrupted page" - " CRC %lu type %lu len %lu.", + " CRC " ULINTPF " type " ULINTPF " len " ULINTPF ".", mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM), mach_read_from_2(buf+FIL_PAGE_TYPE), len); @@ -528,7 +539,7 @@ fil_decompress_page( if (actual_size == 0 || actual_size > UNIV_PAGE_SIZE) { ib_logf(IB_LOG_LEVEL_ERROR, "Corruption: We try to uncompress corrupted page" - " actual size %lu compression %s.", + " actual size " ULINTPF " compression %s.", actual_size, fil_get_compression_alg_name(compression_alg)); fflush(stderr); if (return_error) { @@ -543,12 +554,9 @@ fil_decompress_page( *write_size = actual_size; } -#ifdef UNIV_PAGECOMPRESS_DEBUG - ib_logf(IB_LOG_LEVEL_INFO, - "Preparing for decompress for len %lu\n", - actual_size); -#endif /* UNIV_PAGECOMPRESS_DEBUG */ - + DBUG_PRINT("compress", + ("Preparing for decompress for len " ULINTPF ".", + actual_size)); switch(compression_alg) { case PAGE_ZLIB_ALGORITHM: @@ -560,7 +568,7 @@ fil_decompress_page( ib_logf(IB_LOG_LEVEL_ERROR, "Corruption: Page is marked as compressed" " but uncompress failed with error %d " - " size %lu len %lu.", + " size " ULINTPF " len " ULINTPF ".", err, actual_size, len); fflush(stderr); @@ -579,9 +587,10 @@ fil_decompress_page( if (err != (int)actual_size) { ib_logf(IB_LOG_LEVEL_ERROR, "Corruption: Page is marked as compressed" - " but decompression read only %d bytes " - " size %lu len %lu.", + " but uncompress failed with error %d " + " size " ULINTPF " len " ULINTPF ".", err, actual_size, len); + fflush(stderr); if (return_error) { @@ -593,16 +602,17 @@ fil_decompress_page( #endif /* HAVE_LZ4 */ #ifdef HAVE_LZO case PAGE_LZO_ALGORITHM: { - ulint olen=0; + ulint olen = 0; err = lzo1x_decompress((const unsigned char *)buf+header_len, actual_size,(unsigned char *)in_buf, &olen, NULL); if (err != LZO_E_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) { ib_logf(IB_LOG_LEVEL_ERROR, "Corruption: Page is marked as compressed" - " but decompression read only %ld bytes" - " size %lu len %lu.", - olen, actual_size, len); + " but uncompress failed with error %d " + " size " ULINTPF " len " ULINTPF ".", + err, actual_size, len); + fflush(stderr); if (return_error) { @@ -637,7 +647,7 @@ fil_decompress_page( ib_logf(IB_LOG_LEVEL_ERROR, "Corruption: Page is marked as compressed" " but decompression read only %ld bytes" - " size %lu len %lu.", + " size " ULINTPF "len " ULINTPF ".", dst_pos, actual_size, len); fflush(stderr); @@ -666,7 +676,7 @@ fil_decompress_page( ib_logf(IB_LOG_LEVEL_ERROR, "Corruption: Page is marked as compressed" " but decompression read only %du bytes" - " size %lu len %lu err %d.", + " size " ULINTPF " len " ULINTPF " err %d.", dst_pos, actual_size, len, err); fflush(stderr); @@ -682,7 +692,7 @@ fil_decompress_page( case PAGE_SNAPPY_ALGORITHM: { snappy_status cstatus; - ulint olen = 0; + ulint olen = UNIV_PAGE_SIZE; cstatus = snappy_uncompress( (const char *)(buf+header_len), @@ -690,11 +700,11 @@ fil_decompress_page( (char *)in_buf, (size_t*)&olen); - if (cstatus != SNAPPY_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) { + if (cstatus != SNAPPY_OK || olen != UNIV_PAGE_SIZE) { ib_logf(IB_LOG_LEVEL_ERROR, "Corruption: Page is marked as compressed" - " but decompression read only %lu bytes" - " size %lu len %lu err %d.", + " but decompression read only " ULINTPF " bytes" + " size " ULINTPF " len " ULINTPF " err %d.", olen, actual_size, len, (int)cstatus); fflush(stderr); @@ -703,6 +713,7 @@ fil_decompress_page( } ut_error; } + break; } #endif /* HAVE_SNAPPY */ @@ -728,8 +739,7 @@ fil_decompress_page( memcpy(buf, in_buf, len); error_return: - // Need to free temporal buffer if no buffer was given - if (page_buf == NULL) { + if (page_buf != in_buf) { ut_free(in_buf); } } diff --git a/storage/xtradb/fsp/fsp0fsp.cc b/storage/xtradb/fsp/fsp0fsp.cc index 934824c6462..df8c6ffe222 100644 --- a/storage/xtradb/fsp/fsp0fsp.cc +++ b/storage/xtradb/fsp/fsp0fsp.cc @@ -673,16 +673,13 @@ fsp_header_init_fields( } #ifndef UNIV_HOTBACKUP -/**********************************************************************//** -Initializes the space header of a new created space and creates also the -insert buffer tree root if space == 0. */ +/** Initialize a tablespace header. +@param[in] space_id space id +@param[in] size current size in blocks +@param[in,out] mtr mini-transaction */ UNIV_INTERN void -fsp_header_init( -/*============*/ - ulint space_id, /*!< in: space id */ - ulint size, /*!< in: current size in blocks */ - mtr_t* mtr) /*!< in/out: mini-transaction */ +fsp_header_init(ulint space_id, ulint size, mtr_t* mtr) { fsp_header_t* header; buf_block_t* block; @@ -725,14 +722,8 @@ fsp_header_init( flst_init(header + FSP_SEG_INODES_FREE, mtr); mlog_write_ull(header + FSP_SEG_ID, 1, mtr); - if (space_id == 0) { - fsp_fill_free_list(FALSE, space_id, header, mtr); - btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, - 0, 0, DICT_IBUF_ID_MIN + space_id, - dict_ind_redundant, mtr); - } else { - fsp_fill_free_list(TRUE, space_id, header, mtr); - } + + fsp_fill_free_list(space_id != TRX_SYS_SPACE, space_id, header, mtr); fil_space_t* space = fil_space_acquire(space_id); ut_ad(space); @@ -2075,7 +2066,6 @@ fseg_create_general( inode = fsp_alloc_seg_inode(space_header, mtr); if (inode == NULL) { - goto funct_exit; } @@ -2750,7 +2740,6 @@ fsp_reserve_free_extents( ibool success; ulint n_pages_added; size_t total_reserved = 0; - ulint rounds = 0; ut_ad(mtr); *n_reserved = n_ext; @@ -2829,17 +2818,7 @@ try_to_extend: success = fsp_try_extend_data_file(&n_pages_added, space, space_header, mtr); if (success && n_pages_added > 0) { - - rounds++; total_reserved += n_pages_added; - - if (rounds > 50) { - ib_logf(IB_LOG_LEVEL_INFO, - "Space id %lu trying to reserve %lu extents actually reserved %lu " - " reserve %lu free %lu size %lu rounds %lu total_reserved %llu", - space, n_ext, n_pages_added, reserve, n_free, size, rounds, (ullint) total_reserved); - } - goto try_again; } @@ -4160,20 +4139,8 @@ ulint fsp_header_get_crypt_offset( const ulint zip_size) { - ulint pageno = 0; - /* compute first page_no that will have xdes stored on page != 0*/ - for (ulint i = 0; - (pageno = xdes_calc_descriptor_page(zip_size, i)) == 0; ) - i++; - - /* use pageno prior to this...i.e last page on page 0 */ - ut_ad(pageno > 0); - pageno--; - - ulint iv_offset = XDES_ARR_OFFSET + - XDES_SIZE * (1 + xdes_calc_descriptor_index(zip_size, pageno)); - - return FSP_HEADER_OFFSET + iv_offset; + return (FSP_HEADER_OFFSET + (XDES_ARR_OFFSET + XDES_SIZE * + (zip_size ? zip_size : UNIV_PAGE_SIZE) / FSP_EXTENT_SIZE)); } /**********************************************************************//** diff --git a/storage/xtradb/fts/fts0que.cc b/storage/xtradb/fts/fts0que.cc index 2e335c1c255..f24973e26fb 100644 --- a/storage/xtradb/fts/fts0que.cc +++ b/storage/xtradb/fts/fts0que.cc @@ -953,6 +953,18 @@ fts_query_free_doc_ids( query->total_size -= SIZEOF_RBT_CREATE; } +/** +Free the query intersection +@param[in] query query instance */ +static +void +fts_query_free_intersection( + fts_query_t* query) +{ + fts_query_free_doc_ids(query, query->intersection); + query->intersection = NULL; +} + /*******************************************************************//** Add the word to the documents "list" of matching words from the query. We make a copy of the word from the query heap. */ @@ -1311,6 +1323,7 @@ fts_query_intersect( /* error is passed by 'query->error' */ if (query->error != DB_SUCCESS) { ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT); + fts_query_free_intersection(query); return(query->error); } @@ -1339,6 +1352,8 @@ fts_query_intersect( ut_a(!query->multi_exist || (query->multi_exist && rbt_size(query->doc_ids) <= n_doc_ids)); + } else if (query->intersection != NULL) { + fts_query_free_intersection(query); } } @@ -1557,6 +1572,11 @@ fts_merge_doc_ids( query, ranking->doc_id, ranking->rank); if (query->error != DB_SUCCESS) { + if (query->intersection != NULL) + { + ut_a(query->oper == FTS_EXIST); + fts_query_free_intersection(query); + } DBUG_RETURN(query->error); } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 49b0fedb3b4..28f5acb2dcc 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2000, 2016, Oracle and/or its affiliates. +Copyright (c) 2000, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2013, 2017, MariaDB Corporation. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. @@ -876,17 +876,31 @@ innobase_purge_changed_page_bitmaps( /*================================*/ ulonglong lsn) __attribute__((unused)); /*!< in: LSN to purge files up to */ +/** Empty free list algorithm. +Checks if buffer pool is big enough to enable backoff algorithm. +InnoDB empty free list algorithm backoff requires free pages +from LRU for the best performance. +buf_LRU_buf_pool_running_out cancels query if 1/4 of +buffer pool belongs to LRU or freelist. +At the same time buf_flush_LRU_list_batch +keeps up to BUF_LRU_MIN_LEN in LRU. +In order to avoid deadlock baclkoff requires buffer pool +to be at least 4*BUF_LRU_MIN_LEN, +but flush peformance is bad because of trashing +and additional BUF_LRU_MIN_LEN pages are requested. +@param[in] algorithm desired algorithm from srv_empty_free_list_t +@return true if it's possible to enable backoff. */ +static inline +bool +innodb_empty_free_list_algorithm_allowed( + srv_empty_free_list_t algorithm) +{ + long long buf_pool_pages = srv_buf_pool_size / srv_page_size + / srv_buf_pool_instances; -/*****************************************************************//** -Check whether this is a fake change transaction. -@return TRUE if a fake change transaction */ -static -my_bool -innobase_is_fake_change( -/*====================*/ - handlerton *hton, /*!< in: InnoDB handlerton */ - THD* thd) __attribute__((unused)); /*!< in: MySQL thread handle of the user for - whom the transaction is being committed */ + return(buf_pool_pages >= BUF_LRU_MIN_LEN * (4 + 1) + || algorithm != SRV_EMPTY_FREE_LIST_BACKOFF); +} /** Get the list of foreign keys referencing a specified table table. @@ -1601,14 +1615,11 @@ innobase_drop_database( the path is used as the database name: for example, in 'mysql/data/test' the database name is 'test' */ -/*******************************************************************//** -Closes an InnoDB database. */ +/** Shut down the InnoDB storage engine. +@return 0 */ static int -innobase_end( -/*=========*/ - handlerton* hton, /* in: Innodb handlerton */ - ha_panic_function type); +innobase_end(handlerton*, ha_panic_function); #if NOT_USED /*****************************************************************//** @@ -1703,28 +1714,6 @@ normalize_table_name_low( ibool set_lower_case); /* in: TRUE if we want to set name to lower case */ -/*************************************************************//** -Checks if buffer pool is big enough to enable backoff algorithm. -InnoDB empty free list algorithm backoff requires free pages -from LRU for the best performance. -buf_LRU_buf_pool_running_out cancels query if 1/4 of -buffer pool belongs to LRU or freelist. -At the same time buf_flush_LRU_list_batch -keeps up to BUF_LRU_MIN_LEN in LRU. -In order to avoid deadlock baclkoff requires buffer pool -to be at least 4*BUF_LRU_MIN_LEN, -but flush peformance is bad because of trashing -and additional BUF_LRU_MIN_LEN pages are requested. -@return true if it's possible to enable backoff. */ -static -bool -innodb_empty_free_list_algorithm_backoff_allowed( - srv_empty_free_list_t - algorithm, /*!< in: desired algorithm - from srv_empty_free_list_t */ - long long buf_pool_pages); /*!< in: total number - of pages inside buffer pool */ - #ifdef NOT_USED /*************************************************************//** Removes old archived transaction log files. @@ -2596,16 +2585,11 @@ innobase_get_stmt( THD* thd, /*!< in: MySQL thread handle */ size_t* length) /*!< out: length of the SQL statement */ { - const char* query = NULL; - LEX_STRING *stmt = NULL; - if (thd) { - stmt = thd_query_string(thd); - if (stmt) { - *length = stmt->length; - query = stmt->str; - } + if (const LEX_STRING *stmt = thd_query_string(thd)) { + *length = stmt->length; + return stmt->str; } - return (query); + return NULL; } /**********************************************************************//** @@ -3505,13 +3489,13 @@ innobase_convert_identifier( ibool file_id)/*!< in: TRUE=id is a table or database name; FALSE=id is an UTF-8 string */ { + char nz2[MAX_TABLE_NAME_LEN + 1]; const char* s = id; int q; if (file_id) { char nz[MAX_TABLE_NAME_LEN + 1]; - char nz2[MAX_TABLE_NAME_LEN + 1]; /* Decode the table name. The MySQL function expects a NUL-terminated string. The input and output strings @@ -4374,10 +4358,9 @@ innobase_change_buffering_inited_ok: srv_use_posix_fallocate = (ibool) innobase_use_fallocate; #endif /* Do not enable backoff algorithm for small buffer pool. */ - if (!innodb_empty_free_list_algorithm_backoff_allowed( + if (!innodb_empty_free_list_algorithm_allowed( static_cast<srv_empty_free_list_t>( - srv_empty_free_list_algorithm), - innobase_buffer_pool_size / srv_page_size)) { + srv_empty_free_list_algorithm))) { sql_print_information( "InnoDB: innodb_empty_free_list_algorithm " "has been changed to legacy " @@ -4512,21 +4495,13 @@ error: DBUG_RETURN(TRUE); } -/*******************************************************************//** -Closes an InnoDB database. -@return TRUE if error */ +/** Shut down the InnoDB storage engine. +@return 0 */ static int -innobase_end( -/*=========*/ - handlerton* hton, /*!< in/out: InnoDB handlerton */ - ha_panic_function type MY_ATTRIBUTE((unused))) - /*!< in: ha_panic() parameter */ +innobase_end(handlerton*, ha_panic_function) { - int err= 0; - DBUG_ENTER("innobase_end"); - DBUG_ASSERT(hton == innodb_hton_ptr); if (innodb_inited) { @@ -4543,9 +4518,7 @@ innobase_end( innodb_inited = 0; hash_table_free(innobase_open_tables); innobase_open_tables = NULL; - if (innobase_shutdown_for_mysql() != DB_SUCCESS) { - err = 1; - } + innodb_shutdown(); srv_free_paths_and_sizes(); my_free(internal_innobase_data_file_path); mysql_mutex_destroy(&innobase_share_mutex); @@ -4554,7 +4527,7 @@ innobase_end( mysql_mutex_destroy(&pending_checkpoint_mutex); } - DBUG_RETURN(err); + DBUG_RETURN(0); } /****************************************************************//** @@ -4609,22 +4582,6 @@ innobase_purge_changed_page_bitmaps( } /*****************************************************************//** -Check whether this is a fake change transaction. -@return TRUE if a fake change transaction */ -static -my_bool -innobase_is_fake_change( -/*====================*/ - handlerton *hton MY_ATTRIBUTE((unused)), - /*!< in: InnoDB handlerton */ - THD* thd) /*!< in: MySQL thread handle of the user for - whom the transaction is being committed */ -{ - trx_t* trx = check_trx_exists(thd); - return UNIV_UNLIKELY(trx->fake_changes); -} - -/*****************************************************************//** Commits a transaction in an InnoDB database. */ static void @@ -8172,17 +8129,31 @@ build_template_field( templ->rec_field_is_prefix = FALSE; if (dict_index_is_clust(index)) { + templ->rec_field_is_prefix = false; templ->rec_field_no = templ->clust_rec_field_no; templ->rec_prefix_field_no = ULINT_UNDEFINED; } else { - /* If we're in a secondary index, keep track - * of the original index position even if this - * is just a prefix index; we will use this - * later to avoid a cluster index lookup in - * some cases.*/ + /* If we're in a secondary index, keep track of the original + index position even if this is just a prefix index; we will use + this later to avoid a cluster index lookup in some cases.*/ templ->rec_field_no = dict_index_get_nth_col_pos(index, i, &templ->rec_prefix_field_no); + templ->rec_field_is_prefix + = (templ->rec_field_no == ULINT_UNDEFINED) + && (templ->rec_prefix_field_no != ULINT_UNDEFINED); +#ifdef UNIV_DEBUG + if (templ->rec_prefix_field_no != ULINT_UNDEFINED) + { + const dict_field_t* field = dict_index_get_nth_field( + index, + templ->rec_prefix_field_no); + ut_ad(templ->rec_field_is_prefix + == (field->prefix_len != 0)); + } else { + ut_ad(!templ->rec_field_is_prefix); + } +#endif } if (field->real_maybe_null()) { @@ -8572,8 +8543,8 @@ ha_innobase::innobase_lock_autoinc(void) break; } } - /* Fall through to old style locking. */ - + /* Use old style locking. */ + /* fall through */ case AUTOINC_OLD_STYLE_LOCKING: DBUG_EXECUTE_IF("die_if_autoinc_old_lock_style_used", ut_ad(0);); @@ -9150,8 +9121,8 @@ calc_row_difference( } } - if (o_len != n_len || (o_len != UNIV_SQL_NULL && - 0 != memcmp(o_ptr, n_ptr, o_len))) { + if (o_len != n_len || (o_len != 0 && o_len != UNIV_SQL_NULL + && 0 != memcmp(o_ptr, n_ptr, o_len))) { /* The field has changed */ ufield = uvect->fields + n_changed; @@ -11875,7 +11846,8 @@ create_options_are_invalid( case ROW_TYPE_DYNAMIC: CHECK_ERROR_ROW_TYPE_NEEDS_FILE_PER_TABLE(use_tablespace); CHECK_ERROR_ROW_TYPE_NEEDS_GT_ANTELOPE; - /* fall through since dynamic also shuns KBS */ + /* ROW_FORMAT=DYNAMIC also shuns KEY_BLOCK_SIZE */ + /* fall through */ case ROW_TYPE_COMPACT: case ROW_TYPE_REDUNDANT: if (kbs_specified) { @@ -12285,7 +12257,8 @@ index_bad: break; /* Correct row_format */ } zip_allowed = FALSE; - /* fall through to set row_format = COMPACT */ + /* Set ROW_FORMAT = COMPACT */ + /* fall through */ case ROW_TYPE_NOT_USED: case ROW_TYPE_FIXED: case ROW_TYPE_PAGE: @@ -12294,6 +12267,7 @@ index_bad: thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, "InnoDB: assuming ROW_FORMAT=COMPACT."); + /* fall through */ case ROW_TYPE_DEFAULT: /* If we fell through, set row format to Compact. */ row_format = ROW_TYPE_COMPACT; @@ -13109,7 +13083,8 @@ ha_innobase::delete_table( extension, in contrast to ::create */ normalize_table_name(norm_name, name); - if (srv_read_only_mode) { + if (srv_read_only_mode + || srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) { DBUG_RETURN(HA_ERR_TABLE_READONLY); } else if (row_is_magic_monitor_table(norm_name) && check_global_access(thd, PROCESS_ACL)) { @@ -15078,7 +15053,8 @@ fill_foreign_key_list(THD* thd, { ut_ad(mutex_own(&dict_sys->mutex)); - for (dict_foreign_set::iterator it = table->referenced_set.begin(); + for (dict_foreign_set::const_iterator it + = table->referenced_set.begin(); it != table->referenced_set.end(); ++it) { dict_foreign_t* foreign = *it; @@ -18633,15 +18609,17 @@ innodb_buffer_pool_evict_uncompressed(void) ut_ad(block->page.in_LRU_list); mutex_enter(&block->mutex); - if (!buf_LRU_free_page(&block->page, false)) { - mutex_exit(&block->mutex); - all_evicted = false; - } else { - mutex_exit(&block->mutex); + all_evicted = buf_LRU_free_page(&block->page, false); + mutex_exit(&block->mutex); + + if (all_evicted) { + mutex_enter(&buf_pool->LRU_list_mutex); - } + block = UT_LIST_GET_LAST(buf_pool->unzip_LRU); + } else { - block = prev_block; + block = prev_block; + } } mutex_exit(&buf_pool->LRU_list_mutex); @@ -18832,6 +18810,10 @@ innodb_sched_priority_cleaner_update( const void* save) /*!< in: immediate result from check function */ { + if (srv_read_only_mode) { + return; + } + ulint priority = *static_cast<const ulint *>(save); ulint actual_priority; ulint nice = 0; @@ -18858,10 +18840,6 @@ innodb_sched_priority_cleaner_update( } /* Set the priority for the page cleaner thread */ - if (srv_read_only_mode) { - - return; - } ut_ad(buf_page_cleaner_is_active); nice = os_thread_get_priority(srv_cleaner_tid); @@ -19252,8 +19230,15 @@ checkpoint_now_set( log_make_checkpoint_at(LSN_MAX, TRUE); fil_flush_file_spaces(FIL_LOG); } - fil_write_flushed_lsn_to_data_files(log_sys->lsn, 0); - fil_flush_file_spaces(FIL_TABLESPACE); + + dberr_t err = fil_write_flushed_lsn(log_sys->lsn); + + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Failed to write flush lsn to the " + "system tablespace at checkpoint err=%s", + ut_strerr(err)); + } } } @@ -19855,32 +19840,6 @@ wsrep_fake_trx_id( /*************************************************************//** -Empty free list algorithm. -Checks if buffer pool is big enough to enable backoff algorithm. -InnoDB empty free list algorithm backoff requires free pages -from LRU for the best performance. -buf_LRU_buf_pool_running_out cancels query if 1/4 of -buffer pool belongs to LRU or freelist. -At the same time buf_flush_LRU_list_batch -keeps up to BUF_LRU_MIN_LEN in LRU. -In order to avoid deadlock baclkoff requires buffer pool -to be at least 4*BUF_LRU_MIN_LEN, -but flush peformance is bad because of trashing -and additional BUF_LRU_MIN_LEN pages are requested. -@return true if it's possible to enable backoff. */ -static -bool -innodb_empty_free_list_algorithm_backoff_allowed( - srv_empty_free_list_t algorithm, /*!< in: desired algorithm - from srv_empty_free_list_t */ - long long buf_pool_pages) /*!< in: total number - of pages inside buffer pool */ -{ - return(buf_pool_pages >= BUF_LRU_MIN_LEN * (4 + 1) - || algorithm != SRV_EMPTY_FREE_LIST_BACKOFF); -} - -/*************************************************************//** Empty free list algorithm. This function is registered as a callback with MySQL. @return 0 for valid algorithm */ @@ -19921,13 +19880,11 @@ innodb_srv_empty_free_list_algorithm_validate( return(1); algorithm = static_cast<srv_empty_free_list_t>(algo); - if (!innodb_empty_free_list_algorithm_backoff_allowed( - algorithm, - innobase_buffer_pool_size / srv_page_size)) { + if (!innodb_empty_free_list_algorithm_allowed(algorithm)) { sql_print_warning( "InnoDB: innodb_empty_free_list_algorithm " "= 'backoff' requires at least" - " 20MB buffer pool.\n"); + " 20MB buffer pool instances.\n"); return(1); } diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index f6f2f1b0eee..3bb67532954 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -540,7 +540,7 @@ innobase_index_name_is_reserved( const KEY* key_info, /*!< in: Indexes to be created */ ulint num_of_keys) /*!< in: Number of indexes to be created. */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((nonnull(1), warn_unused_result)); /*****************************************************************//** #ifdef WITH_WSREP diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index ef8c62849ae..0e7cc9a655b 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -2307,10 +2307,10 @@ online_retry_drop_indexes_with_trx( @param drop_fk constraints being dropped @param n_drop_fk number of constraints that are being dropped @return whether the constraint is being dropped */ -inline MY_ATTRIBUTE((pure, nonnull, warn_unused_result)) +MY_ATTRIBUTE((pure, nonnull(1), warn_unused_result)) +inline bool innobase_dropping_foreign( -/*======================*/ const dict_foreign_t* foreign, dict_foreign_t** drop_fk, ulint n_drop_fk) @@ -2334,10 +2334,10 @@ column that is being dropped or modified to NOT NULL. @retval true Not allowed (will call my_error()) @retval false Allowed */ -static MY_ATTRIBUTE((pure, nonnull, warn_unused_result)) +MY_ATTRIBUTE((pure, nonnull(1,4), warn_unused_result)) +static bool innobase_check_foreigns_low( -/*========================*/ const dict_table_t* user_table, dict_foreign_t** drop_fk, ulint n_drop_fk, @@ -2434,10 +2434,10 @@ column that is being dropped or modified to NOT NULL. @retval true Not allowed (will call my_error()) @retval false Allowed */ -static MY_ATTRIBUTE((pure, nonnull, warn_unused_result)) +MY_ATTRIBUTE((pure, nonnull(1,2,3,4), warn_unused_result)) +static bool innobase_check_foreigns( -/*====================*/ Alter_inplace_info* ha_alter_info, const TABLE* altered_table, const TABLE* old_table, diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc index 086d5642dbb..9cef04c4244 100644 --- a/storage/xtradb/handler/i_s.cc +++ b/storage/xtradb/handler/i_s.cc @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 2007, 2016, Oracle and/or its affiliates. -Copyrigth (c) 2014, 2017, MariaDB Corporation +Copyright (c) 2007, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2014, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2966,14 +2966,16 @@ i_s_fts_deleted_generic_fill( fields = table->field; + int ret = 0; + for (ulint j = 0; j < ib_vector_size(deleted->doc_ids); ++j) { doc_id_t doc_id; doc_id = *(doc_id_t*) ib_vector_get_const(deleted->doc_ids, j); - OK(fields[I_S_FTS_DOC_ID]->store((longlong) doc_id, true)); + BREAK_IF(ret = fields[I_S_FTS_DOC_ID]->store(doc_id, true)); - OK(schema_table_store_record(thd, table)); + BREAK_IF(ret = schema_table_store_record(thd, table)); } trx_free_for_background(trx); @@ -2984,7 +2986,7 @@ i_s_fts_deleted_generic_fill( rw_lock_s_unlock(&dict_operation_lock); - DBUG_RETURN(0); + DBUG_RETURN(ret); } /*******************************************************************//** @@ -3222,13 +3224,13 @@ i_s_fts_index_cache_fill_one_index( /*===============================*/ fts_index_cache_t* index_cache, /*!< in: FTS index cache */ THD* thd, /*!< in: thread */ + fts_string_t* conv_str, /*!< in/out: buffer */ TABLE_LIST* tables) /*!< in/out: tables to fill */ { TABLE* table = (TABLE*) tables->table; Field** fields; CHARSET_INFO* index_charset; const ib_rbt_node_t* rbt_node; - fts_string_t conv_str; uint dummy_errors; char* word_str; @@ -3237,10 +3239,9 @@ i_s_fts_index_cache_fill_one_index( fields = table->field; index_charset = index_cache->charset; - conv_str.f_len = system_charset_info->mbmaxlen - * FTS_MAX_WORD_LEN_IN_CHAR; - conv_str.f_str = static_cast<byte*>(ut_malloc(conv_str.f_len)); - conv_str.f_n_char = 0; + conv_str->f_n_char = 0; + + int ret = 0; /* Go through each word in the index cache */ for (rbt_node = rbt_first(index_cache->words); @@ -3252,16 +3253,16 @@ i_s_fts_index_cache_fill_one_index( /* Convert word from index charset to system_charset_info */ if (index_charset->cset != system_charset_info->cset) { - conv_str.f_n_char = my_convert( - reinterpret_cast<char*>(conv_str.f_str), - static_cast<uint32>(conv_str.f_len), + conv_str->f_n_char = my_convert( + reinterpret_cast<char*>(conv_str->f_str), + static_cast<uint32>(conv_str->f_len), system_charset_info, reinterpret_cast<char*>(word->text.f_str), static_cast<uint32>(word->text.f_len), index_charset, &dummy_errors); - ut_ad(conv_str.f_n_char <= conv_str.f_len); - conv_str.f_str[conv_str.f_n_char] = 0; - word_str = reinterpret_cast<char*>(conv_str.f_str); + ut_ad(conv_str->f_n_char <= conv_str->f_len); + conv_str->f_str[conv_str->f_n_char] = 0; + word_str = reinterpret_cast<char*>(conv_str->f_str); } else { word_str = reinterpret_cast<char*>(word->text.f_str); } @@ -3319,9 +3320,7 @@ i_s_fts_index_cache_fill_one_index( } } - ut_free(conv_str.f_str); - - DBUG_RETURN(0); + DBUG_RETURN(ret); } /*******************************************************************//** Fill the dynamic table INFORMATION_SCHEMA.INNODB_FT_INDEX_CACHED @@ -3365,18 +3364,27 @@ i_s_fts_index_cache_fill( ut_a(cache); + int ret = 0; + fts_string_t conv_str; + conv_str.f_len = system_charset_info->mbmaxlen + * FTS_MAX_WORD_LEN_IN_CHAR; + conv_str.f_str = static_cast<byte*>(ut_malloc(conv_str.f_len)); + for (ulint i = 0; i < ib_vector_size(cache->indexes); i++) { fts_index_cache_t* index_cache; index_cache = static_cast<fts_index_cache_t*> ( ib_vector_get(cache->indexes, i)); - i_s_fts_index_cache_fill_one_index(index_cache, thd, tables); + BREAK_IF(ret = i_s_fts_index_cache_fill_one_index( + index_cache, thd, &conv_str, tables)); } + ut_free(conv_str.f_str); + dict_table_close(user_table, FALSE, FALSE); - DBUG_RETURN(0); + DBUG_RETURN(ret); } /*******************************************************************//** @@ -3679,8 +3687,6 @@ i_s_fts_index_table_fill_one_fetch( } } - i_s_fts_index_table_free_one_fetch(words); - DBUG_RETURN(ret); } @@ -3694,13 +3700,13 @@ i_s_fts_index_table_fill_one_index( /*===============================*/ dict_index_t* index, /*!< in: FTS index */ THD* thd, /*!< in: thread */ + fts_string_t* conv_str, /*!< in/out: buffer */ TABLE_LIST* tables) /*!< in/out: tables to fill */ { ib_vector_t* words; mem_heap_t* heap; fts_string_t word; CHARSET_INFO* index_charset; - fts_string_t conv_str; dberr_t error; int ret = 0; @@ -3717,10 +3723,6 @@ i_s_fts_index_table_fill_one_index( word.f_n_char = 0; index_charset = fts_index_get_charset(index); - conv_str.f_len = system_charset_info->mbmaxlen - * FTS_MAX_WORD_LEN_IN_CHAR; - conv_str.f_str = static_cast<byte*>(ut_malloc(conv_str.f_len)); - conv_str.f_n_char = 0; /* Iterate through each auxiliary table as described in fts_index_selector */ @@ -3754,17 +3756,17 @@ i_s_fts_index_table_fill_one_index( /* Fill into tables */ ret = i_s_fts_index_table_fill_one_fetch( - index_charset, thd, tables, words, &conv_str, has_more); + index_charset, thd, tables, words, conv_str, + has_more); + i_s_fts_index_table_free_one_fetch(words); if (ret != 0) { - i_s_fts_index_table_free_one_fetch(words); goto func_exit; } } while (has_more); } func_exit: - ut_free(conv_str.f_str); mem_heap_free(heap); DBUG_RETURN(ret); @@ -3806,10 +3808,17 @@ i_s_fts_index_table_fill( DBUG_RETURN(0); } + int ret = 0; + fts_string_t conv_str; + conv_str.f_len = system_charset_info->mbmaxlen + * FTS_MAX_WORD_LEN_IN_CHAR; + conv_str.f_str = static_cast<byte*>(ut_malloc(conv_str.f_len)); + for (index = dict_table_get_first_index(user_table); index; index = dict_table_get_next_index(index)) { if (index->type & DICT_FTS) { - i_s_fts_index_table_fill_one_index(index, thd, tables); + BREAK_IF(ret = i_s_fts_index_table_fill_one_index( + index, thd, &conv_str, tables)); } } @@ -3817,7 +3826,9 @@ i_s_fts_index_table_fill( rw_lock_s_unlock(&dict_operation_lock); - DBUG_RETURN(0); + ut_free(conv_str.f_str); + + DBUG_RETURN(ret); } /*******************************************************************//** @@ -3982,6 +3993,8 @@ i_s_fts_config_fill( DBUG_ASSERT(!dict_index_is_online_ddl(index)); } + int ret = 0; + while (fts_config_key[i]) { fts_string_t value; char* key_name; @@ -4006,13 +4019,14 @@ i_s_fts_config_fill( ut_free(key_name); } - OK(field_store_string( - fields[FTS_CONFIG_KEY], fts_config_key[i])); + BREAK_IF(ret = field_store_string( + fields[FTS_CONFIG_KEY], fts_config_key[i])); - OK(field_store_string( - fields[FTS_CONFIG_VALUE], (const char*) value.f_str)); + BREAK_IF(ret = field_store_string( + fields[FTS_CONFIG_VALUE], + reinterpret_cast<const char*>(value.f_str))); - OK(schema_table_store_record(thd, table)); + BREAK_IF(ret = schema_table_store_record(thd, table)); i++; } @@ -4025,7 +4039,7 @@ i_s_fts_config_fill( rw_lock_s_unlock(&dict_operation_lock); - DBUG_RETURN(0); + DBUG_RETURN(ret); } /*******************************************************************//** @@ -4864,34 +4878,29 @@ i_s_innodb_buffer_page_fill( state_str = NULL; OK(fields[IDX_BUFFER_POOL_ID]->store( - static_cast<double>(page_info->pool_id))); + page_info->pool_id, true)); OK(fields[IDX_BUFFER_BLOCK_ID]->store( - static_cast<double>(page_info->block_id))); + page_info->block_id, true)); OK(fields[IDX_BUFFER_PAGE_SPACE]->store( - static_cast<double>(page_info->space_id))); + page_info->space_id, true)); OK(fields[IDX_BUFFER_PAGE_NUM]->store( - static_cast<double>(page_info->page_num))); + page_info->page_num, true)); OK(field_store_string( fields[IDX_BUFFER_PAGE_TYPE], i_s_page_type[page_info->page_type].type_str)); OK(fields[IDX_BUFFER_PAGE_FLUSH_TYPE]->store( - page_info->flush_type)); + page_info->flush_type, true)); OK(fields[IDX_BUFFER_PAGE_FIX_COUNT]->store( - page_info->fix_count)); + page_info->fix_count, true)); - if (page_info->hashed) { - OK(field_store_string( - fields[IDX_BUFFER_PAGE_HASHED], "YES")); - } else { - OK(field_store_string( - fields[IDX_BUFFER_PAGE_HASHED], "NO")); - } + OK(field_store_string(fields[IDX_BUFFER_PAGE_HASHED], + page_info->hashed ? "YES" : "NO")); OK(fields[IDX_BUFFER_PAGE_NEWEST_MOD]->store( (longlong) page_info->newest_mod, true)); @@ -4900,7 +4909,7 @@ i_s_innodb_buffer_page_fill( (longlong) page_info->oldest_mod, true)); OK(fields[IDX_BUFFER_PAGE_ACCESS_TIME]->store( - page_info->access_time)); + page_info->access_time, true)); fields[IDX_BUFFER_PAGE_TABLE_NAME]->set_null(); @@ -4909,44 +4918,48 @@ i_s_innodb_buffer_page_fill( /* If this is an index page, fetch the index name and table name */ if (page_info->page_type == I_S_PAGE_TYPE_INDEX) { - const dict_index_t* index; + bool ret = false; mutex_enter(&dict_sys->mutex); - index = dict_index_get_if_in_cache_low( - page_info->index_id); - - if (index) { + if (const dict_index_t* index = + dict_index_get_if_in_cache_low( + page_info->index_id)) { table_name_end = innobase_convert_name( table_name, sizeof(table_name), index->table_name, strlen(index->table_name), thd, TRUE); - OK(fields[IDX_BUFFER_PAGE_TABLE_NAME]->store( - table_name, - static_cast<uint>(table_name_end - table_name), - system_charset_info)); - fields[IDX_BUFFER_PAGE_TABLE_NAME]->set_notnull(); - - OK(field_store_index_name( - fields[IDX_BUFFER_PAGE_INDEX_NAME], - index->name)); + ret = fields[IDX_BUFFER_PAGE_TABLE_NAME] + ->store(table_name, + static_cast<uint>( + table_name_end + - table_name), + system_charset_info) + || field_store_index_name( + fields + [IDX_BUFFER_PAGE_INDEX_NAME], + index->name); } mutex_exit(&dict_sys->mutex); + + OK(ret); + + fields[IDX_BUFFER_PAGE_TABLE_NAME]->set_notnull(); } OK(fields[IDX_BUFFER_PAGE_NUM_RECS]->store( - page_info->num_recs)); + page_info->num_recs, true)); OK(fields[IDX_BUFFER_PAGE_DATA_SIZE]->store( - page_info->data_size)); + page_info->data_size, true)); OK(fields[IDX_BUFFER_PAGE_ZIP_SIZE]->store( - page_info->zip_ssize - ? (UNIV_ZIP_SIZE_MIN >> 1) << page_info->zip_ssize - : 0)); + page_info->zip_ssize + ? (UNIV_ZIP_SIZE_MIN >> 1) << page_info->zip_ssize + : 0, true)); #if BUF_PAGE_STATE_BITS > 3 # error "BUF_PAGE_STATE_BITS > 3, please ensure that all 1<<BUF_PAGE_STATE_BITS values are checked for" @@ -4984,32 +4997,29 @@ i_s_innodb_buffer_page_fill( switch (page_info->io_fix) { case BUF_IO_NONE: - OK(field_store_string(fields[IDX_BUFFER_PAGE_IO_FIX], - "IO_NONE")); + state_str = "IO_NONE"; break; case BUF_IO_READ: - OK(field_store_string(fields[IDX_BUFFER_PAGE_IO_FIX], - "IO_READ")); + state_str = "IO_READ"; break; case BUF_IO_WRITE: - OK(field_store_string(fields[IDX_BUFFER_PAGE_IO_FIX], - "IO_WRITE")); + state_str = "IO_WRITE"; break; case BUF_IO_PIN: - OK(field_store_string(fields[IDX_BUFFER_PAGE_IO_FIX], - "IO_PIN")); + state_str = "IO_PIN"; break; } + OK(field_store_string(fields[IDX_BUFFER_PAGE_IO_FIX], + state_str)); + OK(field_store_string(fields[IDX_BUFFER_PAGE_IS_OLD], (page_info->is_old) ? "YES" : "NO")); OK(fields[IDX_BUFFER_PAGE_FREE_CLOCK]->store( page_info->freed_page_clock)); - if (schema_table_store_record(thd, table)) { - DBUG_RETURN(1); - } + OK(schema_table_store_record(thd, table)); } DBUG_RETURN(0); @@ -5550,17 +5560,10 @@ i_s_innodb_buf_page_lru_fill( ulint num_page) /*!< in: number of page info cached */ { - TABLE* table; - Field** fields; - mem_heap_t* heap; - DBUG_ENTER("i_s_innodb_buf_page_lru_fill"); - table = tables->table; - - fields = table->field; - - heap = mem_heap_create(1000); + TABLE* table = tables->table; + Field** fields = table->field; /* Iterate through the cached array and fill the I_S table rows */ for (ulint i = 0; i < num_page; i++) { @@ -5575,43 +5578,37 @@ i_s_innodb_buf_page_lru_fill( page_info = info_array + i; OK(fields[IDX_BUF_LRU_POOL_ID]->store( - static_cast<double>(page_info->pool_id))); - + page_info->pool_id, true)); OK(fields[IDX_BUF_LRU_POS]->store( - static_cast<double>(page_info->block_id))); + page_info->block_id, true)); OK(fields[IDX_BUF_LRU_PAGE_SPACE]->store( - static_cast<double>(page_info->space_id))); + page_info->space_id, true)); OK(fields[IDX_BUF_LRU_PAGE_NUM]->store( - static_cast<double>(page_info->page_num))); + page_info->page_num, true)); OK(field_store_string( - fields[IDX_BUF_LRU_PAGE_TYPE], - i_s_page_type[page_info->page_type].type_str)); + fields[IDX_BUF_LRU_PAGE_TYPE], + i_s_page_type[page_info->page_type].type_str)); OK(fields[IDX_BUF_LRU_PAGE_FLUSH_TYPE]->store( - static_cast<double>(page_info->flush_type))); + page_info->flush_type, true)); OK(fields[IDX_BUF_LRU_PAGE_FIX_COUNT]->store( - static_cast<double>(page_info->fix_count))); + page_info->fix_count, true)); - if (page_info->hashed) { - OK(field_store_string( - fields[IDX_BUF_LRU_PAGE_HASHED], "YES")); - } else { - OK(field_store_string( - fields[IDX_BUF_LRU_PAGE_HASHED], "NO")); - } + OK(field_store_string(fields[IDX_BUF_LRU_PAGE_HASHED], + page_info->hashed ? "YES" : "NO")); OK(fields[IDX_BUF_LRU_PAGE_NEWEST_MOD]->store( - page_info->newest_mod, true)); + page_info->newest_mod, true)); OK(fields[IDX_BUF_LRU_PAGE_OLDEST_MOD]->store( - page_info->oldest_mod, true)); + page_info->oldest_mod, true)); OK(fields[IDX_BUF_LRU_PAGE_ACCESS_TIME]->store( - page_info->access_time)); + page_info->access_time, true)); fields[IDX_BUF_LRU_PAGE_TABLE_NAME]->set_null(); @@ -5620,43 +5617,47 @@ i_s_innodb_buf_page_lru_fill( /* If this is an index page, fetch the index name and table name */ if (page_info->page_type == I_S_PAGE_TYPE_INDEX) { - const dict_index_t* index; + bool ret = false; mutex_enter(&dict_sys->mutex); - index = dict_index_get_if_in_cache_low( - page_info->index_id); - - if (index) { + if (const dict_index_t* index = + dict_index_get_if_in_cache_low( + page_info->index_id)) { table_name_end = innobase_convert_name( table_name, sizeof(table_name), index->table_name, strlen(index->table_name), thd, TRUE); - OK(fields[IDX_BUF_LRU_PAGE_TABLE_NAME]->store( - table_name, - static_cast<uint>(table_name_end - table_name), - system_charset_info)); - fields[IDX_BUF_LRU_PAGE_TABLE_NAME]->set_notnull(); - - OK(field_store_index_name( - fields[IDX_BUF_LRU_PAGE_INDEX_NAME], - index->name)); + ret = fields[IDX_BUF_LRU_PAGE_TABLE_NAME] + ->store(table_name, + static_cast<uint>( + table_name_end + - table_name), + system_charset_info) + || field_store_index_name( + fields + [IDX_BUF_LRU_PAGE_INDEX_NAME], + index->name); } mutex_exit(&dict_sys->mutex); + + OK(ret); + + fields[IDX_BUF_LRU_PAGE_TABLE_NAME]->set_notnull(); } OK(fields[IDX_BUF_LRU_PAGE_NUM_RECS]->store( - page_info->num_recs)); + page_info->num_recs, true)); OK(fields[IDX_BUF_LRU_PAGE_DATA_SIZE]->store( - page_info->data_size)); + page_info->data_size, true)); OK(fields[IDX_BUF_LRU_PAGE_ZIP_SIZE]->store( - page_info->zip_ssize ? - 512 << page_info->zip_ssize : 0)); + page_info->zip_ssize + ? 512 << page_info->zip_ssize : 0, true)); state = static_cast<enum buf_page_state>(page_info->page_state); @@ -5685,35 +5686,31 @@ i_s_innodb_buf_page_lru_fill( switch (page_info->io_fix) { case BUF_IO_NONE: - OK(field_store_string(fields[IDX_BUF_LRU_PAGE_IO_FIX], - "IO_NONE")); + state_str = "IO_NONE"; break; case BUF_IO_READ: - OK(field_store_string(fields[IDX_BUF_LRU_PAGE_IO_FIX], - "IO_READ")); + state_str = "IO_READ"; break; case BUF_IO_WRITE: - OK(field_store_string(fields[IDX_BUF_LRU_PAGE_IO_FIX], - "IO_WRITE")); + state_str = "IO_WRITE"; + break; + case BUF_IO_PIN: + state_str = "IO_PIN"; break; } + OK(field_store_string(fields[IDX_BUF_LRU_PAGE_IO_FIX], + state_str)); + OK(field_store_string(fields[IDX_BUF_LRU_PAGE_IS_OLD], - (page_info->is_old) ? "YES" : "NO")); + page_info->is_old ? "YES" : "NO")); OK(fields[IDX_BUF_LRU_PAGE_FREE_CLOCK]->store( - page_info->freed_page_clock)); - - if (schema_table_store_record(thd, table)) { - mem_heap_free(heap); - DBUG_RETURN(1); - } + page_info->freed_page_clock, true)); - mem_heap_empty(heap); + OK(schema_table_store_record(thd, table)); } - mem_heap_free(heap); - DBUG_RETURN(0); } diff --git a/storage/xtradb/handler/i_s.h b/storage/xtradb/handler/i_s.h index 55ef6e7ea42..4bb3ea33462 100644 --- a/storage/xtradb/handler/i_s.h +++ b/storage/xtradb/handler/i_s.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2007, 2013, Oracle and/or its affiliates. All Rights Reserved. -Copyrigth (c) 2014, 2015, MariaDB Corporation +Copyrigth (c) 2014, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -77,6 +77,8 @@ extern struct st_mysql_plugin i_s_innodb_changed_page_bitmaps; DBUG_RETURN(1); \ } +#define BREAK_IF(expr) if ((expr)) break + #define RETURN_IF_INNODB_NOT_STARTED(plugin_name) \ do { \ if (!srv_was_started) { \ diff --git a/storage/xtradb/ibuf/ibuf0ibuf.cc b/storage/xtradb/ibuf/ibuf0ibuf.cc index e66568565e1..0445bb557e1 100644 --- a/storage/xtradb/ibuf/ibuf0ibuf.cc +++ b/storage/xtradb/ibuf/ibuf0ibuf.cc @@ -2963,8 +2963,7 @@ ibuf_get_volume_buffered_hash( fold = ut_fold_binary(data, len); hash += (fold / (CHAR_BIT * sizeof *hash)) % size; - bitmask = static_cast<ulint>( - 1 << (fold % (CHAR_BIT * sizeof(*hash)))); + bitmask = static_cast<ulint>(1) << (fold % (CHAR_BIT * sizeof(*hash))); if (*hash & bitmask) { @@ -3733,7 +3732,7 @@ fail_exit: if (mode == BTR_MODIFY_PREV) { err = btr_cur_optimistic_insert( - BTR_NO_LOCKING_FLAG, + BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG, cursor, &offsets, &offsets_heap, ibuf_entry, &ins_rec, &dummy_big_rec, 0, thr, &mtr); diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h index 960bd55d3d9..e478b33bf8e 100644 --- a/storage/xtradb/include/btr0cur.h +++ b/storage/xtradb/include/btr0cur.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -220,15 +221,17 @@ btr_cur_optimistic_insert( btr_cur_t* cursor, /*!< in: cursor on page after which to insert; cursor stays valid */ ulint** offsets,/*!< out: offsets on *rec */ - mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ + mem_heap_t** heap, /*!< in/out: pointer to memory heap */ dtuple_t* entry, /*!< in/out: entry to insert */ rec_t** rec, /*!< out: pointer to inserted record if succeed */ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to - be stored externally by the caller, or - NULL */ + be stored externally by the caller */ ulint n_ext, /*!< in: number of externally stored columns */ - que_thr_t* thr, /*!< in: query thread or NULL */ + que_thr_t* thr, /*!< in/out: query thread; can be NULL if + !(~flags + & (BTR_NO_LOCKING_FLAG + | BTR_NO_UNDO_LOG_FLAG)) */ mtr_t* mtr) /*!< in/out: mini-transaction; if this function returns DB_SUCCESS on a leaf page of a secondary index in a @@ -256,15 +259,17 @@ btr_cur_pessimistic_insert( cursor stays valid */ ulint** offsets,/*!< out: offsets on *rec */ mem_heap_t** heap, /*!< in/out: pointer to memory heap - that can be emptied, or NULL */ + that can be emptied */ dtuple_t* entry, /*!< in/out: entry to insert */ rec_t** rec, /*!< out: pointer to inserted record if succeed */ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to - be stored externally by the caller, or - NULL */ + be stored externally by the caller */ ulint n_ext, /*!< in: number of externally stored columns */ - que_thr_t* thr, /*!< in: query thread or NULL */ + que_thr_t* thr, /*!< in/out: query thread; can be NULL if + !(~flags + & (BTR_NO_LOCKING_FLAG + | BTR_NO_UNDO_LOG_FLAG)) */ mtr_t* mtr) /*!< in/out: mini-transaction */ MY_ATTRIBUTE((nonnull(2,3,4,5,6,7,10), warn_unused_result)); /*************************************************************//** @@ -392,12 +397,12 @@ btr_cur_pessimistic_update( ulint** offsets,/*!< out: offsets on cursor->page_cur.rec */ mem_heap_t** offsets_heap, /*!< in/out: pointer to memory heap - that can be emptied, or NULL */ + that can be emptied */ mem_heap_t* entry_heap, /*!< in/out: memory heap for allocating big_rec and the index tuple */ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to - be stored externally by the caller, or NULL */ + be stored externally by the caller */ const upd_t* update, /*!< in: update vector; this is allowed also contain trx id and roll ptr fields, but the values in update vector have no effect */ diff --git a/storage/xtradb/include/btr0defragment.h b/storage/xtradb/include/btr0defragment.h index 5c54b898e37..477824c1a35 100644 --- a/storage/xtradb/include/btr0defragment.h +++ b/storage/xtradb/include/btr0defragment.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (C) 2013, 2014 Facebook, Inc. All Rights Reserved. -Copyright (C) 2014, 2015, MariaDB Corporation. +Copyright (C) 2014, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -89,15 +89,14 @@ UNIV_INTERN void btr_defragment_save_defrag_stats_if_needed( dict_index_t* index); /*!< in: index */ -/******************************************************************//** -Thread that merges consecutive b-tree pages into fewer pages to defragment -the index. */ + +/** Merge consecutive b-tree pages into fewer pages to defragment indexes */ extern "C" UNIV_INTERN os_thread_ret_t -DECLARE_THREAD(btr_defragment_thread)( -/*==========================================*/ - void* arg); /*!< in: a dummy parameter required by - os_thread_create */ +DECLARE_THREAD(btr_defragment_thread)(void*); + +/** Whether btr_defragment_thread is active */ +extern bool btr_defragment_thread_active; #endif /* !UNIV_HOTBACKUP */ #endif diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index 9b4276efaa8..1899165ace0 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -1577,20 +1577,13 @@ directory (buf) to see it. Do not use from outside! */ typedef struct { bool reserved; /*!< true if this slot is reserved */ -#ifdef HAVE_LZO - byte* lzo_mem; /*!< Temporal memory used by LZO */ -#endif byte* crypt_buf; /*!< for encryption the data needs to be copied to a separate buffer before it's encrypted&written. this as a page can be read while it's being flushed */ - byte* crypt_buf_free; /*!< for encryption, allocated buffer - that is then alligned */ byte* comp_buf; /*!< for compression we need temporal buffer because page can be read while it's being flushed */ - byte* comp_buf_free; /*!< for compression, allocated - buffer that is then alligned */ byte* out_buf; /*!< resulting buffer after encryption/compression. This is a pointer and not allocated. */ diff --git a/storage/xtradb/include/buf0dblwr.h b/storage/xtradb/include/buf0dblwr.h index 5582778825c..7b7464761cc 100644 --- a/storage/xtradb/include/buf0dblwr.h +++ b/storage/xtradb/include/buf0dblwr.h @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -39,13 +39,15 @@ extern buf_dblwr_t* buf_dblwr; /** Set to TRUE when the doublewrite buffer is being created */ extern ibool buf_dblwr_being_created; -/****************************************************************//** -Creates the doublewrite buffer to a new InnoDB installation. The header of the -doublewrite buffer is placed on the trx system header page. */ +/** Create the doublewrite buffer if the doublewrite buffer header +is not present in the TRX_SYS page. +@return whether the operation succeeded +@retval true if the doublewrite buffer exists or was created +@retval false if the creation failed (too small first data file) */ UNIV_INTERN -void -buf_dblwr_create(void); -/*==================*/ +bool +buf_dblwr_create() + MY_ATTRIBUTE((warn_unused_result)); /****************************************************************//** At a database startup initializes the doublewrite buffer memory structure if @@ -56,7 +58,7 @@ recovery, this function loads the pages from double write buffer into memory. */ void buf_dblwr_init_or_load_pages( /*=========================*/ - os_file_t file, + pfs_os_file_t file, char* path, bool load_corrupt_pages); diff --git a/storage/xtradb/include/buf0flu.h b/storage/xtradb/include/buf0flu.h index af50a5498ef..6089baf81e8 100644 --- a/storage/xtradb/include/buf0flu.h +++ b/storage/xtradb/include/buf0flu.h @@ -34,7 +34,7 @@ Created 11/5/1995 Heikki Tuuri #include "buf0types.h" /** Flag indicating if the page_cleaner is in active state. */ -extern ibool buf_page_cleaner_is_active; +extern bool buf_page_cleaner_is_active; /** Flag indicating if the lru_manager is in active state. */ extern bool buf_lru_manager_is_active; diff --git a/storage/xtradb/include/data0type.ic b/storage/xtradb/include/data0type.ic index 555852474aa..8f5cee0fd5f 100644 --- a/storage/xtradb/include/data0type.ic +++ b/storage/xtradb/include/data0type.ic @@ -576,7 +576,8 @@ dtype_get_fixed_size_low( #else /* !UNIV_HOTBACKUP */ return(len); #endif /* !UNIV_HOTBACKUP */ - /* fall through for variable-length charsets */ + /* Treat as variable-length. */ + /* Fall through */ case DATA_VARCHAR: case DATA_BINARY: case DATA_DECIMAL: diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index 6da8eb892d9..0290b884ece 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -1192,7 +1192,7 @@ dict_index_get_nth_col_pos( const dict_index_t* index, /*!< in: index */ ulint n, /*!< in: column number */ ulint* prefix_col_pos) /*!< out: col num if prefix */ - __attribute__((nonnull(1), warn_unused_result)); + MY_ATTRIBUTE((nonnull(1), warn_unused_result)); /********************************************************************//** Looks for column n in an index. @return position in internal representation of the index; @@ -1207,7 +1207,7 @@ dict_index_get_nth_col_or_prefix_pos( column prefixes too */ ulint* prefix_col_pos) /*!< out: col num if prefix */ - __attribute__((nonnull(1), warn_unused_result)); + MY_ATTRIBUTE((nonnull(1), warn_unused_result)); /********************************************************************//** Returns TRUE if the index contains a column or a prefix of that column. @return TRUE if contains the column or its prefix */ diff --git a/storage/xtradb/include/dict0stats_bg.h b/storage/xtradb/include/dict0stats_bg.h index d5f0870718d..8f3385eb22b 100644 --- a/storage/xtradb/include/dict0stats_bg.h +++ b/storage/xtradb/include/dict0stats_bg.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -144,6 +144,10 @@ DECLARE_THREAD(dict_stats_thread)( void* arg); /*!< in: a dummy parameter required by os_thread_create */ +/** Shut down the dict_stats_thread. */ +void +dict_stats_shutdown(); + # ifndef UNIV_NONINL # include "dict0stats_bg.ic" # endif diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 2f03d2aa0f5..a09833c3a73 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2013, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -210,9 +210,8 @@ struct fsp_open_info { ibool success; /*!< Has the tablespace been opened? */ const char* check_msg; /*!< fil_check_first_page() message */ ibool valid; /*!< Is the tablespace valid? */ - os_file_t file; /*!< File handle */ + pfs_os_file_t file; /*!< File handle */ char* filepath; /*!< File path to open */ - lsn_t lsn; /*!< Flushed LSN from header page */ ulint id; /*!< Space ID */ ulint flags; /*!< Tablespace flags */ ulint encryption_error; /*!< if an encryption error occurs */ @@ -228,7 +227,7 @@ struct fil_node_t { belongs */ char* name; /*!< path to the file */ ibool open; /*!< TRUE if file open */ - os_file_t handle; /*!< OS handle to the file, if file open */ + pfs_os_file_t handle; /*!< OS handle to the file, if file open */ os_event_t sync_event;/*!< Condition event to group and serialize calls to fsync; os_event_set() and os_event_reset() @@ -351,9 +350,6 @@ struct fil_space_t { compression failure */ fil_space_crypt_t* crypt_data; /*!< tablespace crypt data or NULL */ - bool page_0_crypt_read; - /*!< tablespace crypt data has been - read */ ulint file_block_size; /*!< file system block size */ @@ -643,17 +639,17 @@ void fil_set_max_space_id_if_bigger( /*===========================*/ ulint max_id);/*!< in: maximum known id */ + #ifndef UNIV_HOTBACKUP -/****************************************************************//** -Writes the flushed lsn and the latest archived log number to the page -header of the first page of each data file in the system tablespace. -@return DB_SUCCESS or error number */ -UNIV_INTERN + +/** Write the flushed LSN to the page header of the first page in the +system tablespace. +@param[in] lsn flushed LSN +@return DB_SUCCESS or error number */ dberr_t -fil_write_flushed_lsn_to_data_files( -/*================================*/ - lsn_t lsn, /*!< in: lsn to write */ - ulint arch_log_no); /*!< in: latest archived log file number */ +fil_write_flushed_lsn( + lsn_t lsn) + MY_ATTRIBUTE((warn_unused_result)); /** Acquire a tablespace when it could be dropped concurrently. Used by background threads that do not necessarily hold proper locks @@ -799,28 +795,28 @@ private: fil_space_t* m_space; }; -/*******************************************************************//** -Reads the flushed lsn, arch no, and tablespace flag fields from a data -file at database startup. +/** Reads the flushed lsn, arch no, space_id and tablespace flag fields from +the first page of a first data file at database startup. +@param[in] data_file open data file +@param[in] one_read_only true if first datafile is already + read +@param[out] flags FSP_SPACE_FLAGS +@param[out] space_id tablepspace ID +@param[out] flushed_lsn flushed lsn value +@param[out] crypt_data encryption crypt data @retval NULL on success, or if innodb_force_recovery is set @return pointer to an error message string */ UNIV_INTERN const char* fil_read_first_page( -/*================*/ - os_file_t data_file, /*!< in: open data file */ - ibool one_read_already, /*!< in: TRUE if min and max - parameters below already - contain sensible data */ - ulint* flags, /*!< out: FSP_SPACE_FLAGS */ - ulint* space_id, /*!< out: tablespace ID */ - lsn_t* min_flushed_lsn, /*!< out: min of flushed - lsn values in data files */ - lsn_t* max_flushed_lsn, /*!< out: max of flushed - lsn values in data files */ - fil_space_crypt_t** crypt_data) /*!< out: crypt data */ - - __attribute__((warn_unused_result)); + pfs_os_file_t data_file, + ibool one_read_already, + ulint* flags, + ulint* space_id, + lsn_t* flushed_lsn, + fil_space_crypt_t** crypt_data) + MY_ATTRIBUTE((warn_unused_result)); + #endif /* !UNIV_HOTBACKUP */ /*******************************************************************//** Parses the body of a log record written about an .ibd file operation. That is, @@ -1006,7 +1002,7 @@ fil_create_new_single_table_tablespace( must be >= FIL_IBD_FILE_INITIAL_SIZE */ fil_encryption_t mode, /*!< in: encryption mode */ ulint key_id) /*!< in: encryption key_id */ - __attribute__((nonnull, warn_unused_result)); + MY_ATTRIBUTE((nonnull(2), warn_unused_result)); #ifndef UNIV_HOTBACKUP /** Try to adjust FSP_SPACE_FLAGS if they differ from the expectations. (Typically when upgrading from MariaDB 10.1.0..10.1.20.) @@ -1340,12 +1336,12 @@ struct PageCallback { Called for every page in the tablespace. If the page was not updated then its state must be set to BUF_PAGE_NOT_USED. For compressed tables the page descriptor memory will be at offset: - block->frame + UNIV_PAGE_SIZE; + block->frame + UNIV_PAGE_SIZE; @param offset - physical offset within the file @param block - block read from file, note it is not from the buffer pool @retval DB_SUCCESS or error code. */ virtual dberr_t operator()( - os_offset_t offset, + os_offset_t offset, buf_block_t* block) UNIV_NOTHROW = 0; /** @@ -1353,7 +1349,7 @@ struct PageCallback { to open it for the file that is being iterated over. @param filename - then physical name of the tablespace file. @param file - OS file handle */ - void set_file(const char* filename, os_file_t file) UNIV_NOTHROW + void set_file(const char* filename, pfs_os_file_t file) UNIV_NOTHROW { m_file = file; m_filepath = filename; @@ -1389,7 +1385,7 @@ struct PageCallback { ulint m_page_size; /** File handle to the tablespace */ - os_file_t m_file; + pfs_os_file_t m_file; /** Physical file path. */ const char* m_filepath; diff --git a/storage/xtradb/include/fil0pagecompress.h b/storage/xtradb/include/fil0pagecompress.h index 73667c5420e..03e16699ce3 100644 --- a/storage/xtradb/include/fil0pagecompress.h +++ b/storage/xtradb/include/fil0pagecompress.h @@ -65,9 +65,8 @@ fil_compress_page( ulint level, /* in: compression level */ ulint block_size, /*!< in: block size */ bool encrypted, /*!< in: is page also encrypted */ - ulint* out_len, /*!< out: actual length of compressed + ulint* out_len); /*!< out: actual length of compressed page */ - byte* lzo_mem); /*!< in: temporal memory used by LZO */ /****************************************************************//** For page compressed pages decompress the page after actual read diff --git a/storage/xtradb/include/fsp0fsp.h b/storage/xtradb/include/fsp0fsp.h index 6ed78eba6f9..715572199ab 100644 --- a/storage/xtradb/include/fsp0fsp.h +++ b/storage/xtradb/include/fsp0fsp.h @@ -519,16 +519,14 @@ fsp_header_init_fields( ulint space_id, /*!< in: space id */ ulint flags); /*!< in: tablespace flags (FSP_SPACE_FLAGS): 0, or table->flags if newer than COMPACT */ -/**********************************************************************//** -Initializes the space header of a new created space and creates also the -insert buffer tree root if space == 0. */ +/** Initialize a tablespace header. +@param[in] space_id space id +@param[in] size current size in blocks +@param[in,out] mtr mini-transaction */ UNIV_INTERN void -fsp_header_init( -/*============*/ - ulint space, /*!< in: space id */ - ulint size, /*!< in: current size in blocks */ - mtr_t* mtr); /*!< in/out: mini-transaction */ +fsp_header_init(ulint space_id, ulint size, mtr_t* mtr); + /**********************************************************************//** Increases the space size field of a space. */ UNIV_INTERN diff --git a/storage/xtradb/include/ha0ha.h b/storage/xtradb/include/ha0ha.h index 7351b407e8c..58eb581e76a 100644 --- a/storage/xtradb/include/ha0ha.h +++ b/storage/xtradb/include/ha0ha.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -107,7 +107,7 @@ chosen to be a slightly bigger prime number. @param level in: level of the mutexes in the latching order @param n_m in: number of mutexes to protect the hash table; must be a power of 2, or 0 */ -# define ha_create(n_c,n_m,type,level) ha_create_func(n_c,level,n_m,type) +# define ib_create(n_c,n_m,type,level) ha_create_func(n_c,level,n_m,type) #else /* UNIV_SYNC_DEBUG */ /** Creates a hash table. @return own: created table @@ -116,7 +116,7 @@ chosen to be a slightly bigger prime number. @param level in: level of the mutexes in the latching order @param n_m in: number of mutexes to protect the hash table; must be a power of 2, or 0 */ -# define ha_create(n_c,n_m,type,level) ha_create_func(n_c,n_m,type) +# define ib_create(n_c,n_m,type,level) ha_create_func(n_c,n_m,type) #endif /* UNIV_SYNC_DEBUG */ /*************************************************************//** diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h index a161ec8c06c..b053be9e61d 100644 --- a/storage/xtradb/include/ha_prototypes.h +++ b/storage/xtradb/include/ha_prototypes.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2006, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -144,7 +145,7 @@ enum durability_properties thd_requested_durability( /*=====================*/ const THD* thd) /*!< in: thread handle */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); + MY_ATTRIBUTE((warn_unused_result)); /******************************************************************//** Returns true if the transaction this thread is processing has edited diff --git a/storage/xtradb/include/log0online.h b/storage/xtradb/include/log0online.h index 722336dd6b4..5c3e7d07fd9 100644 --- a/storage/xtradb/include/log0online.h +++ b/storage/xtradb/include/log0online.h @@ -130,7 +130,7 @@ log_online_bitmap_iterator_next( /** Struct for single bitmap file information */ struct log_online_bitmap_file_struct { char name[FN_REFLEN]; /*!< Name with full path */ - os_file_t file; /*!< Handle to opened file */ + pfs_os_file_t file; /*!< Handle to opened file */ ib_uint64_t size; /*!< Size of the file */ os_offset_t offset; /*!< Offset of the next read, or count of already-read bytes diff --git a/storage/xtradb/include/log0recv.h b/storage/xtradb/include/log0recv.h index e7b6a937f01..73d53d2ddab 100644 --- a/storage/xtradb/include/log0recv.h +++ b/storage/xtradb/include/log0recv.h @@ -137,26 +137,25 @@ a freshly read page) */ # define recv_recover_page(jri, block) recv_recover_page_func(block) #endif /* !UNIV_HOTBACKUP */ -/********************************************************//** -Recovers from a checkpoint. When this function returns, the database is able + +/** Recovers from a checkpoint. When this function returns, the database is able to start processing of new user transactions, but the function recv_recovery_from_checkpoint_finish should be called later to complete the recovery and free the resources used in it. +@param[in] type LOG_CHECKPOINT or LOG_ARCHIVE +@param[in] limit_lsn recover up to this lsn if possible +@param[in] flushed_lsn flushed lsn from first data file @return error code or DB_SUCCESS */ UNIV_INTERN dberr_t recv_recovery_from_checkpoint_start_func( -/*=====================================*/ #ifdef UNIV_LOG_ARCHIVE - ulint type, /*!< in: LOG_CHECKPOINT or - LOG_ARCHIVE */ - lsn_t limit_lsn, /*!< in: recover up to this lsn - if possible */ + ulint type, + lsn_t limit_lsn, #endif /* UNIV_LOG_ARCHIVE */ - lsn_t min_flushed_lsn,/*!< in: min flushed lsn from - data files */ - lsn_t max_flushed_lsn);/*!< in: max flushed lsn from - data files */ + lsn_t flushed_lsn) + MY_ATTRIBUTE((warn_unused_result)); + #ifdef UNIV_LOG_ARCHIVE /** Wrapper for recv_recovery_from_checkpoint_start_func(). Recovers from a checkpoint. When this function returns, the database is able @@ -165,11 +164,10 @@ recv_recovery_from_checkpoint_finish should be called later to complete the recovery and free the resources used in it. @param type in: LOG_CHECKPOINT or LOG_ARCHIVE @param lim in: recover up to this log sequence number if possible -@param min in: minimum flushed log sequence number from data files -@param max in: maximum flushed log sequence number from data files +@param lsn in: flushed log sequence number from first data file @return error code or DB_SUCCESS */ -# define recv_recovery_from_checkpoint_start(type,lim,min,max) \ - recv_recovery_from_checkpoint_start_func(type,lim,min,max) +# define recv_recovery_from_checkpoint_start(type,lim,lsn) \ + recv_recovery_from_checkpoint_start_func(type,lim,lsn) #else /* UNIV_LOG_ARCHIVE */ /** Wrapper for recv_recovery_from_checkpoint_start_func(). Recovers from a checkpoint. When this function returns, the database is able @@ -178,12 +176,12 @@ recv_recovery_from_checkpoint_finish should be called later to complete the recovery and free the resources used in it. @param type ignored: LOG_CHECKPOINT or LOG_ARCHIVE @param lim ignored: recover up to this log sequence number if possible -@param min in: minimum flushed log sequence number from data files -@param max in: maximum flushed log sequence number from data files +@param lsn in: flushed log sequence number from first data file @return error code or DB_SUCCESS */ -# define recv_recovery_from_checkpoint_start(type,lim,min,max) \ - recv_recovery_from_checkpoint_start_func(min,max) +# define recv_recovery_from_checkpoint_start(type,lim,lsn) \ + recv_recovery_from_checkpoint_start_func(lsn) #endif /* UNIV_LOG_ARCHIVE */ + /********************************************************//** Completes recovery from a checkpoint. */ UNIV_INTERN diff --git a/storage/xtradb/include/mach0data.ic b/storage/xtradb/include/mach0data.ic index 3904d96c09f..3b1cf9c0378 100644 --- a/storage/xtradb/include/mach0data.ic +++ b/storage/xtradb/include/mach0data.ic @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -779,13 +780,13 @@ mach_swap_byte_order( dest += len; switch (len & 0x7) { - case 0: *--dest = *from++; - case 7: *--dest = *from++; - case 6: *--dest = *from++; - case 5: *--dest = *from++; - case 4: *--dest = *from++; - case 3: *--dest = *from++; - case 2: *--dest = *from++; + case 0: *--dest = *from++; /* fall through */ + case 7: *--dest = *from++; /* fall through */ + case 6: *--dest = *from++; /* fall through */ + case 5: *--dest = *from++; /* fall through */ + case 4: *--dest = *from++; /* fall through */ + case 3: *--dest = *from++; /* fall through */ + case 2: *--dest = *from++; /* fall through */ case 1: *--dest = *from; } } diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index 06bb6a6fbac..b17e09cf0fa 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -1,6 +1,6 @@ /*********************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. Copyright (c) 2013, 2017, MariaDB Corporation. @@ -65,26 +65,54 @@ extern ibool os_aio_print_debug; /** File offset in bytes */ typedef ib_uint64_t os_offset_t; -#ifdef __WIN__ -#define SRV_PATH_SEPARATOR '\\' +#ifdef _WIN32 +# define SRV_PATH_SEPARATOR '\\' /** File handle */ -# define os_file_t HANDLE -# define os_file_invalid INVALID_HANDLE_VALUE +typedef HANDLE os_file_t; /** Convert a C file descriptor to a native file handle @param fd file descriptor @return native file handle */ -# define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd) +# define OS_FILE_FROM_FD(fd) reinterpret_cast<HANDLE>(_get_osfhandle(fd)) #else -#define SRV_PATH_SEPARATOR '/' +# define SRV_PATH_SEPARATOR '/' /** File handle */ typedef int os_file_t; -# define os_file_invalid (-1) /** Convert a C file descriptor to a native file handle @param fd file descriptor @return native file handle */ # define OS_FILE_FROM_FD(fd) fd #endif +/** File descriptor with optional PERFORMANCE_SCHEMA instrumentation */ +struct pfs_os_file_t +{ + /** Default constructor */ + pfs_os_file_t() : m_file( +#ifdef _WIN32 + INVALID_HANDLE_VALUE +#else + -1 +#endif + ) +#ifdef UNIV_PFS_IO + , m_psi(NULL) +#endif + {} + + /** The wrapped file handle */ + os_file_t m_file; +#ifdef UNIV_PFS_IO + /** PERFORMANCE_SCHEMA descriptor */ + struct PSI_file *m_psi; +#endif + /** Implicit type conversion. + @return the wrapped file handle */ + operator os_file_t() const { return m_file; } + /** Assignment operator. + @param[in] file file handle to be assigned */ + void operator=(os_file_t file) { m_file = file; } +}; + /** Umask for creating files */ extern ulint os_innodb_umask; @@ -120,6 +148,21 @@ enum os_file_create_t { ON_ERROR_NO_EXIT is set */ }; +/** Options for os_file_advise_func @{ */ +enum os_file_advise_t { + OS_FILE_ADVISE_NORMAL = 1, /*!< no advice on access pattern + (default) */ + OS_FILE_ADVISE_RANDOM = 2, /*!< access in random order */ + OS_FILE_ADVISE_SEQUENTIAL = 4, /*!< access the specified data + sequentially (with lower offsets read + before higher ones) */ + OS_FILE_ADVISE_WILLNEED = 8, /*!< specified data will be accessed + in the near future */ + OS_FILE_ADVISE_DONTNEED = 16, /*!< specified data will not be + accessed in the near future */ + OS_FILE_ADVISE_NOREUSE = 32 /*!< access only once */ +}; + #define OS_FILE_READ_ONLY 333 #define OS_FILE_READ_WRITE 444 #define OS_FILE_READ_ALLOW_DELETE 555 /* for mysqlbackup */ @@ -221,6 +264,8 @@ extern mysql_pfs_key_t innodb_file_bmp_key; various file I/O operations with performance schema. 1) register_pfs_file_open_begin() and register_pfs_file_open_end() are used to register file creation, opening, closing and renaming. +2) register_pfs_file_rename_begin() and register_pfs_file_rename_end() +are used to register file renaming 2) register_pfs_file_io_begin() and register_pfs_file_io_end() are used to register actual file read, write and flush 3) register_pfs_file_close_begin() and register_pfs_file_close_end() @@ -230,17 +275,30 @@ are used to register file deletion operations*/ do { \ locker = PSI_FILE_CALL(get_thread_file_name_locker)( \ state, key, op, name, &locker); \ - if (UNIV_LIKELY(locker != NULL)) { \ + if (locker != NULL) { \ PSI_FILE_CALL(start_file_open_wait)( \ locker, src_file, src_line); \ } \ } while (0) -# define register_pfs_file_open_end(locker, file) \ +# define register_pfs_file_open_end(locker, file, result) \ do { \ - if (UNIV_LIKELY(locker != NULL)) { \ - PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(\ - locker, file); \ + if (locker != NULL) { \ + file.m_psi = PSI_FILE_CALL( \ + end_file_open_wait)( \ + locker, result); \ + } \ +} while (0) + +# define register_pfs_file_rename_begin(state, locker, key, op, name, \ + src_file, src_line) \ + register_pfs_file_open_begin(state, locker, key, op, name, \ + src_file, src_line) \ + +# define register_pfs_file_rename_end(locker, result) \ +do { \ + if (locker != NULL) { \ + PSI_FILE_CALL(end_file_open_wait)(locker, result); \ } \ } while (0) @@ -266,9 +324,9 @@ do { \ # define register_pfs_file_io_begin(state, locker, file, count, op, \ src_file, src_line) \ do { \ - locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( \ - state, file, op); \ - if (UNIV_LIKELY(locker != NULL)) { \ + locker = PSI_FILE_CALL(get_thread_file_stream_locker)( \ + state, file.m_psi, op); \ + if (locker != NULL) { \ PSI_FILE_CALL(start_file_wait)( \ locker, count, src_file, src_line); \ } \ @@ -276,7 +334,7 @@ do { \ # define register_pfs_file_io_end(locker, count) \ do { \ - if (UNIV_LIKELY(locker != NULL)) { \ + if (locker != NULL) { \ PSI_FILE_CALL(end_file_wait)(locker, count); \ } \ } while (0) @@ -290,11 +348,16 @@ os_file_create os_file_create_simple os_file_create_simple_no_error_handling os_file_close +os_file_close_no_error_handling os_file_rename os_aio os_file_read os_file_read_no_error_handling +os_file_read_no_error_handling_int_fd os_file_write +os_file_write_int_fd +os_file_set_eof_at +os_file_allocate The wrapper functions have the prefix of "innodb_". */ @@ -315,6 +378,9 @@ The wrapper functions have the prefix of "innodb_". */ # define os_file_close(file) \ pfs_os_file_close_func(file, __FILE__, __LINE__) +# define os_file_close_no_error_handling(file) \ + pfs_os_file_close_no_error_handling_func(file, __FILE__, __LINE__) + # define os_aio(type, is_log, mode, name, file, buf, offset, \ n, page_size, message1, message2, space_id, \ trx, write_size) \ @@ -334,9 +400,18 @@ The wrapper functions have the prefix of "innodb_". */ pfs_os_file_read_no_error_handling_func(file, buf, offset, n, \ __FILE__, __LINE__) -# define os_file_write(name, file, buf, offset, n) \ - pfs_os_file_write_func(name, file, buf, offset, n, \ - __FILE__, __LINE__) +# define os_file_read_no_error_handling_int_fd( \ + file, buf, offset, n) \ + pfs_os_file_read_no_error_handling_int_fd_func( \ + file, buf, offset, n, __FILE__, __LINE__) + +# define os_file_write(name, file, buf, offset, n) \ + pfs_os_file_write_func(name, file, buf, offset, \ + n, __FILE__, __LINE__) + +# define os_file_write_int_fd(name, file, buf, offset, n) \ + pfs_os_file_write_int_fd_func(name, file, buf, offset, \ + n, __FILE__, __LINE__) # define os_file_flush(file) \ pfs_os_file_flush_func(file, __FILE__, __LINE__) @@ -349,6 +424,15 @@ The wrapper functions have the prefix of "innodb_". */ # define os_file_delete_if_exists(key, name) \ pfs_os_file_delete_if_exists_func(key, name, __FILE__, __LINE__) + +# define os_file_set_eof_at(file, new_len) \ + pfs_os_file_set_eof_at_func(file, new_len, __FILE__, __LINE__) + +# ifdef HAVE_POSIX_FALLOCATE +# define os_file_allocate(file, offset, len) \ + pfs_os_file_allocate_func(file, offset, len, __FILE__, __LINE__) +# endif + #else /* UNIV_PFS_IO */ /* If UNIV_PFS_IO is not defined, these I/O APIs point @@ -364,7 +448,11 @@ to original un-instrumented file I/O APIs */ os_file_create_simple_no_error_handling_func( \ name, create_mode, access, success, atomic_writes) -# define os_file_close(file) os_file_close_func(file) +# define os_file_close(file) \ + os_file_close_func(file) + +# define os_file_close_no_error_handling(file) \ + os_file_close_no_error_handling_func(file) # define os_aio(type, is_log, mode, name, file, buf, offset, n, page_size, message1, \ message2, space_id, trx, write_size) \ @@ -379,11 +467,17 @@ to original un-instrumented file I/O APIs */ # define os_file_read_no_error_handling(file, buf, offset, n) \ os_file_read_no_error_handling_func(file, buf, offset, n) +# define os_file_read_no_error_handling_int_fd( \ + file, buf, offset, n) \ + os_file_read_no_error_handling_func(file, buf, offset, n) +# define os_file_write_int_fd(name, file, buf, offset, n) \ + os_file_write_func(name, file, buf, offset, n) # define os_file_write(name, file, buf, offset, n) \ os_file_write_func(name, file, buf, offset, n) -# define os_file_flush(file) os_file_flush_func(file) + +# define os_file_flush(file) os_file_flush_func(file) # define os_file_rename(key, oldpath, newpath) \ os_file_rename_func(oldpath, newpath) @@ -393,6 +487,9 @@ to original un-instrumented file I/O APIs */ # define os_file_delete_if_exists(key, name) \ os_file_delete_if_exists_func(name) +# define os_file_set_eof_at(file, new_len) \ + os_file_set_eof_at_func(file, new_len) + #endif /* UNIV_PFS_IO */ /* File types for directory entry data type */ @@ -530,7 +627,7 @@ A simple function to open or create a file. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INTERN -os_file_t +pfs_os_file_t os_file_create_simple_no_error_handling_func( /*=========================================*/ const char* name, /*!< in: name of the file or path as a @@ -565,7 +662,7 @@ Opens an existing file or creates a new. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INTERN -os_file_t +pfs_os_file_t os_file_create_func( /*================*/ const char* name, /*!< in: name of the file or path as a @@ -626,6 +723,42 @@ ibool os_file_close_func( /*===============*/ os_file_t file); /*!< in, own: handle to a file */ +/***********************************************************************//** +NOTE! Use the corresponding macro os_file_close(), not directly this +function! +Closes a file handle. In case of error, error number can be retrieved with +os_file_get_last_error. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_close_no_error_handling_func( +/*===============*/ + os_file_t file); /*!< in, own: handle to a file */ + +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_set_eof_at(), not +directly this function! +Truncates a file at the specified position. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_set_eof_at_func( + os_file_t file, /*!< in: handle to a file */ + ib_uint64_t new_len);/*!< in: new file length */ + +#ifdef HAVE_POSIX_FALLOCATE +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_allocate(), not +directly this function! +Ensures that disk space is allocated for the file. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_allocate_func( + os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len); /*!< in: file region length */ +#endif #ifdef UNIV_PFS_IO /****************************************************************//** @@ -636,7 +769,7 @@ os_file_create_simple() which opens or creates a file. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_simple_func( /*===========================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -661,7 +794,7 @@ monitor file creation/open. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_simple_no_error_handling_func( /*=============================================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -687,7 +820,7 @@ Add instrumentation to monitor file creation/open. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_func( /*====================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -718,7 +851,20 @@ UNIV_INLINE ibool pfs_os_file_close_func( /*===================*/ - os_file_t file, /*!< in, own: handle to a file */ + pfs_os_file_t file, /*!< in, own: handle to a file */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_close_no_error_handling(), +not directly this function! +A performance schema instrumented wrapper function for +os_file_close_no_error_handling(). +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_close_no_error_handling_func( +/*===================*/ + pfs_os_file_t file, /*!< in, own: handle to a file */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ /*******************************************************************//** @@ -731,7 +877,7 @@ UNIV_INLINE ibool pfs_os_file_read_func( /*==================*/ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ @@ -750,7 +896,7 @@ UNIV_INLINE ibool pfs_os_file_read_no_error_handling_func( /*====================================*/ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ @@ -772,7 +918,7 @@ pfs_os_aio_func( ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read or from which to write */ os_offset_t offset, /*!< in: file offset where to read or write */ @@ -807,7 +953,7 @@ pfs_os_file_write_func( /*===================*/ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ const void* buf, /*!< in: buffer from which to write */ os_offset_t offset, /*!< in: file offset where to write */ ulint n, /*!< in: number of bytes to write */ @@ -824,7 +970,7 @@ UNIV_INLINE ibool pfs_os_file_flush_func( /*===================*/ - os_file_t file, /*!< in, own: handle to a file */ + pfs_os_file_t file, /*!< in, own: handle to a file */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ @@ -876,16 +1022,66 @@ pfs_os_file_delete_if_exists_func( string */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ + +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_set_eof_at(), not +directly this function! +This is the performance schema instrumented wrapper function for +os_file_set_eof_at() +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_set_eof_at_func( + pfs_os_file_t file, /*!< in: handle to a file */ + ib_uint64_t new_len,/*!< in: new file length */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ + +#ifdef HAVE_POSIX_FALLOCATE +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_allocate(), not +directly this function! +Ensures that disk space is allocated for the file. +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_allocate_func( + pfs_os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len, /*!< in: file region length */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ +#endif + #endif /* UNIV_PFS_IO */ /***********************************************************************//** -Closes a file handle. -@return TRUE if success */ +Checks if the file is marked as invalid. +@return TRUE if invalid */ UNIV_INTERN -ibool -os_file_close_no_error_handling( -/*============================*/ - os_file_t file); /*!< in, own: handle to a file */ +bool +os_file_is_invalid( + pfs_os_file_t file); /*!< in, own: handle to a file */ + +/***********************************************************************//** +Marks the file as invalid. */ +UNIV_INTERN +void +os_file_mark_invalid( + pfs_os_file_t* file); /*!< out: pointer to a handle to a file */ + +/***********************************************************************//** +Announces an intention to access file data in a specific pattern in the +future. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_advise( + pfs_os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len, /*!< in: file region length */ + ulint advice);/*!< in: advice for access pattern */ + /***********************************************************************//** Gets a file size. @return file size, or (os_offset_t) -1 on failure */ @@ -893,7 +1089,7 @@ UNIV_INTERN os_offset_t os_file_get_size( /*=============*/ - os_file_t file) /*!< in: handle to a file */ + pfs_os_file_t file) /*!< in: handle to a file */ MY_ATTRIBUTE((warn_unused_result)); /** Set the size of a newly created file. @param[in] name file name @@ -905,7 +1101,7 @@ UNIV_INTERN bool os_file_set_size( const char* name, - os_file_t file, + pfs_os_file_t file, os_offset_t size, bool is_sparse = false) MY_ATTRIBUTE((nonnull, warn_unused_result)); @@ -918,14 +1114,6 @@ os_file_set_eof( /*============*/ FILE* file); /*!< in: file to be truncated */ /***********************************************************************//** -Truncates a file at the specified position. -@return TRUE if success */ -UNIV_INTERN -ibool -os_file_set_eof_at( - os_file_t file, /*!< in: handle to a file */ - ib_uint64_t new_len);/*!< in: new file length */ -/***********************************************************************//** NOTE! Use the corresponding macro os_file_flush(), not directly this function! Flushes the write buffers of a given file to the disk. @return TRUE if success */ @@ -1155,7 +1343,7 @@ os_aio_func( caution! */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read or from which to write */ os_offset_t offset, /*!< in: file offset where to read or write */ diff --git a/storage/xtradb/include/os0file.ic b/storage/xtradb/include/os0file.ic index b284d7ea9ac..72ac9d9dd6a 100644 --- a/storage/xtradb/include/os0file.ic +++ b/storage/xtradb/include/os0file.ic @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, SkySQL Ab. All Rights Reserved. +Copyright (c) 2010, 2017, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -35,7 +35,7 @@ os_file_create_simple() which opens or creates a file. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_simple_func( /*===========================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -50,7 +50,7 @@ pfs_os_file_create_simple_func( const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { - os_file_t file; + pfs_os_file_t file; struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; @@ -64,8 +64,9 @@ pfs_os_file_create_simple_func( file = os_file_create_simple_func(name, create_mode, access_type, success, atomic_writes); - /* Regsiter the returning "file" value with the system */ - register_pfs_file_open_end(locker, file); + /* Register psi value for the file */ + register_pfs_file_open_end(locker, file, + (*success == TRUE ? success : 0)); return(file); } @@ -79,7 +80,7 @@ monitor file creation/open. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_simple_no_error_handling_func( /*=============================================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -96,7 +97,7 @@ pfs_os_file_create_simple_no_error_handling_func( const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { - os_file_t file; + pfs_os_file_t file; struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; @@ -108,9 +109,10 @@ pfs_os_file_create_simple_no_error_handling_func( name, src_file, src_line); file = os_file_create_simple_no_error_handling_func( - name, create_mode, access_type, success, atomic_writes); + name, create_mode, access_type, success, atomic_writes); - register_pfs_file_open_end(locker, file); + register_pfs_file_open_end(locker, file, + (*success == TRUE ? success : 0)); return(file); } @@ -123,7 +125,7 @@ Add instrumentation to monitor file creation/open. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_func( /*====================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -144,7 +146,7 @@ pfs_os_file_create_func( const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { - os_file_t file; + pfs_os_file_t file; struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; @@ -158,7 +160,8 @@ pfs_os_file_create_func( file = os_file_create_func(name, create_mode, purpose, type, success, atomic_writes); - register_pfs_file_open_end(locker, file); + register_pfs_file_open_end(locker, file, + (*success == TRUE ? success : 0)); return(file); } @@ -172,7 +175,7 @@ UNIV_INLINE ibool pfs_os_file_close_func( /*===================*/ - os_file_t file, /*!< in, own: handle to a file */ + pfs_os_file_t file, /*!< in, own: handle to a file */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -190,6 +193,34 @@ pfs_os_file_close_func( return(result); } +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_close_no_error_handling(), +not directly this function! +A performance schema instrumented wrapper function for +os_file_close_no_error_handling(). +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_close_no_error_handling_func( +/*===================*/ + pfs_os_file_t file, /*!< in, own: handle to a file */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + bool result; + struct PSI_file_locker* locker = NULL; + PSI_file_locker_state state; + + /* register the file close */ + register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CLOSE, + src_file, src_line); + + result = os_file_close_no_error_handling_func(file); + + register_pfs_file_io_end(locker, 0); + + return(result); +} /*******************************************************************//** NOTE! Please use the corresponding macro os_aio(), not directly this @@ -206,7 +237,7 @@ pfs_os_aio_func( ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read or from which to write */ os_offset_t offset, /*!< in: file offset where to read or write */ @@ -260,7 +291,7 @@ UNIV_INLINE ibool pfs_os_file_read_func( /*==================*/ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ @@ -294,7 +325,7 @@ UNIV_INLINE ibool pfs_os_file_read_no_error_handling_func( /*====================================*/ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ @@ -315,6 +346,42 @@ pfs_os_file_read_no_error_handling_func( return(result); } +/** NOTE! Please use the corresponding macro +os_file_read_no_error_handling_int_fd(), not directly this function! +This is the performance schema instrumented wrapper function for +os_file_read_no_error_handling_int_fd_func() which requests a +synchronous read operation. +@return TRUE if request was successful, FALSE if fail */ +UNIV_INLINE +ibool +pfs_os_file_read_no_error_handling_int_fd_func( + int file, /*!< in: handle to a file */ + void* buf, /*!< in: buffer where to read */ + os_offset_t offset, /*!< in: file offset where to read */ + ulint n, /*!< in: number of bytes to read */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + PSI_file_locker_state state; + struct PSI_file_locker* locker; + + locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( + &state, file, PSI_FILE_READ); + if (locker != NULL) { + PSI_FILE_CALL(start_file_wait)( + locker, n, + __FILE__, __LINE__); + } + ibool result = os_file_read_no_error_handling_func( + OS_FILE_FROM_FD(file), buf, offset, n); + + if (locker != NULL) { + PSI_FILE_CALL(end_file_wait)(locker, n); + } + + return(result); +} + /*******************************************************************//** NOTE! Please use the corresponding macro os_file_write(), not directly this function! @@ -327,7 +394,7 @@ pfs_os_file_write_func( /*===================*/ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ const void* buf, /*!< in: buffer from which to write */ os_offset_t offset, /*!< in: file offset where to write */ ulint n, /*!< in: number of bytes to write */ @@ -348,6 +415,43 @@ pfs_os_file_write_func( return(result); } +/** NOTE! Please use the corresponding macro os_file_write(), not +directly this function! +This is the performance schema instrumented wrapper function for +os_file_write() which requests a synchronous write operation. +@return TRUE if request was successful, FALSE if fail */ +UNIV_INLINE +ibool +pfs_os_file_write_int_fd_func( + const char* name, /*!< in: name of the file or path as a + null-terminated string */ + int file, /*!< in: handle to a file */ + const void* buf, /*!< in: buffer from which to write */ + os_offset_t offset, /*!< in: file offset where to write */ + ulint n, /*!< in: number of bytes to write */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + PSI_file_locker_state state; + struct PSI_file_locker* locker = NULL; + + locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( + &state, file, PSI_FILE_WRITE); + if (locker != NULL) { + PSI_FILE_CALL(start_file_wait)( + locker, n, + __FILE__, __LINE__); + } + ibool result = os_file_write_func( + name, OS_FILE_FROM_FD(file), buf, offset, n); + + if (locker != NULL) { + PSI_FILE_CALL(end_file_wait)(locker, n); + } + + return(result); +} + /***********************************************************************//** NOTE! Please use the corresponding macro os_file_flush(), not directly this function! @@ -358,7 +462,7 @@ UNIV_INLINE ibool pfs_os_file_flush_func( /*===================*/ - os_file_t file, /*!< in, own: handle to a file */ + pfs_os_file_t file, /*!< in, own: handle to a file */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -396,12 +500,12 @@ pfs_os_file_rename_func( struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; - register_pfs_file_open_begin(&state, locker, key, PSI_FILE_RENAME, newpath, + register_pfs_file_rename_begin(&state, locker, key, PSI_FILE_RENAME, newpath, src_file, src_line); result = os_file_rename_func(oldpath, newpath); - register_pfs_file_open_end(locker, 0); + register_pfs_file_rename_end(locker, 0); return(result); } @@ -465,4 +569,61 @@ pfs_os_file_delete_if_exists_func( return(result); } + +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_set_eof_at(), not +directly this function! +This is the performance schema instrumented wrapper function for +os_file_set_eof_at() +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_set_eof_at_func( + pfs_os_file_t file, /*!< in: handle to a file */ + ib_uint64_t new_len,/*!< in: new file length */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + bool result; + struct PSI_file_locker* locker = NULL; + PSI_file_locker_state state; + + register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CHSIZE, + src_file, src_line); + result = os_file_set_eof_at_func(file, new_len); + + register_pfs_file_io_end(locker, 0); + + return(result); +} + +#ifdef HAVE_POSIX_FALLOCATE +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_allocate(), not +directly this function! +Ensures that disk space is allocated for the file. +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_allocate_func( + pfs_os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len, /*!< in: file region length */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + bool result; + struct PSI_file_locker* locker = NULL; + PSI_file_locker_state state; + + register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CHSIZE, + src_file, src_line); + result = os_file_allocate_func(file, offset, len); + + register_pfs_file_io_end(locker, 0); + + return(result); +} +#endif + #endif /* UNIV_PFS_IO */ diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h index 62f651413e1..ce03f6a2124 100644 --- a/storage/xtradb/include/os0sync.h +++ b/storage/xtradb/include/os0sync.h @@ -959,7 +959,14 @@ struct MY_ALIGNED(CACHE_LINE_SIZE) simple_counter { compile_time_assert(!atomic || sizeof(Type) == sizeof(ulint)); if (atomic) { - return os_atomic_increment_ulint(&m_counter, i); + /* GCC would perform a type check in this code + also in case the template is instantiated with + simple_counter<Type=not_ulint, atomic=false>. + On Solaris, os_atomic_increment_ulint() maps + to atomic_add_long_nv(), which expects the + parameter to be correctly typed. */ + return os_atomic_increment_ulint( + reinterpret_cast<ulint*>(&m_counter), i); } else { return m_counter += i; } diff --git a/storage/xtradb/include/page0zip.ic b/storage/xtradb/include/page0zip.ic index 6c7d8cd32c7..9a583086925 100644 --- a/storage/xtradb/include/page0zip.ic +++ b/storage/xtradb/include/page0zip.ic @@ -2,6 +2,7 @@ Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -172,7 +173,8 @@ page_zip_rec_needs_ext( ignored if zip_size == 0 */ ulint zip_size) /*!< in: compressed page size in bytes, or 0 */ { - ut_ad(rec_size > comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES); + ut_ad(rec_size + > (comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES)); ut_ad(ut_is_2pow(zip_size)); ut_ad(comp || !zip_size); diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h index 2bd17980896..a8503a5cfda 100644 --- a/storage/xtradb/include/row0mysql.h +++ b/storage/xtradb/include/row0mysql.h @@ -614,7 +614,7 @@ struct mysql_row_templ_t { Innobase record in the current index; not defined if template_type is ROW_MYSQL_WHOLE_ROW */ - ibool rec_field_is_prefix; /* is this field in a prefix index? */ + bool rec_field_is_prefix; /* is this field in a prefix index? */ ulint rec_prefix_field_no; /* record field, even if just a prefix; same as rec_field_no when not a prefix, otherwise rec_field_no is diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index cf7824d91e7..4e98ce0f1cb 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All rights reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, 2009, Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2013, 2017, MariaDB Corporation. @@ -1096,6 +1096,13 @@ UNIV_INTERN void srv_purge_wakeup(); +/** Check whether given space id is undo tablespace id +@param[in] space_id space id to check +@return true if it is undo tablespace else false. */ +bool +srv_is_undo_tablespace( + ulint space_id); + /** Status variables to be passed to MySQL */ struct export_var_t{ ulint innodb_adaptive_hash_hash_searches; diff --git a/storage/xtradb/include/srv0start.h b/storage/xtradb/include/srv0start.h index 963b767f0fb..b055a9d834f 100644 --- a/storage/xtradb/include/srv0start.h +++ b/storage/xtradb/include/srv0start.h @@ -1,6 +1,7 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -75,22 +76,12 @@ are not found and the user wants. @return DB_SUCCESS or error code */ UNIV_INTERN dberr_t -innobase_start_or_create_for_mysql(void); -/*====================================*/ -/****************************************************************//** -Shuts down the Innobase database. -@return DB_SUCCESS or error code */ -UNIV_INTERN -dberr_t -innobase_shutdown_for_mysql(void); +innobase_start_or_create_for_mysql(); -/******************************************************************** -Signal all per-table background threads to shutdown, and wait for them to do -so. */ +/** Shut down InnoDB. */ UNIV_INTERN void -srv_shutdown_table_bg_threads(void); -/*=============================*/ +innodb_shutdown(); /*************************************************************//** Copy the file path component of the physical file to parameter. It will @@ -139,6 +130,8 @@ extern ibool srv_startup_is_before_trx_rollback_phase; /** TRUE if a raw partition is in use */ extern ibool srv_start_raw_disk_in_use; +/** Undo tablespaces starts with space_id. */ +extern ulint srv_undo_space_id_start; /** Shutdown state */ enum srv_shutdown_state { @@ -156,6 +149,9 @@ enum srv_shutdown_state { SRV_SHUTDOWN_EXIT_THREADS/*!< Exit all threads */ }; +/** Whether any undo log records can be generated */ +extern bool srv_undo_sources; + /** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */ extern enum srv_shutdown_state srv_shutdown_state; diff --git a/storage/xtradb/include/trx0rec.h b/storage/xtradb/include/trx0rec.h index 359937e3583..a6e202d04e4 100644 --- a/storage/xtradb/include/trx0rec.h +++ b/storage/xtradb/include/trx0rec.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -212,10 +213,6 @@ UNIV_INTERN dberr_t trx_undo_report_row_operation( /*==========================*/ - ulint flags, /*!< in: if BTR_NO_UNDO_LOG_FLAG bit is - set, does nothing */ - ulint op_type, /*!< in: TRX_UNDO_INSERT_OP or - TRX_UNDO_MODIFY_OP */ que_thr_t* thr, /*!< in: query thread */ dict_index_t* index, /*!< in: clustered index */ const dtuple_t* clust_entry, /*!< in: in the case of an insert, @@ -233,7 +230,7 @@ trx_undo_report_row_operation( inserted undo log record, 0 if BTR_NO_UNDO_LOG flag was specified */ - MY_ATTRIBUTE((nonnull(3,4,10), warn_unused_result)); + MY_ATTRIBUTE((nonnull(1,2,8), warn_unused_result)); /******************************************************************//** Copies an undo record to heap. This function can be called if we know that the undo log record exists. @@ -313,10 +310,6 @@ record */ storage fields: used by purge to free the external storage */ -/* Operation type flags used in trx_undo_report_row_operation */ -#define TRX_UNDO_INSERT_OP 1 -#define TRX_UNDO_MODIFY_OP 2 - #ifndef UNIV_NONINL #include "trx0rec.ic" #endif diff --git a/storage/xtradb/include/trx0rseg.h b/storage/xtradb/include/trx0rseg.h index b9c84ef2b06..e2853df7045 100644 --- a/storage/xtradb/include/trx0rseg.h +++ b/storage/xtradb/include/trx0rseg.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -124,13 +125,13 @@ trx_rseg_mem_free( /*==============*/ trx_rseg_t* rseg); /*!< in, own: instance to free */ -/********************************************************************* -Creates a rollback segment. */ +/** Create a rollback segment. +@param[in] space undo tablespace ID +@return pointer to new rollback segment +@retval NULL on failure */ UNIV_INTERN trx_rseg_t* -trx_rseg_create( -/*============*/ - ulint space); /*!< in: id of UNDO tablespace */ +trx_rseg_create(ulint space); /******************************************************************** Get the number of unique rollback tablespaces in use except space id 0. diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h index 766d61039b4..1b490eca2af 100644 --- a/storage/xtradb/include/trx0trx.h +++ b/storage/xtradb/include/trx0trx.h @@ -1,8 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2016, MariaDB Corporation. All Rights Reserved. - +Copyright (c) 2015, 2017, MariaDB Corporation This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -334,6 +333,24 @@ trx_print_low( /*!< in: mem_heap_get_size(trx->lock.lock_heap) */ MY_ATTRIBUTE((nonnull)); +#ifdef WITH_WSREP +/**********************************************************************//** +Prints info about a transaction. +Transaction information may be retrieved without having trx_sys->mutex acquired +so it may not be completely accurate. The caller must own lock_sys->mutex +and the trx must have some locks to make sure that it does not escape +without locking lock_sys->mutex. */ +UNIV_INTERN +void +wsrep_trx_print_locking( +/*==============*/ + FILE* f, /*!< in: output stream */ + const trx_t* trx, /*!< in: transaction */ + ulint max_query_len) /*!< in: max query length to print, + or 0 to use the default max length */ + MY_ATTRIBUTE((nonnull)); +#endif /* WITH_WSREP */ + /**********************************************************************//** Prints info about a transaction. The caller must hold lock_sys->mutex and trx_sys->mutex. diff --git a/storage/xtradb/include/trx0xa.h b/storage/xtradb/include/trx0xa.h index 7caddfb7ba4..4d5adc68dcd 100644 --- a/storage/xtradb/include/trx0xa.h +++ b/storage/xtradb/include/trx0xa.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -24,6 +24,8 @@ this program; if not, write to the Free Software Foundation, Inc., #ifndef XA_H #define XA_H +#include "handler.h" + /* * Transaction branch identification: XID and NULLXID: */ @@ -35,17 +37,6 @@ this program; if not, write to the Free Software Foundation, Inc., #define MAXGTRIDSIZE 64 /*!< maximum size in bytes of gtrid */ #define MAXBQUALSIZE 64 /*!< maximum size in bytes of bqual */ -/** X/Open XA distributed transaction identifier */ -struct xid_t { - long formatID; /*!< format identifier; -1 - means that the XID is null */ - long gtrid_length; /*!< value from 1 through 64 */ - long bqual_length; /*!< value from 1 through 64 */ - char data[XIDDATASIZE]; /*!< distributed transaction - identifier */ -}; -/** X/Open XA distributed transaction identifier */ -typedef struct xid_t XID; #endif /** X/Open XA distributed transaction status codes */ /* @{ */ diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 310053b9145..23c8c0a659d 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -45,10 +45,10 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 5 #define INNODB_VERSION_MINOR 6 -#define INNODB_VERSION_BUGFIX 35 +#define INNODB_VERSION_BUGFIX 36 #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 80.0 +#define PERCONA_INNODB_VERSION 82.0 #endif /* Enable UNIV_LOG_ARCHIVE in XtraDB */ @@ -146,14 +146,8 @@ HAVE_PSI_INTERFACE is defined. */ #if defined HAVE_PSI_INTERFACE && !defined UNIV_HOTBACKUP # define UNIV_PFS_MUTEX # define UNIV_PFS_RWLOCK -/* For I/O instrumentation, performance schema rely -on a native descriptor to identify the file, this -descriptor could conflict with our OS level descriptor. -Disable IO instrumentation on Windows until this is -resolved */ -# ifndef __WIN__ -# define UNIV_PFS_IO -# endif + +# define UNIV_PFS_IO # define UNIV_PFS_THREAD /* There are mutexes/rwlocks that we want to exclude from diff --git a/storage/xtradb/include/ut0rnd.ic b/storage/xtradb/include/ut0rnd.ic index 024c59e553b..987dfac03c1 100644 --- a/storage/xtradb/include/ut0rnd.ic +++ b/storage/xtradb/include/ut0rnd.ic @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -237,16 +238,22 @@ ut_fold_binary( switch (len & 0x7) { case 7: fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + /* fall through */ case 6: fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + /* fall through */ case 5: fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + /* fall through */ case 4: fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + /* fall through */ case 3: fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + /* fall through */ case 2: fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); + /* fall through */ case 1: fold = ut_fold_ulint_pair(fold, (ulint)(*str++)); } diff --git a/storage/xtradb/lock/lock0lock.cc b/storage/xtradb/lock/lock0lock.cc index 717fbf02536..71612f66fcd 100644 --- a/storage/xtradb/lock/lock0lock.cc +++ b/storage/xtradb/lock/lock0lock.cc @@ -921,12 +921,18 @@ lock_reset_lock_and_trx_wait( const char* stmt2=NULL; size_t stmt_len; trx_id_t trx_id = 0; - stmt = innobase_get_stmt(lock->trx->mysql_thd, &stmt_len); + stmt = lock->trx->mysql_thd + ? innobase_get_stmt(lock->trx->mysql_thd, &stmt_len) + : NULL; if (lock->trx->lock.wait_lock && lock->trx->lock.wait_lock->trx) { trx_id = lock->trx->lock.wait_lock->trx->id; - stmt2 = innobase_get_stmt(lock->trx->lock.wait_lock->trx->mysql_thd, &stmt_len); + stmt2 = lock->trx->lock.wait_lock->trx->mysql_thd + ? innobase_get_stmt( + lock->trx->lock.wait_lock + ->trx->mysql_thd, &stmt_len) + : NULL; } ib_logf(IB_LOG_LEVEL_INFO, @@ -5636,13 +5642,11 @@ lock_rec_unlock( trx_mutex_exit(trx); stmt = innobase_get_stmt(trx->mysql_thd, &stmt_len); - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: unlock row could not" - " find a %lu mode lock on the record\n", - (ulong) lock_mode); - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: current statement: %.*s\n", + + ib_logf(IB_LOG_LEVEL_ERROR, + "unlock row could not find a %u mode lock on the record;" + " statement=%.*s", + lock_mode, (int) stmt_len, stmt); return; diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc index 309de7daaf8..3252cd793c9 100644 --- a/storage/xtradb/log/log0log.cc +++ b/storage/xtradb/log/log0log.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Google Inc. Copyright (c) 2014, 2017, MariaDB Corporation. @@ -55,12 +55,13 @@ Created 12/9/1995 Heikki Tuuri #include "mem0mem.h" #include "buf0buf.h" #include "buf0flu.h" -#include "srv0srv.h" #include "lock0lock.h" #include "log0recv.h" #include "fil0fil.h" #include "dict0boot.h" -#include "dict0stats_bg.h" /* dict_stats_event */ +#include "dict0stats_bg.h" +#include "dict0stats_bg.h" +#include "btr0defragment.h" #include "srv0srv.h" #include "srv0start.h" #include "trx0sys.h" @@ -2804,7 +2805,7 @@ log_group_archive( /*==============*/ log_group_t* group) /*!< in: log group */ { - os_file_t file_handle; + pfs_os_file_t file_handle; lsn_t start_lsn; lsn_t end_lsn; char name[OS_FILE_MAX_PATH]; @@ -3618,6 +3619,8 @@ loop: thread_name = "lock_wait_timeout_thread"; } else if (srv_buf_dump_thread_active) { thread_name = "buf_dump_thread"; + } else if (btr_defragment_thread_active) { + thread_name = "btr_defragment_thread"; } else if (srv_fast_shutdown != 2 && trx_rollback_or_clean_is_active) { thread_name = "rollback of recovered transactions"; } else { @@ -3639,8 +3642,8 @@ wait_suspend_loop: switch (srv_get_active_thread_type()) { case SRV_NONE: - srv_shutdown_state = SRV_SHUTDOWN_FLUSH_PHASE; if (!srv_n_fil_crypt_threads_started) { + srv_shutdown_state = SRV_SHUTDOWN_FLUSH_PHASE; break; } os_event_set(fil_crypt_threads_event); @@ -3820,7 +3823,8 @@ wait_suspend_loop: ut_a(freed); ut_a(lsn == log_sys->lsn); - ut_ad(lsn == log_sys->last_checkpoint_lsn); + ut_ad(srv_force_recovery >= SRV_FORCE_NO_LOG_REDO + || lsn == log_sys->last_checkpoint_lsn); if (lsn < srv_start_lsn) { ib_logf(IB_LOG_LEVEL_ERROR, @@ -3832,9 +3836,14 @@ wait_suspend_loop: srv_shutdown_lsn = lsn; if (!srv_read_only_mode) { - fil_write_flushed_lsn_to_data_files(lsn, 0); + dberr_t err = fil_write_flushed_lsn(lsn); - fil_flush_file_spaces(FIL_TABLESPACE); + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Failed to write flush lsn to the " + "system tablespace at shutdown err=%s", + ut_strerr(err)); + } } fil_close_all_files(); diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc index 74f2e2360a8..27382977e5c 100644 --- a/storage/xtradb/log/log0online.cc +++ b/storage/xtradb/log/log0online.cc @@ -329,7 +329,7 @@ log_online_read_last_tracked_lsn(void) lsn_t result; os_offset_t read_offset = log_bmp_sys->out.offset; - while (!checksum_ok && read_offset > 0 && !is_last_page) + while ((!checksum_ok || !is_last_page) && read_offset > 0) { read_offset -= MODIFIED_PAGE_BLOCK_SIZE; log_bmp_sys->out.offset = read_offset; @@ -554,9 +554,9 @@ log_online_rotate_bitmap_file( lsn_t next_file_start_lsn) /*!<in: the start LSN name part */ { - if (log_bmp_sys->out.file != os_file_invalid) { + if (!os_file_is_invalid(log_bmp_sys->out.file)) { os_file_close(log_bmp_sys->out.file); - log_bmp_sys->out.file = os_file_invalid; + os_file_mark_invalid(&log_bmp_sys->out.file); } log_bmp_sys->out_seq_num++; log_online_make_bitmap_name(next_file_start_lsn); @@ -723,7 +723,11 @@ log_online_read_init(void) } last_tracked_lsn = log_online_read_last_tracked_lsn(); + /* Do not rotate if we truncated the file to zero length - we + can just start writing there */ + const bool need_rotate = (last_tracked_lsn != 0); if (!last_tracked_lsn) { + last_tracked_lsn = last_file_start_lsn; } @@ -735,7 +739,8 @@ log_online_read_init(void) } else { file_start_lsn = tracking_start_lsn; } - ut_a(log_online_rotate_bitmap_file(file_start_lsn)); + ut_a(!need_rotate + || log_online_rotate_bitmap_file(file_start_lsn)); if (last_tracked_lsn < tracking_start_lsn) { @@ -773,9 +778,9 @@ log_online_read_shutdown(void) ib_rbt_node_t *free_list_node = log_bmp_sys->page_free_list; - if (log_bmp_sys->out.file != os_file_invalid) { + if (!os_file_is_invalid(log_bmp_sys->out.file)) { os_file_close(log_bmp_sys->out.file); - log_bmp_sys->out.file = os_file_invalid; + os_file_mark_invalid(&log_bmp_sys->out.file); } rbt_free(log_bmp_sys->modified_pages); @@ -1114,6 +1119,18 @@ log_online_write_bitmap_page( } }); + /* A crash injection site that ensures last checkpoint LSN > last + tracked LSN, so that LSN tracking for this interval is tested. */ + DBUG_EXECUTE_IF("crash_before_bitmap_write", + { + ulint space_id + = mach_read_from_4(block + + MODIFIED_PAGE_SPACE_ID); + if (space_id > 0) + DBUG_SUICIDE(); + }); + + ibool success = os_file_write(log_bmp_sys->out.name, log_bmp_sys->out.file, block, log_bmp_sys->out.offset, @@ -1137,10 +1154,8 @@ log_online_write_bitmap_page( return FALSE; } -#ifdef UNIV_LINUX - posix_fadvise(log_bmp_sys->out.file, log_bmp_sys->out.offset, - MODIFIED_PAGE_BLOCK_SIZE, POSIX_FADV_DONTNEED); -#endif + os_file_advise(log_bmp_sys->out.file, log_bmp_sys->out.offset, + MODIFIED_PAGE_BLOCK_SIZE, OS_FILE_ADVISE_DONTNEED); log_bmp_sys->out.offset += MODIFIED_PAGE_BLOCK_SIZE; return TRUE; @@ -1262,10 +1277,6 @@ log_online_follow_redo_log(void) group = UT_LIST_GET_NEXT(log_groups, group); } - /* A crash injection site that ensures last checkpoint LSN > last - tracked LSN, so that LSN tracking for this interval is tested. */ - DBUG_EXECUTE_IF("crash_before_bitmap_write", DBUG_SUICIDE();); - result = log_online_write_bitmap(); log_bmp_sys->start_lsn = log_bmp_sys->end_lsn; log_set_tracked_lsn(log_bmp_sys->start_lsn); @@ -1433,6 +1444,7 @@ log_online_setup_bitmap_file_range( if (UNIV_UNLIKELY(array_pos >= bitmap_files->count)) { log_online_diagnose_inconsistent_dir(bitmap_files); + os_file_closedir(bitmap_dir); return FALSE; } @@ -1535,10 +1547,8 @@ log_online_open_bitmap_file_read_only( bitmap_file->size = os_file_get_size(bitmap_file->file); bitmap_file->offset = 0; -#ifdef UNIV_LINUX - posix_fadvise(bitmap_file->file, 0, 0, POSIX_FADV_SEQUENTIAL); - posix_fadvise(bitmap_file->file, 0, 0, POSIX_FADV_NOREUSE); -#endif + os_file_advise(bitmap_file->file, 0, 0, OS_FILE_ADVISE_SEQUENTIAL); + os_file_advise(bitmap_file->file, 0, 0, OS_FILE_ADVISE_NOREUSE); return TRUE; } @@ -1624,7 +1634,7 @@ log_online_bitmap_iterator_init( /* Empty range */ i->in_files.count = 0; i->in_files.files = NULL; - i->in.file = os_file_invalid; + os_file_mark_invalid(&i->in.file); i->page = NULL; i->failed = FALSE; return TRUE; @@ -1642,7 +1652,7 @@ log_online_bitmap_iterator_init( if (i->in_files.count == 0) { /* Empty range */ - i->in.file = os_file_invalid; + os_file_mark_invalid(&i->in.file); i->page = NULL; i->failed = FALSE; return TRUE; @@ -1681,10 +1691,10 @@ log_online_bitmap_iterator_release( { ut_a(i); - if (i->in.file != os_file_invalid) { + if (!os_file_is_invalid(i->in.file)) { os_file_close(i->in.file); - i->in.file = os_file_invalid; + os_file_mark_invalid(&i->in.file); } if (i->in_files.files) { @@ -1738,8 +1748,9 @@ log_online_bitmap_iterator_next( /* Advance file */ i->in_i++; - success = os_file_close_no_error_handling(i->in.file); - i->in.file = os_file_invalid; + success = os_file_close_no_error_handling( + i->in.file); + os_file_mark_invalid(&i->in.file); if (UNIV_UNLIKELY(!success)) { os_file_get_last_error(TRUE); @@ -1848,7 +1859,7 @@ log_online_purge_changed_page_bitmaps( /* If we have to delete the current output file, close it first. */ os_file_close(log_bmp_sys->out.file); - log_bmp_sys->out.file = os_file_invalid; + os_file_mark_invalid(&log_bmp_sys->out.file); } for (i = 0; i < bitmap_files.count; i++) { diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc index 978e6051711..fb64309cee4 100644 --- a/storage/xtradb/log/log0recv.cc +++ b/storage/xtradb/log/log0recv.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. Copyright (c) 2013, 2017, MariaDB Corporation. @@ -332,6 +332,7 @@ DECLARE_THREAD(recv_writer_thread)( /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); ut_ad(!srv_read_only_mode); #ifdef UNIV_PFS_THREAD @@ -362,6 +363,7 @@ DECLARE_THREAD(recv_writer_thread)( recv_writer_thread_active = false; + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ @@ -3002,11 +3004,6 @@ recv_init_crash_recovery(void) possible */ if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { - - ib_logf(IB_LOG_LEVEL_INFO, - "Restoring possible half-written data pages " - "from the doublewrite buffer..."); - buf_dblwr_process(); /* Spawn the background thread to flush dirty pages @@ -3017,22 +3014,22 @@ recv_init_crash_recovery(void) } } -/********************************************************//** -Recovers from a checkpoint. When this function returns, the database is able +/** Recovers from a checkpoint. When this function returns, the database is able to start processing of new user transactions, but the function recv_recovery_from_checkpoint_finish should be called later to complete the recovery and free the resources used in it. +@param[in] type LOG_CHECKPOINT or LOG_ARCHIVE +@param[in] limit_lsn recover up to this lsn if possible +@param[in] flushed_lsn flushed lsn from first data file @return error code or DB_SUCCESS */ UNIV_INTERN dberr_t recv_recovery_from_checkpoint_start_func( -/*=====================================*/ #ifdef UNIV_LOG_ARCHIVE - ulint type, /*!< in: LOG_CHECKPOINT or LOG_ARCHIVE */ - lsn_t limit_lsn, /*!< in: recover up to this lsn if possible */ + ulint type, + lsn_t limit_lsn, #endif /* UNIV_LOG_ARCHIVE */ - lsn_t min_flushed_lsn,/*!< in: min flushed lsn from data files */ - lsn_t max_flushed_lsn)/*!< in: max flushed lsn from data files */ + lsn_t flushed_lsn) { log_group_t* group; log_group_t* max_cp_group; @@ -3260,6 +3257,7 @@ recv_recovery_from_checkpoint_start_func( group = UT_LIST_GET_NEXT(log_groups, group); } + /* Done with startup scan. Clear the flag. */ recv_log_scan_is_startup_type = FALSE; @@ -3272,38 +3270,16 @@ recv_recovery_from_checkpoint_start_func( there is something wrong we will print a message to the user about recovery: */ - if (checkpoint_lsn != max_flushed_lsn - || checkpoint_lsn != min_flushed_lsn) { - - if (checkpoint_lsn < max_flushed_lsn) { - - ib_logf(IB_LOG_LEVEL_WARN, - "The log sequence number " - "in the ibdata files is higher " - "than the log sequence number " - "in the ib_logfiles! Are you sure " - "you are using the right " - "ib_logfiles to start up the database. " - "Log sequence number in the " - "ib_logfiles is " LSN_PF ", log" - "sequence numbers stamped " - "to ibdata file headers are between " - "" LSN_PF " and " LSN_PF ".", - checkpoint_lsn, - min_flushed_lsn, - max_flushed_lsn); - } - + if (checkpoint_lsn != flushed_lsn) { if (!recv_needed_recovery) { ib_logf(IB_LOG_LEVEL_INFO, - "The log sequence numbers " - LSN_PF " and " LSN_PF - " in ibdata files do not match" + "The log sequence number " + LSN_PF + " in ibdata file do not match" " the log sequence number " LSN_PF " in the ib_logfiles!", - min_flushed_lsn, - max_flushed_lsn, + flushed_lsn, checkpoint_lsn); if (!srv_read_only_mode) { diff --git a/storage/xtradb/mysql-test/storage_engine/suite.pm b/storage/xtradb/mysql-test/storage_engine/suite.pm new file mode 100644 index 00000000000..e186a532dcc --- /dev/null +++ b/storage/xtradb/mysql-test/storage_engine/suite.pm @@ -0,0 +1,8 @@ +package My::Suite::SE::XtraDB; + +@ISA = qw(My::Suite); + +return "Need XtraDB engine"; + +bless { }; + diff --git a/storage/xtradb/mysql-test/storage_engine/tbl_opt_data_index_dir.rdiff b/storage/xtradb/mysql-test/storage_engine/tbl_opt_index_dir.rdiff index e09e50b17ec..e09e50b17ec 100644 --- a/storage/xtradb/mysql-test/storage_engine/tbl_opt_data_index_dir.rdiff +++ b/storage/xtradb/mysql-test/storage_engine/tbl_opt_index_dir.rdiff diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 4f219b18428..20b202506f5 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -1,6 +1,6 @@ /*********************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. Copyright (c) 2013, 2017, MariaDB Corporation. @@ -114,10 +114,12 @@ my_umask */ #ifndef __WIN__ /** Umask for creating files */ UNIV_INTERN ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; +# define os_file_invalid (-1) #else /** Umask for creating files */ UNIV_INTERN ulint os_innodb_umask = 0; -#define ECANCELED 125 +# define ECANCELED 125 +# define os_file_invalid INVALID_HANDLE_VALUE #endif /* __WIN__ */ #ifndef UNIV_HOTBACKUP @@ -221,7 +223,7 @@ struct os_aio_slot_t{ ulint page_size; /*!< UNIV_PAGE_SIZE or zip_size */ os_offset_t offset; /*!< file offset in bytes */ - os_file_t file; /*!< file where to read or write */ + pfs_os_file_t file; /*!< file where to read or write */ const char* name; /*!< file name or path */ ibool io_already_done;/*!< used only in simulated aio: TRUE if the physical i/o already @@ -1568,7 +1570,7 @@ A simple function to open or create a file. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INTERN -os_file_t +pfs_os_file_t os_file_create_simple_no_error_handling_func( /*=========================================*/ const char* name, /*!< in: name of the file or path as a @@ -1584,7 +1586,7 @@ os_file_create_simple_no_error_handling_func( ulint atomic_writes) /*! in: atomic writes table option value */ { - os_file_t file; + pfs_os_file_t file; atomic_writes_t awrites = (atomic_writes_t) atomic_writes; *success = FALSE; @@ -1593,7 +1595,6 @@ os_file_create_simple_no_error_handling_func( DWORD create_flag; DWORD attributes = 0; DWORD share_mode = FILE_SHARE_READ; - ut_a(name); ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT)); @@ -1610,8 +1611,8 @@ os_file_create_simple_no_error_handling_func( ib_logf(IB_LOG_LEVEL_ERROR, "Unknown file create mode (%lu) for file '%s'", create_mode, name); - - return((os_file_t) -1); + file = INVALID_HANDLE_VALUE; + return(file); } if (access_type == OS_FILE_READ_ONLY) { @@ -1635,8 +1636,8 @@ os_file_create_simple_no_error_handling_func( ib_logf(IB_LOG_LEVEL_ERROR, "Unknown file access type (%lu) for file '%s'", access_type, name); - - return((os_file_t) -1); + file = INVALID_HANDLE_VALUE; + return(file); } if (IS_XTRABACKUP()) { @@ -1668,11 +1669,10 @@ os_file_create_simple_no_error_handling_func( } } - *success = (file != INVALID_HANDLE_VALUE); + *success = file != INVALID_HANDLE_VALUE; #else /* __WIN__ */ int create_flag; const char* mode_str = NULL; - ut_a(name); if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) WAIT_ALLOW_WRITES(); @@ -1717,13 +1717,13 @@ os_file_create_simple_no_error_handling_func( ib_logf(IB_LOG_LEVEL_ERROR, "Unknown file create mode (%lu) for file '%s'", create_mode, name); - - return((os_file_t) -1); + file = -1; + return(file); } - file = ::open(name, create_flag, os_innodb_umask); + file = open(name, create_flag, os_innodb_umask); - *success = file == -1 ? FALSE : TRUE; + *success = file != -1; /* This function is always called for data files, we should disable OS caching (O_DIRECT) here as we do in os_file_create_func(), so @@ -1872,7 +1872,7 @@ Opens an existing file or creates a new. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INTERN -os_file_t +pfs_os_file_t os_file_create_func( /*================*/ const char* name, /*!< in: name of the file or path as a @@ -1890,7 +1890,7 @@ os_file_create_func( ulint atomic_writes) /*! in: atomic writes table option value */ { - os_file_t file; + pfs_os_file_t file; ibool retry; ibool on_error_no_exit; ibool on_error_silent; @@ -1901,14 +1901,16 @@ os_file_create_func( "ib_create_table_fail_disk_full", *success = FALSE; SetLastError(ERROR_DISK_FULL); - return((os_file_t) -1); + file = INVALID_HANDLE_VALUE; + return(file); ); #else /* __WIN__ */ DBUG_EXECUTE_IF( "ib_create_table_fail_disk_full", *success = FALSE; errno = ENOSPC; - return((os_file_t) -1); + file = -1; + return(file); ); #endif /* __WIN__ */ @@ -1962,7 +1964,8 @@ os_file_create_func( "Unknown file create mode (%lu) for file '%s'", create_mode, name); - return((os_file_t) -1); + file = INVALID_HANDLE_VALUE; + return(file); } DWORD attributes = 0; @@ -1986,8 +1989,8 @@ os_file_create_func( ib_logf(IB_LOG_LEVEL_ERROR, "Unknown purpose flag (%lu) while opening file '%s'", purpose, name); - - return((os_file_t)(-1)); + file = INVALID_HANDLE_VALUE; + return(file); } #ifdef UNIV_NON_BUFFERED_IO @@ -2113,7 +2116,8 @@ os_file_create_func( "Unknown file create mode (%lu) for file '%s'", create_mode, name); - return((os_file_t) -1); + file = -1; + return(file); } ut_a(type == OS_LOG_FILE || type == OS_DATA_FILE); @@ -2133,7 +2137,7 @@ os_file_create_func( #endif /* O_SYNC */ do { - file = ::open(name, create_flag, os_innodb_umask); + file = open(name, create_flag, os_innodb_umask); if (file == -1) { const char* operation; @@ -2442,8 +2446,8 @@ os_file_close_func( Closes a file handle. @return TRUE if success */ UNIV_INTERN -ibool -os_file_close_no_error_handling( +bool +os_file_close_no_error_handling_func( /*============================*/ os_file_t file) /*!< in, own: handle to a file */ { @@ -2453,10 +2457,10 @@ os_file_close_no_error_handling( ret = CloseHandle(file); if (ret) { - return(TRUE); + return(true); } - return(FALSE); + return(false); #else int ret; @@ -2464,10 +2468,83 @@ os_file_close_no_error_handling( if (ret == -1) { - return(FALSE); + return(false); } - return(TRUE); + return(true); +#endif /* __WIN__ */ +} + +#ifdef HAVE_POSIX_FALLOCATE +/***********************************************************************//** +Ensures that disk space is allocated for the file. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_allocate_func( + os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len) /*!< in: file region length */ +{ + return(posix_fallocate(file, offset, len) == 0); +} +#endif + +/***********************************************************************//** +Checks if the file is marked as invalid. +@return TRUE if invalid */ +UNIV_INTERN +bool +os_file_is_invalid( + pfs_os_file_t file) /*!< in, own: handle to a file */ +{ + return(file == os_file_invalid); +} + +/***********************************************************************//** +Marks the file as invalid. */ +UNIV_INTERN +void +os_file_mark_invalid( + pfs_os_file_t* file) /*!< out: pointer to a handle to a file */ +{ + file->m_file = os_file_invalid; +} + +/***********************************************************************//** +Announces an intention to access file data in a specific pattern in the +future. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_advise( + pfs_os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len, /*!< in: file region length */ + ulint advice)/*!< in: advice for access pattern */ +{ +#ifdef __WIN__ + return(true); +#else +#ifdef UNIV_LINUX + int native_advice = 0; + if ((advice & OS_FILE_ADVISE_NORMAL) != 0) + native_advice |= POSIX_FADV_NORMAL; + if ((advice & OS_FILE_ADVISE_RANDOM) != 0) + native_advice |= POSIX_FADV_RANDOM; + if ((advice & OS_FILE_ADVISE_SEQUENTIAL) != 0) + native_advice |= POSIX_FADV_SEQUENTIAL; + if ((advice & OS_FILE_ADVISE_WILLNEED) != 0) + native_advice |= POSIX_FADV_WILLNEED; + if ((advice & OS_FILE_ADVISE_DONTNEED) != 0) + native_advice |= POSIX_FADV_DONTNEED; + if ((advice & OS_FILE_ADVISE_NOREUSE) != 0) + native_advice |= POSIX_FADV_NOREUSE; + + return(posix_fadvise(file, offset, len, native_advice) == 0); +#else + return(true); +#endif #endif /* __WIN__ */ } @@ -2478,7 +2555,7 @@ UNIV_INTERN os_offset_t os_file_get_size( /*=============*/ - os_file_t file) /*!< in: handle to a file */ + pfs_os_file_t file) /*!< in: handle to a file */ { #ifdef __WIN__ os_offset_t offset; @@ -2496,6 +2573,7 @@ os_file_get_size( return(offset); #else return((os_offset_t) lseek(file, 0, SEEK_END)); + #endif /* __WIN__ */ } @@ -2509,7 +2587,7 @@ UNIV_INTERN bool os_file_set_size( const char* name, - os_file_t file, + pfs_os_file_t file, os_offset_t size, bool is_sparse) { @@ -2618,8 +2696,8 @@ os_file_set_eof( Truncates a file at the specified position. @return TRUE if success */ UNIV_INTERN -ibool -os_file_set_eof_at( +bool +os_file_set_eof_at_func( os_file_t file, /*!< in: handle to a file */ ib_uint64_t new_len)/*!< in: new file length */ { @@ -4553,7 +4631,7 @@ os_aio_array_reserve_slot( the aio operation */ void* message2,/*!< in: message to be passed along with the aio operation */ - os_file_t file, /*!< in: file handle */ + pfs_os_file_t file, /*!< in: file handle */ const char* name, /*!< in: name of the file or path as a null-terminated string */ void* buf, /*!< in: buffer where to read or from which @@ -4928,7 +5006,7 @@ os_aio_func( caution! */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read or from which to write */ os_offset_t offset, /*!< in: file offset where to read or write */ @@ -4958,7 +5036,6 @@ os_aio_func( BOOL ret; #endif ulint wake_later; - ut_ad(buf); ut_ad(n > 0); ut_ad(n % OS_MIN_LOG_BLOCK_SIZE == 0); @@ -5205,7 +5282,6 @@ os_aio_windows_handle( break; } } - *message1 = slot->message1; *message2 = slot->message2; @@ -5229,12 +5305,14 @@ os_aio_windows_handle( switch (slot->type) { case OS_FILE_WRITE: - ret_val = os_file_write(slot->name, slot->file, slot->buf, - slot->offset, slot->len); + ret_val = os_file_write( + slot->name, slot->file, slot->buf, + slot->offset, slot->len); break; case OS_FILE_READ: - ret_val = os_file_read(slot->file, slot->buf, - slot->offset, slot->len); + ret_val = os_file_read( + slot->file, slot->buf, + slot->offset, slot->len); break; default: ut_error; @@ -5503,12 +5581,14 @@ found: iocb = &(slot->control); if (slot->type == OS_FILE_READ) { - io_prep_pread(&slot->control, slot->file, slot->buf, - slot->len, (off_t) slot->offset); + io_prep_pread(&slot->control, slot->file, + slot->buf, slot->len, + (off_t) slot->offset); } else { ut_a(slot->type == OS_FILE_WRITE); - io_prep_pwrite(&slot->control, slot->file, slot->buf, - slot->len, (off_t) slot->offset); + io_prep_pwrite(&slot->control, slot->file, + slot->buf, slot->len, + (off_t) slot->offset); } /* Resubmit an I/O request */ submit_ret = io_submit(array->aio_ctx[segment], 1, &iocb); @@ -5742,7 +5822,6 @@ consecutive_loop: os_aio_slot_t* slot; slot = os_aio_array_get_nth_slot(array, i + segment * n); - if (slot->reserved && slot != aio_slot && slot->offset == aio_slot->offset + aio_slot->len @@ -6296,7 +6375,9 @@ os_file_trim( #ifdef __linux__ #if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) - int ret = fallocate(slot->file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len); + int ret = fallocate(slot->file, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + off, trim_len); if (ret) { /* After first failure do not try to trim again */ @@ -6342,22 +6423,27 @@ os_file_trim( flt.Ranges[0].Offset = off; flt.Ranges[0].Length = trim_len; + OVERLAPPED overlapped = { 0 }; + overlapped.hEvent = win_get_syncio_event(); BOOL ret = DeviceIoControl(slot->file, FSCTL_FILE_LEVEL_TRIM, - &flt, sizeof(flt), NULL, NULL, NULL, NULL); - + &flt, sizeof(flt), NULL, NULL, NULL, &overlapped); + DWORD tmp; + if (ret) { + ret = GetOverlappedResult(slot->file, &overlapped, &tmp, FALSE); + } + else if (GetLastError() == ERROR_IO_PENDING) { + ret = GetOverlappedResult(slot->file, &overlapped, &tmp, TRUE); + } if (!ret) { + DWORD last_error = GetLastError(); /* After first failure do not try to trim again */ os_fallocate_failed = true; srv_use_trim = FALSE; ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: fallocate call failed with error.\n" - " InnoDB: start: %lu len: %lu payload: %lu\n" - " InnoDB: Disabling fallocate for now.\n", off, trim_len, len); - os_file_handle_error_no_exit(slot->name, - " DeviceIOControl(FSCTL_FILE_LEVEL_TRIM) ", - FALSE, __FILE__, __LINE__); + fprintf(stderr, + " InnoDB: Warning: DeviceIoControl(FSCTL_FILE_LEVEL_TRIM) call failed with error %u%s. Disabling trimming.\n", + last_error, last_error == ERROR_NOT_SUPPORTED ? "(ERROR_NOT_SUPPORTED)" : ""); if (slot->write_size) { *slot->write_size = 0; diff --git a/storage/xtradb/rem/rem0rec.cc b/storage/xtradb/rem/rem0rec.cc index 6770748c38b..c62e8c90434 100644 --- a/storage/xtradb/rem/rem0rec.cc +++ b/storage/xtradb/rem/rem0rec.cc @@ -1293,8 +1293,10 @@ rec_convert_dtuple_to_rec_comp( } } - memcpy(end, dfield_get_data(field), len); - end += len; + if (len) { + memcpy(end, dfield_get_data(field), len); + end += len; + } } } diff --git a/storage/xtradb/row/row0ftsort.cc b/storage/xtradb/row/row0ftsort.cc index 4542aa31a6c..7ffcc59dc5f 100644 --- a/storage/xtradb/row/row0ftsort.cc +++ b/storage/xtradb/row/row0ftsort.cc @@ -249,9 +249,6 @@ row_fts_psort_info_init( each parallel sort thread. Each "sort bucket" holds records for a particular "FTS index partition" */ for (j = 0; j < fts_sort_pll_degree; j++) { - - UT_LIST_INIT(psort_info[j].fts_doc_list); - for (i = 0; i < FTS_NUM_AUX_INDEX; i++) { psort_info[j].merge_file[i] = diff --git a/storage/xtradb/row/row0import.cc b/storage/xtradb/row/row0import.cc index 81d6fda9e53..2f7aece665a 100644 --- a/storage/xtradb/row/row0import.cc +++ b/storage/xtradb/row/row0import.cc @@ -1995,6 +1995,7 @@ PageConverter::update_page( case FIL_PAGE_TYPE_XDES: err = set_current_xdes( buf_block_get_page_no(block), get_frame(block)); + /* fall through */ case FIL_PAGE_INODE: case FIL_PAGE_TYPE_TRX_SYS: case FIL_PAGE_IBUF_FREE_LIST: diff --git a/storage/xtradb/row/row0ins.cc b/storage/xtradb/row/row0ins.cc index f4f96d32c50..6072b303d3a 100644 --- a/storage/xtradb/row/row0ins.cc +++ b/storage/xtradb/row/row0ins.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2166,14 +2167,10 @@ for a clustered index! @retval DB_SUCCESS if no error @retval DB_DUPLICATE_KEY if error, @retval DB_LOCK_WAIT if we have to wait for a lock on a possible duplicate -record -@retval DB_SUCCESS_LOCKED_REC if an exact match of the record was found -in online table rebuild (flags & (BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG)) */ +record */ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t row_ins_duplicate_error_in_clust( -/*=============================*/ - ulint flags, /*!< in: undo logging and locking flags */ btr_cur_t* cursor, /*!< in: B-tree cursor */ const dtuple_t* entry, /*!< in: entry to insert */ que_thr_t* thr, /*!< in: query thread */ @@ -2454,7 +2451,7 @@ row_ins_clust_index_entry_low( DB_LOCK_WAIT */ err = row_ins_duplicate_error_in_clust( - flags, &cursor, entry, thr, &mtr); + &cursor, entry, thr, &mtr); } if (err != DB_SUCCESS) { diff --git a/storage/xtradb/row/row0log.cc b/storage/xtradb/row/row0log.cc index 666b59b42db..2cd663fd600 100644 --- a/storage/xtradb/row/row0log.cc +++ b/storage/xtradb/row/row0log.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -367,9 +368,9 @@ row_log_online_op( goto err_exit; } - ret = os_file_write( + ret = os_file_write_int_fd( "(modification log)", - OS_FILE_FROM_FD(log->fd), + log->fd, log->tail.block, byte_offset, srv_sort_buf_size); log->tail.blocks++; if (!ret) { @@ -483,9 +484,9 @@ row_log_table_close_func( goto err_exit; } - ret = os_file_write( + ret = os_file_write_int_fd( "(modification log)", - OS_FILE_FROM_FD(log->fd), + log->fd, log->tail.block, byte_offset, srv_sort_buf_size); log->tail.blocks++; if (!ret) { @@ -1880,6 +1881,7 @@ row_log_table_apply_update( When applying the subsequent ROW_T_DELETE, no matching record will be found. */ + /* fall through */ case DB_SUCCESS: ut_ad(row != NULL); break; @@ -2617,11 +2619,10 @@ all_done: goto func_exit; } - success = os_file_read_no_error_handling( - OS_FILE_FROM_FD(index->online_log->fd), + success = os_file_read_no_error_handling_int_fd( + index->online_log->fd, index->online_log->head.block, ofs, srv_sort_buf_size); - if (!success) { fprintf(stderr, "InnoDB: unable to read temporary file" " for table %s\n", index->table_name); @@ -3444,8 +3445,8 @@ all_done: goto func_exit; } - success = os_file_read_no_error_handling( - OS_FILE_FROM_FD(index->online_log->fd), + success = os_file_read_no_error_handling_int_fd( + index->online_log->fd, index->online_log->head.block, ofs, srv_sort_buf_size); diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc index 57b08801225..6a1298087eb 100644 --- a/storage/xtradb/row/row0merge.cc +++ b/storage/xtradb/row/row0merge.cc @@ -967,8 +967,8 @@ row_merge_read( } #endif /* UNIV_DEBUG */ - success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf, - ofs, srv_sort_buf_size); + success = os_file_read_no_error_handling_int_fd(fd, buf, + ofs, srv_sort_buf_size); /* For encrypted tables, decrypt data after reading and copy data */ if (crypt_data && crypt_buf) { @@ -1023,7 +1023,7 @@ row_merge_write( mach_write_to_4((byte *)out_buf, 0); } - ret = os_file_write("(merge)", OS_FILE_FROM_FD(fd), out_buf, ofs, buf_len); + ret = os_file_write_int_fd("(merge)", fd, out_buf, ofs, buf_len); #ifdef UNIV_DEBUG if (row_merge_print_block_write) { @@ -3427,14 +3427,21 @@ row_merge_file_create_low( performance schema */ struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; - register_pfs_file_open_begin(&state, locker, innodb_file_temp_key, - PSI_FILE_OPEN, - "Innodb Merge Temp File", - __FILE__, __LINE__); + locker = PSI_FILE_CALL(get_thread_file_name_locker)( + &state, innodb_file_temp_key, PSI_FILE_OPEN, + "Innodb Merge Temp File", &locker); + if (locker != NULL) { + PSI_FILE_CALL(start_file_open_wait)(locker, + __FILE__, + __LINE__); + } #endif fd = innobase_mysql_tmpfile(path); #ifdef UNIV_PFS_IO - register_pfs_file_open_end(locker, fd); + if (locker != NULL) { + PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)( + locker, fd); + } #endif if (fd < 0) { @@ -3481,15 +3488,20 @@ row_merge_file_destroy_low( #ifdef UNIV_PFS_IO struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; - register_pfs_file_io_begin(&state, locker, - fd, 0, PSI_FILE_CLOSE, - __FILE__, __LINE__); + locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( + &state, fd, PSI_FILE_CLOSE); + if (locker != NULL) { + PSI_FILE_CALL(start_file_wait)( + locker, 0, __FILE__, __LINE__); + } #endif if (fd >= 0) { close(fd); } #ifdef UNIV_PFS_IO - register_pfs_file_io_end(locker, 0); + if (locker != NULL) { + PSI_FILE_CALL(end_file_wait)(locker, 0); + } #endif } /*********************************************************************//** @@ -4025,6 +4037,7 @@ row_merge_build_indexes( for (i = 0; i < n_indexes; i++) { merge_files[i].fd = -1; + merge_files[i].offset = 0; } total_static_cost = COST_BUILD_INDEX_STATIC * n_indexes + COST_READ_CLUSTERED_INDEX; diff --git a/storage/xtradb/row/row0mysql.cc b/storage/xtradb/row/row0mysql.cc index 463981f51dd..59568f5c91b 100644 --- a/storage/xtradb/row/row0mysql.cc +++ b/storage/xtradb/row/row0mysql.cc @@ -5542,7 +5542,8 @@ loop: fputs(" InnoDB: Warning: CHECK TABLE on ", stderr); dict_index_name_print(stderr, prebuilt->trx, index); fprintf(stderr, " returned %lu\n", ret); - /* fall through (this error is ignored by CHECK TABLE) */ + /* (this error is ignored by CHECK TABLE) */ + /* fall through */ case DB_END_OF_INDEX: func_exit: mem_free(buf); diff --git a/storage/xtradb/row/row0purge.cc b/storage/xtradb/row/row0purge.cc index 8a1dbd6f69f..333677edf21 100644 --- a/storage/xtradb/row/row0purge.cc +++ b/storage/xtradb/row/row0purge.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -488,8 +489,9 @@ row_purge_remove_sec_if_poss_leaf( success = false; } } - /* fall through (the index entry is still needed, + /* (The index entry is still needed, or the deletion succeeded) */ + /* fall through */ case ROW_NOT_DELETED_REF: /* The index entry is still needed. */ case ROW_BUFFERED: diff --git a/storage/xtradb/row/row0sel.cc b/storage/xtradb/row/row0sel.cc index 7d4435eba5b..8e3ed3d1a4e 100644 --- a/storage/xtradb/row/row0sel.cc +++ b/storage/xtradb/row/row0sel.cc @@ -66,6 +66,8 @@ Created 12/19/1997 Heikki Tuuri #include "my_compare.h" /* enum icp_result */ +#include <vector> + /* Maximum number of rows to prefetch; MySQL interface has another parameter */ #define SEL_MAX_N_PREFETCH 16 @@ -2715,7 +2717,8 @@ row_sel_field_store_in_mysql_format_func( || !(templ->mysql_col_len % templ->mbmaxlen)); ut_ad(len * templ->mbmaxlen >= templ->mysql_col_len || (field_no == templ->icp_rec_field_no - && field->prefix_len > 0)); + && field->prefix_len > 0) + || templ->rec_field_is_prefix); ut_ad(!(field->prefix_len % templ->mbmaxlen)); if (templ->mbminlen == 1 && templ->mbmaxlen != 1) { @@ -2757,27 +2760,32 @@ row_sel_field_store_in_mysql_format_func( # define row_sel_store_mysql_field(m,p,r,i,o,f,t) \ row_sel_store_mysql_field_func(m,p,r,o,f,t) #endif /* UNIV_DEBUG */ -/**************************************************************//** -Convert a field in the Innobase format to a field in the MySQL format. */ +/** Convert a field in the Innobase format to a field in the MySQL format. +@param[out] mysql_rec record in the MySQL format +@param[in,out] prebuilt prebuilt struct +@param[in] rec InnoDB record; must be protected + by a page latch +@param[in] index index of rec +@param[in] offsets array returned by rec_get_offsets() +@param[in] field_no templ->rec_field_no or + templ->clust_rec_field_no + or templ->icp_rec_field_no + or sec field no if clust_templ_for_sec + is TRUE +@param[in] templ row template +*/ static MY_ATTRIBUTE((warn_unused_result)) ibool row_sel_store_mysql_field_func( -/*===========================*/ - byte* mysql_rec, /*!< out: record in the - MySQL format */ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct */ - const rec_t* rec, /*!< in: InnoDB record; - must be protected by - a page latch */ + byte* mysql_rec, + row_prebuilt_t* prebuilt, + const rec_t* rec, #ifdef UNIV_DEBUG - const dict_index_t* index, /*!< in: index of rec */ + const dict_index_t* index, #endif - const ulint* offsets, /*!< in: array returned by - rec_get_offsets() */ - ulint field_no, /*!< in: templ->rec_field_no or - templ->clust_rec_field_no or - templ->icp_rec_field_no */ - const mysql_row_templ_t*templ) /*!< in: row template */ + const ulint* offsets, + ulint field_no, + const mysql_row_templ_t*templ) { const byte* data; ulint len; @@ -2906,31 +2914,31 @@ row_sel_store_mysql_field_func( return(TRUE); } -/**************************************************************//** -Convert a row in the Innobase format to a row in the MySQL format. +/** Convert a row in the Innobase format to a row in the MySQL format. Note that the template in prebuilt may advise us to copy only a few columns to mysql_rec, other columns are left blank. All columns may not be needed in the query. +@param[out] mysql_rec row in the MySQL format +@param[in] prebuilt prebuilt structure +@param[in] rec Innobase record in the index + which was described in prebuilt's + template, or in the clustered index; + must be protected by a page latch +@param[in] rec_clust TRUE if the rec in the clustered index +@param[in] index index of rec +@param[in] offsets array returned by rec_get_offsets(rec) @return TRUE on success, FALSE if not all columns could be retrieved */ static MY_ATTRIBUTE((warn_unused_result)) ibool row_sel_store_mysql_rec( -/*====================*/ - byte* mysql_rec, /*!< out: row in the MySQL format */ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */ - const rec_t* rec, /*!< in: Innobase record in the index - which was described in prebuilt's - template, or in the clustered index; - must be protected by a page latch */ - ibool rec_clust, /*!< in: TRUE if rec is in the - clustered index instead of - prebuilt->index */ - const dict_index_t* index, /*!< in: index of rec */ - const ulint* offsets) /*!< in: array returned by - rec_get_offsets(rec) */ + byte* mysql_rec, + row_prebuilt_t* prebuilt, + const rec_t* rec, + ibool rec_clust, + const dict_index_t* index, + const ulint* offsets) { ulint i; - ut_ad(rec_clust || index == prebuilt->index); ut_ad(!rec_clust || dict_index_is_clust(index)); @@ -2946,12 +2954,14 @@ row_sel_store_mysql_rec( ? templ->clust_rec_field_no : templ->rec_field_no; /* We should never deliver column prefixes to MySQL, - except for evaluating innobase_index_cond(). */ + except for evaluating innobase_index_cond() and if the prefix + index is longer than the actual row data. */ /* ...actually, we do want to do this in order to support the prefix query optimization. ut_ad(dict_index_get_nth_field(index, field_no)->prefix_len - == 0); + == 0 || templ->rec_field_is_prefix); + ...so we disable this assert. */ @@ -3681,7 +3691,7 @@ row_search_for_mysql( trx_t* trx = prebuilt->trx; dict_index_t* clust_index; que_thr_t* thr; - const rec_t* rec; + const rec_t* rec = NULL; const rec_t* result_rec = NULL; const rec_t* clust_rec; dberr_t err = DB_SUCCESS; @@ -3706,7 +3716,7 @@ row_search_for_mysql( ulint* offsets = offsets_; ibool table_lock_waited = FALSE; byte* next_buf = 0; - ibool use_clustered_index = FALSE; + bool use_clustered_index = false; rec_offs_init(offsets_); @@ -3966,7 +3976,8 @@ row_search_for_mysql( if (!row_sel_store_mysql_rec( buf, prebuilt, - rec, FALSE, index, offsets)) { + rec, FALSE, index, + offsets)) { /* Only fresh inserts may contain incomplete externally stored columns. Pretend that such @@ -4248,7 +4259,6 @@ rec_loop: } if (page_rec_is_supremum(rec)) { - if (set_also_gap_locks && !(srv_locks_unsafe_for_binlog || trx->isolation_level <= TRX_ISO_READ_COMMITTED) @@ -4777,17 +4787,17 @@ locks_ok: indexes are shorter than the prefix size This optimization can avoid many IOs for certain schemas. */ - ibool row_contains_all_values = TRUE; - int i; + bool row_contains_all_values = true; + unsigned int i; for (i = 0; i < prebuilt->n_template; i++) { /* Condition (1) from above: is the field in the index (prefix or not)? */ - mysql_row_templ_t* templ = + const mysql_row_templ_t* templ = prebuilt->mysql_template + i; ulint secondary_index_field_no = templ->rec_prefix_field_no; if (secondary_index_field_no == ULINT_UNDEFINED) { - row_contains_all_values = FALSE; + row_contains_all_values = false; break; } /* Condition (2) from above: if this is a @@ -4802,8 +4812,9 @@ locks_ok: index, secondary_index_field_no); ut_a(field->prefix_len > 0); - if (record_size >= field->prefix_len) { - row_contains_all_values = FALSE; + if (record_size >= field->prefix_len + / templ->mbmaxlen) { + row_contains_all_values = false; break; } } @@ -4819,7 +4830,7 @@ locks_ok: templ->rec_prefix_field_no; ut_a(templ->rec_field_no != ULINT_UNDEFINED); } - use_clustered_index = FALSE; + use_clustered_index = false; srv_stats.n_sec_rec_cluster_reads_avoided.inc(); } } diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index bf4b9124da7..cc5d1320142 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2013, 2017, MariaDB Corporation. @@ -171,7 +171,8 @@ UNIV_INTERN unsigned long long srv_online_max_size; OS (provided we compiled Innobase with it in), otherwise we will use simulated aio we build below with threads. Currently we support native aio on windows and linux */ -UNIV_INTERN my_bool srv_use_native_aio = TRUE; +/* make srv_use_native_aio to be visible for other plugins */ +my_bool srv_use_native_aio = TRUE; UNIV_INTERN my_bool srv_numa_interleave = FALSE; /* Default compression level if page compression is used and no compression @@ -450,7 +451,7 @@ UNIV_INTERN my_bool srv_cleaner_thread_priority = FALSE; UNIV_INTERN my_bool srv_master_thread_priority = FALSE; /* The number of purge threads to use.*/ -UNIV_INTERN ulong srv_n_purge_threads = 1; +UNIV_INTERN ulong srv_n_purge_threads; /* the number of pages to purge in one batch */ UNIV_INTERN ulong srv_purge_batch_size = 20; @@ -689,16 +690,16 @@ UNIV_INTERN ulong srv_buf_dump_status_frequency = 0; /** Acquire the system_mutex. */ #define srv_sys_mutex_enter() do { \ - mutex_enter(&srv_sys->mutex); \ + mutex_enter(&srv_sys.mutex); \ } while (0) /** Test if the system mutex is owned. */ -#define srv_sys_mutex_own() (mutex_own(&srv_sys->mutex) \ +#define srv_sys_mutex_own() (mutex_own(&srv_sys.mutex) \ && !srv_read_only_mode) /** Release the system mutex. */ #define srv_sys_mutex_exit() do { \ - mutex_exit(&srv_sys->mutex); \ + mutex_exit(&srv_sys.mutex); \ } while (0) #define fetch_lock_wait_timeout(trx) \ @@ -793,7 +794,7 @@ struct srv_sys_t{ ulint n_sys_threads; /*!< size of the sys_threads array */ - srv_slot_t* sys_threads; /*!< server thread table; + srv_slot_t sys_threads[32 + 1]; /*!< server thread table; os_event_set() and os_event_reset() on sys_threads[]->event are @@ -817,7 +818,7 @@ struct srv_sys_t{ UNIV_INTERN ib_mutex_t server_mutex; #endif /* !HAVE_ATOMIC_BUILTINS */ -static srv_sys_t* srv_sys = NULL; +static srv_sys_t srv_sys; /** Event to signal srv_monitor_thread. Not protected by a mutex. Set after setting srv_print_innodb_monitor. */ @@ -839,10 +840,10 @@ and/or load it during startup. */ UNIV_INTERN char srv_buffer_pool_dump_at_shutdown = FALSE; UNIV_INTERN char srv_buffer_pool_load_at_startup = FALSE; -/** Slot index in the srv_sys->sys_threads array for the purge thread. */ +/** Slot index in the srv_sys.sys_threads array for the purge thread. */ static const ulint SRV_PURGE_SLOT = 1; -/** Slot index in the srv_sys->sys_threads array for the master thread. */ +/** Slot index in the srv_sys.sys_threads array for the master thread. */ static const ulint SRV_MASTER_SLOT = 0; UNIV_INTERN os_event_t srv_checkpoint_completed_event; @@ -952,21 +953,21 @@ srv_reserve_slot( switch (type) { case SRV_MASTER: - slot = &srv_sys->sys_threads[SRV_MASTER_SLOT]; + slot = &srv_sys.sys_threads[SRV_MASTER_SLOT]; break; case SRV_PURGE: - slot = &srv_sys->sys_threads[SRV_PURGE_SLOT]; + slot = &srv_sys.sys_threads[SRV_PURGE_SLOT]; break; case SRV_WORKER: /* Find an empty slot, skip the master and purge slots. */ - for (slot = &srv_sys->sys_threads[2]; + for (slot = &srv_sys.sys_threads[2]; slot->in_use; ++slot) { - ut_a(slot < &srv_sys->sys_threads[ - srv_sys->n_sys_threads]); + ut_a(slot < &srv_sys.sys_threads[ + srv_sys.n_sys_threads]); } break; @@ -982,7 +983,7 @@ srv_reserve_slot( ut_ad(srv_slot_get_type(slot) == type); - ++srv_sys->n_threads_active[type]; + ++srv_sys.n_threads_active[type]; srv_sys_mutex_exit(); @@ -1012,27 +1013,27 @@ srv_suspend_thread_low( case SRV_MASTER: /* We have only one master thread and it should be the first entry always. */ - ut_a(srv_sys->n_threads_active[type] == 1); + ut_a(srv_sys.n_threads_active[type] == 1); break; case SRV_PURGE: /* We have only one purge coordinator thread and it should be the second entry always. */ - ut_a(srv_sys->n_threads_active[type] == 1); + ut_a(srv_sys.n_threads_active[type] == 1); break; case SRV_WORKER: ut_a(srv_n_purge_threads > 1); - ut_a(srv_sys->n_threads_active[type] > 0); + ut_a(srv_sys.n_threads_active[type] > 0); break; } ut_a(!slot->suspended); slot->suspended = TRUE; - ut_a(srv_sys->n_threads_active[type] > 0); + ut_a(srv_sys.n_threads_active[type] > 0); - srv_sys->n_threads_active[type]--; + srv_sys.n_threads_active[type]--; return(os_event_reset(slot->event)); } @@ -1087,7 +1088,7 @@ srv_resume_thread(srv_slot_t* slot, ib_int64_t sig_count = 0, bool wait = true, ut_ad(slot->suspended); slot->suspended = FALSE; - ++srv_sys->n_threads_active[slot->type]; + ++srv_sys.n_threads_active[slot->type]; srv_sys_mutex_exit(); return(timeout); } @@ -1109,8 +1110,8 @@ srv_release_threads(enum srv_thread_type type, ulint n) srv_sys_mutex_enter(); - for (ulint i = 0; i < srv_sys->n_sys_threads; i++) { - srv_slot_t* slot = &srv_sys->sys_threads[i]; + for (ulint i = 0; i < srv_sys.n_sys_threads; i++) { + srv_slot_t* slot = &srv_sys.sys_threads[i]; if (!slot->in_use || srv_slot_get_type(slot) != type) { continue; @@ -1130,7 +1131,7 @@ srv_release_threads(enum srv_thread_type type, ulint n) should be the first entry always. */ ut_a(n == 1); ut_a(i == SRV_MASTER_SLOT); - ut_a(srv_sys->n_threads_active[type] == 0); + ut_a(srv_sys.n_threads_active[type] == 0); break; case SRV_PURGE: @@ -1139,12 +1140,12 @@ srv_release_threads(enum srv_thread_type type, ulint n) ut_a(n == 1); ut_a(i == SRV_PURGE_SLOT); ut_a(srv_n_purge_threads > 0); - ut_a(srv_sys->n_threads_active[type] == 0); + ut_a(srv_sys.n_threads_active[type] == 0); break; case SRV_WORKER: ut_a(srv_n_purge_threads > 1); - ut_a(srv_sys->n_threads_active[type] + ut_a(srv_sys.n_threads_active[type] < srv_n_purge_threads - 1); break; } @@ -1182,9 +1183,6 @@ void srv_init(void) /*==========*/ { - ulint n_sys_threads = 0; - ulint srv_sys_sz = sizeof(*srv_sys); - #ifndef HAVE_ATOMIC_BUILTINS mutex_create(server_mutex_key, &server_mutex, SYNC_ANY_LATCH); #endif /* !HAVE_ATOMIC_BUILTINS */ @@ -1192,29 +1190,19 @@ srv_init(void) mutex_create(srv_innodb_monitor_mutex_key, &srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK); - if (!srv_read_only_mode) { - - /* Number of purge threads + master thread */ - n_sys_threads = srv_n_purge_threads + 1; - - srv_sys_sz += n_sys_threads * sizeof(*srv_sys->sys_threads); - } - - srv_sys = static_cast<srv_sys_t*>(mem_zalloc(srv_sys_sz)); - - srv_sys->n_sys_threads = n_sys_threads; + srv_sys.n_sys_threads = srv_read_only_mode + ? 0 + : srv_n_purge_threads + 1/* purge coordinator */; if (!srv_read_only_mode) { - mutex_create(srv_sys_mutex_key, &srv_sys->mutex, SYNC_THREADS); + mutex_create(srv_sys_mutex_key, &srv_sys.mutex, SYNC_THREADS); mutex_create(srv_sys_tasks_mutex_key, - &srv_sys->tasks_mutex, SYNC_ANY_LATCH); - - srv_sys->sys_threads = (srv_slot_t*) &srv_sys[1]; + &srv_sys.tasks_mutex, SYNC_ANY_LATCH); - for (ulint i = 0; i < srv_sys->n_sys_threads; ++i) { - srv_slot_t* slot = &srv_sys->sys_threads[i]; + for (ulint i = 0; i < srv_sys.n_sys_threads; ++i) { + srv_slot_t* slot = &srv_sys.sys_threads[i]; slot->event = os_event_create(); @@ -1234,8 +1222,6 @@ srv_init(void) if (srv_track_changed_pages) { os_event_set(srv_redo_log_tracked_event); } - - UT_LIST_INIT(srv_sys->tasks); } /* page_zip_stat_per_index_mutex is acquired from: @@ -1283,8 +1269,8 @@ srv_free(void) if (!srv_read_only_mode) { - for (ulint i = 0; i < srv_sys->n_sys_threads; i++) - os_event_free(srv_sys->sys_threads[i].event); + for (ulint i = 0; i < srv_sys.n_sys_threads; i++) + os_event_free(srv_sys.sys_threads[i].event); os_event_free(srv_error_event); srv_error_event = NULL; @@ -1296,8 +1282,8 @@ srv_free(void) srv_checkpoint_completed_event = NULL; os_event_free(srv_redo_log_tracked_event); srv_redo_log_tracked_event = NULL; - mutex_free(&srv_sys->mutex); - mutex_free(&srv_sys->tasks_mutex); + mutex_free(&srv_sys.mutex); + mutex_free(&srv_sys.tasks_mutex); } #ifdef WITH_INNODB_DISALLOW_WRITES @@ -1311,10 +1297,10 @@ srv_free(void) mutex_free(&srv_innodb_monitor_mutex); mutex_free(&page_zip_stat_per_index_mutex); - mem_free(srv_sys); - srv_sys = NULL; - trx_i_s_cache_free(trx_i_s_cache); + + /* This is needed for Mariabackup. */ + memset(&srv_sys, 0, sizeof srv_sys); } /*********************************************************************//** @@ -1793,8 +1779,10 @@ srv_export_innodb_status(void) buf_get_total_stat(&stat); buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len); buf_get_total_list_size_in_bytes(&buf_pools_list_size); - fil_crypt_total_stat(&crypt_stat); - btr_scrub_total_stat(&scrub_stat); + if (!srv_read_only_mode) { + fil_crypt_total_stat(&crypt_stat); + btr_scrub_total_stat(&scrub_stat); + } mem_adaptive_hash = 0; @@ -2108,6 +2096,7 @@ srv_export_innodb_status(void) export_vars.innodb_sec_rec_cluster_reads_avoided = srv_stats.n_sec_rec_cluster_reads_avoided; + if (!srv_read_only_mode) { export_vars.innodb_encryption_rotation_pages_read_from_cache = crypt_stat.pages_read_from_cache; export_vars.innodb_encryption_rotation_pages_read_from_disk = @@ -2135,6 +2124,7 @@ srv_export_innodb_status(void) scrub_stat.page_split_failures_missing_index; export_vars.innodb_scrub_page_split_failures_unknown = scrub_stat.page_split_failures_unknown; + } mutex_exit(&srv_innodb_monitor_mutex); } @@ -2289,7 +2279,7 @@ loop: } } - if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) { + if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { goto exit_func; } @@ -2427,7 +2417,7 @@ loop: os_event_wait_time_low(srv_error_event, 1000000, sig_count); - if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) { + if (srv_shutdown_state == SRV_SHUTDOWN_NONE) { goto loop; } @@ -2452,9 +2442,9 @@ srv_inc_activity_count( is caused by the background change buffer merge */ { - srv_sys->activity_count.inc(); + srv_sys.activity_count.inc(); if (ibuf_merge_activity) - srv_sys->ibuf_merge_activity_count.inc(); + srv_sys.ibuf_merge_activity_count.inc(); } /**********************************************************************//** @@ -2476,7 +2466,7 @@ srv_get_active_thread_type(void) srv_sys_mutex_enter(); for (ulint i = SRV_WORKER; i <= SRV_MASTER; ++i) { - if (srv_sys->n_threads_active[i] != 0) { + if (srv_sys.n_threads_active[i] != 0) { ret = static_cast<srv_thread_type>(i); break; } @@ -2599,7 +2589,8 @@ purge_archived_logs( if (dirnamelen + strlen(fileinfo.name) + 2 > OS_FILE_MAX_PATH) continue; - snprintf(archived_log_filename + dirnamelen, OS_FILE_MAX_PATH, + snprintf(archived_log_filename + dirnamelen, + OS_FILE_MAX_PATH - dirnamelen - 1, "%s", fileinfo.name); if (before_no) { @@ -2695,12 +2686,12 @@ srv_active_wake_master_thread(void) srv_inc_activity_count(); - if (srv_sys->n_threads_active[SRV_MASTER] == 0) { + if (srv_sys.n_threads_active[SRV_MASTER] == 0) { srv_slot_t* slot; srv_sys_mutex_enter(); - slot = &srv_sys->sys_threads[SRV_MASTER_SLOT]; + slot = &srv_sys.sys_threads[SRV_MASTER_SLOT]; /* Only if the master thread has been started. */ @@ -2727,7 +2718,7 @@ srv_wake_purge_thread_if_not_active(void) ut_ad(!srv_sys_mutex_own()); if (purge_sys->state == PURGE_STATE_RUN - && srv_sys->n_threads_active[SRV_PURGE] == 0) { + && srv_sys.n_threads_active[SRV_PURGE] == 0) { srv_release_threads(SRV_PURGE, 1); } @@ -2756,7 +2747,7 @@ ulint srv_get_activity_count(void) /*========================*/ { - return(srv_sys->activity_count); + return(srv_sys.activity_count); } /** Get current server ibuf merge activity count. @@ -2765,7 +2756,7 @@ static ulint srv_get_ibuf_merge_activity_count(void) { - return(srv_sys->ibuf_merge_activity_count); + return(srv_sys.ibuf_merge_activity_count); } /*******************************************************************//** @@ -2784,14 +2775,14 @@ srv_check_activity( ULINT_UNDEFINED */ ulint old_ibuf_merge_activity_count) { - ulint new_activity_count = srv_sys->activity_count; + ulint new_activity_count = srv_sys.activity_count; if (old_ibuf_merge_activity_count == ULINT_UNDEFINED) return(new_activity_count != old_activity_count); /* If we care about ibuf merge activity, then the server is considered idle if all activity, if any, was due to ibuf merge. */ ulint new_ibuf_merge_activity_count - = srv_sys->ibuf_merge_activity_count; + = srv_sys.ibuf_merge_activity_count; ut_ad(new_ibuf_merge_activity_count <= new_activity_count); ut_ad(new_ibuf_merge_activity_count >= old_ibuf_merge_activity_count); @@ -2871,7 +2862,7 @@ srv_shutdown_print_master_pending( time_elapsed = ut_difftime(current_time, *last_print_time); if (time_elapsed > 60) { - *last_print_time = ut_time(); + *last_print_time = current_time; if (n_tables_to_drop) { ut_print_timestamp(stderr); @@ -2924,7 +2915,7 @@ srv_master_do_active_tasks(void) MONITOR_INC_TIME_IN_MICRO_SECS( MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, counter_time); - if (srv_shutdown_state > 0) { + if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { return; } @@ -2958,11 +2949,7 @@ srv_master_do_active_tasks(void) MONITOR_SRV_MEM_VALIDATE_MICROSECOND, counter_time); } #endif - if (srv_shutdown_state > 0) { - return; - } - - if (srv_shutdown_state > 0) { + if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { return; } @@ -2975,7 +2962,7 @@ srv_master_do_active_tasks(void) MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time); } - if (srv_shutdown_state > 0) { + if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { return; } @@ -3019,7 +3006,7 @@ srv_master_do_idle_tasks(void) MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, counter_time); - if (srv_shutdown_state > 0) { + if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { return; } @@ -3035,7 +3022,7 @@ srv_master_do_idle_tasks(void) MONITOR_INC_TIME_IN_MICRO_SECS( MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time); - if (srv_shutdown_state > 0) { + if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { return; } @@ -3051,7 +3038,7 @@ srv_master_do_idle_tasks(void) MONITOR_INC_TIME_IN_MICRO_SECS( MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time); - if (srv_shutdown_state > 0) { + if (srv_shutdown_state != SRV_SHUTDOWN_NONE) { return; } @@ -3072,70 +3059,42 @@ srv_master_do_idle_tasks(void) } } -/*********************************************************************//** -Perform the tasks during shutdown. The tasks that we do at shutdown -depend on srv_fast_shutdown: -2 => very fast shutdown => do no book keeping -1 => normal shutdown => clear drop table queue and make checkpoint -0 => slow shutdown => in addition to above do complete purge and ibuf -merge -@return TRUE if some work was done. FALSE otherwise */ +/** Perform shutdown tasks. +@param[in] ibuf_merge whether to complete the change buffer merge */ static -ibool -srv_master_do_shutdown_tasks( -/*=========================*/ - ib_time_t* last_print_time)/*!< last time the function - print the message */ +void +srv_shutdown(bool ibuf_merge) { - ulint n_bytes_merged = 0; - ulint n_tables_to_drop = 0; + ulint n_bytes_merged = 0; + ulint n_tables_to_drop; + ib_time_t now = ut_time(); - ut_ad(!srv_read_only_mode); - - ++srv_main_shutdown_loops; - - ut_a(srv_shutdown_state > 0); - - /* In very fast shutdown none of the following is necessary */ - if (srv_fast_shutdown == 2) { - return(FALSE); - } - - /* ALTER TABLE in MySQL requires on Unix that the table handler - can drop tables lazily after there no longer are SELECT - queries to them. */ - srv_main_thread_op_info = "doing background drop tables"; - n_tables_to_drop = row_drop_tables_for_mysql_in_background(); - - /* make sure that there is enough reusable space in the redo - log files */ - srv_main_thread_op_info = "checking free log space"; - log_free_check(); - - /* In case of normal shutdown we don't do ibuf merge or purge */ - if (srv_fast_shutdown == 1) { - goto func_exit; - } - - /* Do an ibuf merge */ - srv_main_thread_op_info = "doing insert buffer merge"; - n_bytes_merged = ibuf_merge_in_background(true); - - /* Flush logs if needed */ - srv_sync_log_buffer_in_background(); - -func_exit: - /* Make a new checkpoint about once in 10 seconds */ - srv_main_thread_op_info = "making checkpoint"; - log_checkpoint(TRUE, FALSE, FALSE); - - /* Print progress message every 60 seconds during shutdown */ - if (srv_shutdown_state > 0 && srv_print_verbose_log) { - srv_shutdown_print_master_pending( - last_print_time, n_tables_to_drop, n_bytes_merged); - } + do { + ut_ad(!srv_read_only_mode); + ut_ad(srv_shutdown_state == SRV_SHUTDOWN_CLEANUP); + ++srv_main_shutdown_loops; + + /* FIXME: Remove the background DROP TABLE queue; it is not + crash-safe and breaks ACID. */ + srv_main_thread_op_info = "doing background drop tables"; + n_tables_to_drop = row_drop_tables_for_mysql_in_background(); + + if (ibuf_merge) { + srv_main_thread_op_info = "checking free log space"; + log_free_check(); + srv_main_thread_op_info = "doing insert buffer merge"; + n_bytes_merged = ibuf_merge_in_background(true); + + /* Flush logs if needed */ + srv_sync_log_buffer_in_background(); + } - return(n_bytes_merged || n_tables_to_drop); + /* Print progress message every 60 seconds during shutdown */ + if (srv_print_verbose_log) { + srv_shutdown_print_master_pending( + &now, n_tables_to_drop, n_bytes_merged); + } + } while (n_bytes_merged || n_tables_to_drop); } /*********************************************************************//** @@ -3163,11 +3122,12 @@ DECLARE_THREAD(srv_master_thread)( /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); + srv_slot_t* slot; ulint old_activity_count = srv_get_activity_count(); ulint old_ibuf_merge_activity_count = srv_get_ibuf_merge_activity_count(); - ib_time_t last_print_time; ut_ad(!srv_read_only_mode); @@ -3188,9 +3148,8 @@ DECLARE_THREAD(srv_master_thread)( srv_main_thread_id = os_thread_pf(os_thread_get_curr_id()); slot = srv_reserve_slot(SRV_MASTER); - ut_a(slot == srv_sys->sys_threads); + ut_a(slot == srv_sys.sys_threads); - last_print_time = ut_time(); loop: if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) { goto suspend_thread; @@ -3216,13 +3175,26 @@ loop: } } - while (srv_master_do_shutdown_tasks(&last_print_time)) { - - /* Shouldn't loop here in case of very fast shutdown */ - ut_ad(srv_fast_shutdown < 2); +suspend_thread: + switch (srv_shutdown_state) { + case SRV_SHUTDOWN_NONE: + break; + case SRV_SHUTDOWN_FLUSH_PHASE: + case SRV_SHUTDOWN_LAST_PHASE: + ut_ad(0); + /* fall through */ + case SRV_SHUTDOWN_EXIT_THREADS: + /* srv_init_abort() must have been invoked */ + case SRV_SHUTDOWN_CLEANUP: + if (srv_shutdown_state == SRV_SHUTDOWN_CLEANUP + && srv_fast_shutdown < 2) { + srv_shutdown(srv_fast_shutdown == 0); + } + srv_suspend_thread(slot); + my_thread_end(); + os_thread_exit(NULL); } -suspend_thread: srv_main_thread_op_info = "suspending"; srv_suspend_thread(slot); @@ -3234,41 +3206,32 @@ suspend_thread: srv_main_thread_op_info = "waiting for server activity"; srv_resume_thread(slot); - - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_thread_exit(NULL); - } - goto loop; - - OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */ } -/*********************************************************************//** -Check if purge should stop. -@return true if it should shutdown. */ +/** Check if purge should stop. +@param[in] n_purged pages purged in the last batch +@return whether purge should exit */ static bool -srv_purge_should_exit( -/*==============*/ - ulint n_purged) /*!< in: pages purged in last batch */ +srv_purge_should_exit(ulint n_purged) { - switch (srv_shutdown_state) { - case SRV_SHUTDOWN_NONE: - /* Normal operation. */ - break; + ut_ad(srv_shutdown_state == SRV_SHUTDOWN_NONE + || srv_shutdown_state == SRV_SHUTDOWN_CLEANUP); - case SRV_SHUTDOWN_CLEANUP: - case SRV_SHUTDOWN_EXIT_THREADS: - /* Exit unless slow shutdown requested or all done. */ - return(srv_fast_shutdown != 0 || n_purged == 0); - - case SRV_SHUTDOWN_LAST_PHASE: - case SRV_SHUTDOWN_FLUSH_PHASE: - ut_error; + if (srv_undo_sources) { + return(false); } - - return(false); + if (srv_fast_shutdown) { + return(true); + } + /* Slow shutdown was requested. */ + if (n_purged) { + /* The previous round still did some work. */ + return(false); + } + /* Exit if there are no active transactions to roll back. */ + return(trx_sys_any_active_transactions() == 0); } /*********************************************************************//** @@ -3284,18 +3247,18 @@ srv_task_execute(void) ut_ad(!srv_read_only_mode); ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND); - mutex_enter(&srv_sys->tasks_mutex); + mutex_enter(&srv_sys.tasks_mutex); - if (UT_LIST_GET_LEN(srv_sys->tasks) > 0) { + if (UT_LIST_GET_LEN(srv_sys.tasks) > 0) { - thr = UT_LIST_GET_FIRST(srv_sys->tasks); + thr = UT_LIST_GET_FIRST(srv_sys.tasks); ut_a(que_node_get_type(thr->child) == QUE_NODE_PURGE); - UT_LIST_REMOVE(queue, srv_sys->tasks, thr); + UT_LIST_REMOVE(queue, srv_sys.tasks, thr); } - mutex_exit(&srv_sys->tasks_mutex); + mutex_exit(&srv_sys.tasks_mutex); if (thr != NULL) { @@ -3322,6 +3285,8 @@ DECLARE_THREAD(srv_worker_thread)( void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); + srv_slot_t* slot; ulint tid_i = os_atomic_increment_ulint(&purge_tid_i, 1); @@ -3345,7 +3310,7 @@ DECLARE_THREAD(srv_worker_thread)( srv_sys_mutex_enter(); - ut_a(srv_sys->n_threads_active[SRV_WORKER] < srv_n_purge_threads); + ut_a(srv_sys.n_threads_active[SRV_WORKER] < srv_n_purge_threads); srv_sys_mutex_exit(); @@ -3387,6 +3352,7 @@ DECLARE_THREAD(srv_worker_thread)( os_thread_pf(os_thread_get_curr_id())); #endif /* UNIV_DEBUG_THREAD_CREATION */ + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ os_thread_exit(NULL); @@ -3540,7 +3506,7 @@ srv_purge_coordinator_suspend( } rw_lock_x_unlock(&purge_sys->latch); - } while (stop); + } while (stop && srv_undo_sources); srv_resume_thread(slot, 0, false); } @@ -3555,6 +3521,8 @@ DECLARE_THREAD(srv_purge_coordinator_thread)( void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); + srv_slot_t* slot; ulint n_total_purged = ULINT_UNDEFINED; @@ -3592,6 +3560,7 @@ DECLARE_THREAD(srv_purge_coordinator_thread)( purge didn't purge any records then wait for activity. */ if (srv_shutdown_state == SRV_SHUTDOWN_NONE + && srv_undo_sources && (purge_sys->state == PURGE_STATE_STOP || n_total_purged == 0)) { @@ -3612,36 +3581,8 @@ DECLARE_THREAD(srv_purge_coordinator_thread)( srv_n_purge_threads, &n_total_purged); srv_inc_activity_count(); - } while (!srv_purge_should_exit(n_total_purged)); - /* Ensure that we don't jump out of the loop unless the - exit condition is satisfied. */ - - ut_a(srv_purge_should_exit(n_total_purged)); - - ulint n_pages_purged = ULINT_MAX; - - /* Ensure that all records are purged if it is not a fast shutdown. - This covers the case where a record can be added after we exit the - loop above. */ - while (srv_fast_shutdown == 0 && n_pages_purged > 0) { - n_pages_purged = trx_purge(1, srv_purge_batch_size, false); - } - - /* This trx_purge is called to remove any undo records (added by - background threads) after completion of the above loop. When - srv_fast_shutdown != 0, a large batch size can cause significant - delay in shutdown ,so reducing the batch size to magic number 20 - (which was default in 5.5), which we hope will be sufficient to - remove all the undo records */ - const uint temp_batch_size = 20; - - n_pages_purged = trx_purge(1, srv_purge_batch_size <= temp_batch_size - ? srv_purge_batch_size : temp_batch_size, - true); - ut_a(n_pages_purged == 0 || srv_fast_shutdown != 0); - /* The task queue should always be empty, independent of fast shutdown state. */ ut_a(srv_get_task_queue_length() == 0); @@ -3668,6 +3609,7 @@ DECLARE_THREAD(srv_purge_coordinator_thread)( srv_release_threads(SRV_WORKER, srv_n_purge_threads - 1); } + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ os_thread_exit(NULL); @@ -3685,11 +3627,11 @@ srv_que_task_enqueue_low( que_thr_t* thr) /*!< in: query thread */ { ut_ad(!srv_read_only_mode); - mutex_enter(&srv_sys->tasks_mutex); + mutex_enter(&srv_sys.tasks_mutex); - UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr); + UT_LIST_ADD_LAST(queue, srv_sys.tasks, thr); - mutex_exit(&srv_sys->tasks_mutex); + mutex_exit(&srv_sys.tasks_mutex); srv_release_threads(SRV_WORKER, 1); } @@ -3706,11 +3648,11 @@ srv_get_task_queue_length(void) ut_ad(!srv_read_only_mode); - mutex_enter(&srv_sys->tasks_mutex); + mutex_enter(&srv_sys.tasks_mutex); - n_tasks = UT_LIST_GET_LEN(srv_sys->tasks); + n_tasks = UT_LIST_GET_LEN(srv_sys.tasks); - mutex_exit(&srv_sys->tasks_mutex); + mutex_exit(&srv_sys.tasks_mutex); return(n_tasks); } @@ -3733,3 +3675,19 @@ srv_purge_wakeup() } } } + +/** Check whether given space id is undo tablespace id +@param[in] space_id space id to check +@return true if it is undo tablespace else false. */ +bool +srv_is_undo_tablespace( + ulint space_id) +{ + if (srv_undo_space_id_start == 0) { + return (false); + } + + return(space_id >= srv_undo_space_id_start + && space_id < (srv_undo_space_id_start + + srv_undo_tablespaces_open)); +} diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index aa51012816d..fd129c3e55f 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All rights reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2013, 2017, MariaDB Corporation @@ -121,6 +121,9 @@ UNIV_INTERN ibool srv_have_fullfsync = FALSE; /** TRUE if a raw partition is in use */ UNIV_INTERN ibool srv_start_raw_disk_in_use = FALSE; +/** UNDO tablespaces starts with space id. */ +ulint srv_undo_space_id_start; + /** TRUE if the server is being started, before rolling back any incomplete transactions */ UNIV_INTERN ibool srv_startup_is_before_trx_rollback_phase = FALSE; @@ -129,7 +132,11 @@ UNIV_INTERN ibool srv_is_being_started = FALSE; /** TRUE if the server was successfully started */ UNIV_INTERN ibool srv_was_started = FALSE; /** TRUE if innobase_start_or_create_for_mysql() has been called */ -static ibool srv_start_has_been_called = FALSE; +static ibool srv_start_has_been_called; + +/** Whether any undo log records can be generated */ +UNIV_INTERN bool srv_undo_sources; + #ifdef UNIV_DEBUG /** InnoDB system tablespace to set during recovery */ UNIV_INTERN uint srv_sys_space_size_debug; @@ -139,8 +146,8 @@ UNIV_INTERN uint srv_sys_space_size_debug; SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */ UNIV_INTERN enum srv_shutdown_state srv_shutdown_state = SRV_SHUTDOWN_NONE; -/** Files comprising the system tablespace */ -os_file_t files[1000]; +/** Files comprising the system tablespace. Also used by Mariabackup. */ +UNIV_INTERN pfs_os_file_t files[1000]; /** io_handler_thread parameters for thread identification */ static ulint n[SRV_MAX_N_IO_THREADS]; @@ -203,6 +210,39 @@ UNIV_INTERN mysql_pfs_key_t srv_purge_thread_key; UNIV_INTERN mysql_pfs_key_t srv_log_tracking_thread_key; #endif /* UNIV_PFS_THREAD */ +/** Innobase start-up aborted. Perform cleanup actions. +@param[in] create_new_db TRUE if new db is being created +@param[in] file File name +@param[in] line Line number +@param[in] err Reason for aborting InnoDB startup +@return DB_SUCCESS or error code. */ +static +dberr_t +srv_init_abort( + bool create_new_db, + const char* file, + ulint line, + dberr_t err) +{ + if (create_new_db) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Database creation was aborted" + " at %s [" ULINTPF "]" + " with error %s. You may need" + " to delete the ibdata1 file before trying to start" + " up again.", + file, line, ut_strerr(err)); + } else { + ib_logf(IB_LOG_LEVEL_ERROR, + "Plugin initialization aborted" + " at %s [" ULINTPF "]" + " with error %s.", + file, line, ut_strerr(err)); + } + + return(err); +} + /*********************************************************************//** Convert a numeric string that optionally ends in G or M or K, to a number containing megabytes. @@ -584,7 +624,7 @@ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t create_log_file( /*============*/ - os_file_t* file, /*!< out: file handle */ + pfs_os_file_t* file, /*!< out: file handle */ const char* name) /*!< in: log file name */ { ibool ret; @@ -802,7 +842,7 @@ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t open_log_file( /*==========*/ - os_file_t* file, /*!< out: file handle */ + pfs_os_file_t* file, /*!< out: file handle */ const char* name, /*!< in: log file name */ os_offset_t* size) /*!< out: file size */ { @@ -823,32 +863,32 @@ open_log_file( return(DB_SUCCESS); } -/*********************************************************************//** -Creates or opens database data files and closes them. + +/** Creates or opens database data files and closes them. +@param[out] create_new_db true = create new database +@param[out] min_arch_log_no min of archived log numbers in + data files +@param[out] max_arch_log_no max of archived log numbers in + data files +@param[out] flushed_lsn flushed lsn in fist datafile +@param[out] sum_of_new_sizes sum of sizes of the new files + added @return DB_SUCCESS or error code */ MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t open_or_create_data_files( -/*======================*/ - ibool* create_new_db, /*!< out: TRUE if new database should be - created */ + bool* create_new_db, #ifdef UNIV_LOG_ARCHIVE - lsn_t* min_arch_log_no,/*!< out: min of archived log - numbers in data files */ - lsn_t* max_arch_log_no,/*!< out: max of archived log - numbers in data files */ + lsn_t* min_arch_log_no, + lsn_t* max_arch_log_no, #endif /* UNIV_LOG_ARCHIVE */ - lsn_t* min_flushed_lsn,/*!< out: min of flushed lsn - values in data files */ - lsn_t* max_flushed_lsn,/*!< out: max of flushed lsn - values in data files */ - ulint* sum_of_new_sizes)/*!< out: sum of sizes of the - new files added */ + lsn_t* flushed_lsn, + ulint* sum_of_new_sizes) { ibool ret; ulint i; - ibool one_opened = FALSE; - ibool one_created = FALSE; + bool one_opened = false; + bool one_created = false; os_offset_t size; ulint flags; ulint space; @@ -867,7 +907,7 @@ open_or_create_data_files( *sum_of_new_sizes = 0; - *create_new_db = FALSE; + *create_new_db = false; srv_normalize_path_for_win(srv_data_home); @@ -919,7 +959,7 @@ open_or_create_data_files( && os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS #ifdef UNIV_AIX - /* AIX 5.1 after security patch ML7 may have + /* AIX 5.1 after security patch ML7 may have errno set to 0 here, which causes our function to return 100; work around that AIX problem */ @@ -955,9 +995,10 @@ open_or_create_data_files( } const char* check_msg; + check_msg = fil_read_first_page( files[i], FALSE, &flags, &space, - min_flushed_lsn, max_flushed_lsn, NULL); + flushed_lsn, NULL); /* If first page is valid, don't overwrite DB. It prevents overwriting DB when mysql_install_db @@ -988,6 +1029,7 @@ open_or_create_data_files( name); return(DB_ERROR); } + if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) { ut_a(!srv_read_only_mode); files[i] = os_file_create( @@ -1007,7 +1049,6 @@ open_or_create_data_files( } if (!ret) { - os_file_get_last_error(true); ib_logf(IB_LOG_LEVEL_ERROR, @@ -1017,7 +1058,6 @@ open_or_create_data_files( } if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) { - goto skip_size_check; } @@ -1044,16 +1084,15 @@ size_check: "auto-extending " "data file %s is " "of a different size " - "%lu pages (rounded " + ULINTPF " pages (rounded " "down to MB) than specified " "in the .cnf file: " - "initial %lu pages, " - "max %lu (relevant if " + "initial " ULINTPF " pages, " + "max " ULINTPF " (relevant if " "non-zero) pages!", name, - (ulong) rounded_size_pages, - (ulong) srv_data_file_sizes[i], - (ulong) + rounded_size_pages, + srv_data_file_sizes[i], srv_last_file_size_max); return(DB_ERROR); @@ -1066,12 +1105,12 @@ size_check: ib_logf(IB_LOG_LEVEL_ERROR, "Data file %s is of a different " - "size %lu pages (rounded down to MB) " + "size " ULINTPF " pages (rounded down to MB) " "than specified in the .cnf file " - "%lu pages!", + ULINTPF " pages!", name, - (ulong) rounded_size_pages, - (ulong) srv_data_file_sizes[i]); + rounded_size_pages, + srv_data_file_sizes[i]); return(DB_ERROR); } @@ -1090,7 +1129,7 @@ skip_size_check: check_first_page: check_msg = fil_read_first_page( files[i], one_opened, &flags, &space, - min_flushed_lsn, max_flushed_lsn, &crypt_data); + flushed_lsn, &crypt_data); if (check_msg) { @@ -1127,9 +1166,9 @@ check_first_page: != fsp_flags_get_page_size(flags)) { ib_logf(IB_LOG_LEVEL_ERROR, - "Data file \"%s\" uses page size %lu," + "Data file \"%s\" uses page size " ULINTPF " ," "but the start-up parameter " - "is --innodb-page-size=%lu", + "is --innodb-page-size=" ULINTPF " .", name, fsp_flags_get_page_size(flags), UNIV_PAGE_SIZE); @@ -1160,9 +1199,9 @@ check_first_page: } ib_logf(IB_LOG_LEVEL_INFO, - "Setting file %s size to %lu MB", + "Setting file %s size to " ULINTPF " MB", name, - (ulong) (srv_data_file_sizes[i] + (srv_data_file_sizes[i] >> (20 - UNIV_PAGE_SIZE_SHIFT))); ret = os_file_set_size( @@ -1221,7 +1260,7 @@ srv_undo_tablespace_create( const char* name, /*!< in: tablespace name */ ulint size) /*!< in: tablespace size in pages */ { - os_file_t fh; + pfs_os_file_t fh; ibool ret; dberr_t err = DB_SUCCESS; @@ -1299,7 +1338,7 @@ srv_undo_tablespace_open( const char* name, /*!< in: tablespace name */ ulint space) /*!< in: tablespace id */ { - os_file_t fh; + pfs_os_file_t fh; dberr_t err = DB_ERROR; ibool ret; ulint flags; @@ -1404,13 +1443,23 @@ srv_undo_tablespaces_init( for (i = 0; create_new_db && i < n_conf_tablespaces; ++i) { char name[OS_FILE_MAX_PATH]; + ulint space_id = i + 1; + + DBUG_EXECUTE_IF("innodb_undo_upgrade", + space_id = i + 3;); ut_snprintf( name, sizeof(name), "%s%cundo%03lu", - srv_undo_dir, SRV_PATH_SEPARATOR, i + 1); + srv_undo_dir, SRV_PATH_SEPARATOR, space_id); + + if (i == 0) { + srv_undo_space_id_start = space_id; + prev_space_id = srv_undo_space_id_start - 1; + } + + undo_tablespace_ids[i] = space_id; - /* Undo space ids start from 1. */ err = srv_undo_tablespace_create( name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES); @@ -1432,14 +1481,16 @@ srv_undo_tablespaces_init( if (!create_new_db && !backup_mode) { n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces( undo_tablespace_ids); - } else { - n_undo_tablespaces = n_conf_tablespaces; - for (i = 1; i <= n_undo_tablespaces; ++i) { - undo_tablespace_ids[i - 1] = i; + if (n_undo_tablespaces != 0) { + srv_undo_space_id_start = undo_tablespace_ids[0]; + prev_space_id = srv_undo_space_id_start - 1; } - undo_tablespace_ids[i] = ULINT_UNDEFINED; + } else { + n_undo_tablespaces = n_conf_tablespaces; + + undo_tablespace_ids[n_conf_tablespaces] = ULINT_UNDEFINED; } /* Open all the undo tablespaces that are currently in use. If we @@ -1463,8 +1514,6 @@ srv_undo_tablespaces_init( ut_a(undo_tablespace_ids[i] != 0); ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED); - /* Undo space ids start from 1. */ - err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]); if (err != DB_SUCCESS) { @@ -1499,11 +1548,23 @@ srv_undo_tablespaces_init( break; } + /** Note the first undo tablespace id in case of + no active undo tablespace. */ + if (n_undo_tablespaces == 0) { + srv_undo_space_id_start = i; + } + ++n_undo_tablespaces; ++*n_opened; } + /** Explictly specify the srv_undo_space_id_start + as zero when there are no undo tablespaces. */ + if (n_undo_tablespaces == 0) { + srv_undo_space_id_start = 0; + } + /* If the user says that there are fewer than what we find we tolerate that discrepancy but not the inverse. Because there could be unused undo tablespaces for future use. */ @@ -1548,10 +1609,11 @@ srv_undo_tablespaces_init( mtr_start(&mtr); /* The undo log tablespace */ - for (i = 1; i <= n_undo_tablespaces; ++i) { + for (i = 0; i < n_undo_tablespaces; ++i) { fsp_header_init( - i, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr); + undo_tablespace_ids[i], + SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr); } mtr_commit(&mtr); @@ -1626,12 +1688,10 @@ are not found and the user wants. @return DB_SUCCESS or error code */ UNIV_INTERN dberr_t -innobase_start_or_create_for_mysql(void) -/*====================================*/ +innobase_start_or_create_for_mysql() { - ibool create_new_db; - lsn_t min_flushed_lsn; - lsn_t max_flushed_lsn; + bool create_new_db; + lsn_t flushed_lsn; #ifdef UNIV_LOG_ARCHIVE lsn_t min_arch_log_no = LSN_MAX; lsn_t max_arch_log_no = LSN_MAX; @@ -1665,6 +1725,10 @@ innobase_start_or_create_for_mysql(void) /* This should be initialized early */ ut_init_timer(); + if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) { + srv_read_only_mode = 1; + } + high_level_read_only = srv_read_only_mode || srv_force_recovery > SRV_FORCE_NO_TRX_UNDO; @@ -2176,7 +2240,7 @@ innobase_start_or_create_for_mysql(void) #ifdef UNIV_LOG_ARCHIVE &min_arch_log_no, &max_arch_log_no, #endif /* UNIV_LOG_ARCHIVE */ - &min_flushed_lsn, &max_flushed_lsn, + &flushed_lsn, &sum_of_new_sizes); if (err == DB_FAIL) { @@ -2220,12 +2284,12 @@ innobase_start_or_create_for_mysql(void) bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL); ut_a(success); - min_flushed_lsn = max_flushed_lsn = log_get_lsn(); + flushed_lsn = log_get_lsn(); buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); err = create_log_files(create_new_db, logfilename, dirnamelen, - max_flushed_lsn, logfile0); + flushed_lsn, logfile0); if (err != DB_SUCCESS) { return(err); @@ -2245,19 +2309,8 @@ innobase_start_or_create_for_mysql(void) if (err == DB_NOT_FOUND) { if (i == 0) { - if (max_flushed_lsn - != min_flushed_lsn) { - ib_logf(IB_LOG_LEVEL_ERROR, - "Cannot create" - " log files because" - " data files are" - " corrupt or" - " not in sync" - " with each other"); - return(DB_ERROR); - } - if (max_flushed_lsn < (lsn_t) 1000) { + if (flushed_lsn < (lsn_t) 1000) { ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create" " log files because" @@ -2272,14 +2325,14 @@ innobase_start_or_create_for_mysql(void) err = create_log_files( create_new_db, logfilename, - dirnamelen, max_flushed_lsn, + dirnamelen, flushed_lsn, logfile0); if (err == DB_SUCCESS) { err = create_log_files_rename( logfilename, dirnamelen, - max_flushed_lsn, + flushed_lsn, logfile0); } @@ -2289,8 +2342,7 @@ innobase_start_or_create_for_mysql(void) /* Suppress the message about crash recovery. */ - max_flushed_lsn = min_flushed_lsn - = log_get_lsn(); + flushed_lsn = log_get_lsn(); goto files_checked; } else if (i < 2 && !IS_XTRABACKUP()) { /* must have at least 2 log files */ @@ -2420,9 +2472,23 @@ files_checked: mtr_start(&mtr); fsp_header_init(0, sum_of_new_sizes, &mtr); + compile_time_assert(TRX_SYS_SPACE == 0); + compile_time_assert(IBUF_SPACE_ID == 0); + + ulint ibuf_root = btr_create( + DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, + 0, 0, DICT_IBUF_ID_MIN, + dict_ind_redundant, &mtr); mtr_commit(&mtr); + if (ibuf_root == FIL_NULL) { + return(srv_init_abort(true, __FILE__, __LINE__, + DB_ERROR)); + } + + ut_ad(ibuf_root == IBUF_TREE_ROOT_PAGE_NO); + /* To maintain backward compatibility we create only the first rollback segment before the double write buffer. All the remaining rollback segments will be created later, @@ -2448,17 +2514,19 @@ files_checked: bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL); ut_a(success); - min_flushed_lsn = max_flushed_lsn = log_get_lsn(); + flushed_lsn = log_get_lsn(); buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); /* Stamp the LSN to the data files. */ - fil_write_flushed_lsn_to_data_files(max_flushed_lsn, 0); + err = fil_write_flushed_lsn(flushed_lsn); - fil_flush_file_spaces(FIL_TABLESPACE); + if (err != DB_SUCCESS) { + return(err); + } err = create_log_files_rename(logfilename, dirnamelen, - max_flushed_lsn, logfile0); + flushed_lsn, logfile0); if (err != DB_SUCCESS) { return(err); @@ -2513,7 +2581,7 @@ files_checked: err = recv_recovery_from_checkpoint_start( LOG_CHECKPOINT, LSN_MAX, - min_flushed_lsn, max_flushed_lsn); + flushed_lsn); if (err != DB_SUCCESS) { return(err); @@ -2696,7 +2764,7 @@ files_checked: DBUG_EXECUTE_IF("innodb_log_abort_1", return(DB_ERROR);); - min_flushed_lsn = max_flushed_lsn = log_get_lsn(); + flushed_lsn = log_get_lsn(); ib_logf(IB_LOG_LEVEL_WARN, "Resizing redo log from %u*%u to %u*%u pages" @@ -2705,7 +2773,7 @@ files_checked: (unsigned) srv_log_file_size, (unsigned) srv_n_log_files, (unsigned) srv_log_file_size_requested, - max_flushed_lsn); + flushed_lsn); buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); @@ -2715,7 +2783,7 @@ files_checked: we need to explicitly flush the log buffers. */ fil_flush(SRV_LOG_SPACE_FIRST_ID); - ut_ad(max_flushed_lsn == log_get_lsn()); + ut_ad(flushed_lsn == log_get_lsn()); /* Prohibit redo log writes from any other threads until creating a log checkpoint at the @@ -2727,8 +2795,7 @@ files_checked: return(DB_ERROR);); /* Stamp the LSN to the data files. */ - fil_write_flushed_lsn_to_data_files( - max_flushed_lsn, 0); + err = fil_write_flushed_lsn(flushed_lsn); DBUG_EXECUTE_IF("innodb_log_abort_4", err = DB_ERROR;); @@ -2736,8 +2803,6 @@ files_checked: return(err); } - fil_flush_file_spaces(FIL_TABLESPACE); - /* Close and free the redo log files, so that we can replace them. */ fil_close_log_files(true); @@ -2754,28 +2819,23 @@ files_checked: srv_log_file_size = srv_log_file_size_requested; err = create_log_files(create_new_db, logfilename, - dirnamelen, max_flushed_lsn, + dirnamelen, flushed_lsn, logfile0); if (err != DB_SUCCESS) { return(err); } - /* create_log_files() can increase system lsn that is - why FIL_PAGE_FILE_FLUSH_LSN have to be updated */ - min_flushed_lsn = max_flushed_lsn = log_get_lsn(); - fil_write_flushed_lsn_to_data_files(min_flushed_lsn, 0); - fil_flush_file_spaces(FIL_TABLESPACE); - err = create_log_files_rename(logfilename, dirnamelen, log_get_lsn(), logfile0); + if (err != DB_SUCCESS) { return(err); } } - srv_startup_is_before_trx_rollback_phase = FALSE; recv_recovery_rollback_active(); + srv_startup_is_before_trx_rollback_phase = FALSE; /* It is possible that file_format tag has never been set. In this case we initialize it to minimum @@ -2815,10 +2875,9 @@ files_checked: /* fprintf(stderr, "Max allowed record size %lu\n", page_get_free_space_of_empty() / 2); */ - if (buf_dblwr == NULL) { - /* Create the doublewrite buffer to a new tablespace */ - - buf_dblwr_create(); + if (!buf_dblwr_create()) { + return(srv_init_abort(create_new_db, __FILE__, __LINE__, + DB_ERROR)); } /* Here the double write buffer has already been created and so @@ -2848,6 +2907,9 @@ files_checked: /* Can only happen if server is read only. */ ut_a(srv_read_only_mode); srv_undo_logs = ULONG_UNDEFINED; + } else if (srv_available_undo_logs < srv_undo_logs) { + /* Should due to out of file space. */ + return (srv_init_abort(create_new_db, __FILE__, __LINE__, DB_ERROR)); } if (!srv_read_only_mode) { @@ -2905,6 +2967,16 @@ files_checked: srv_master_thread, NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS)); thread_started[1 + SRV_MAX_N_IO_THREADS] = true; + + srv_undo_sources = true; + /* Create the dict stats gathering thread */ + srv_dict_stats_thread_active = true; + dict_stats_thread_handle = os_thread_create( + dict_stats_thread, NULL, NULL); + dict_stats_thread_started = true; + + /* Create the thread that will optimize the FTS sub-system. */ + fts_optimize_init(); } if (!srv_read_only_mode @@ -2949,12 +3021,16 @@ files_checked: } - buf_flush_page_cleaner_thread_handle = os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL); + buf_page_cleaner_is_active = true; + buf_flush_page_cleaner_thread_handle = os_thread_create( + buf_flush_page_cleaner_thread, NULL, NULL); buf_flush_page_cleaner_thread_started = true; - } - buf_flush_lru_manager_thread_handle = os_thread_create(buf_flush_lru_manager_thread, NULL, NULL); - buf_flush_lru_manager_thread_started = true; + buf_lru_manager_is_active = true; + buf_flush_lru_manager_thread_handle = os_thread_create( + buf_flush_lru_manager_thread, NULL, NULL); + buf_flush_lru_manager_thread_started = true; + } if (!srv_file_per_table && srv_pass_corrupt_table) { fprintf(stderr, "InnoDB: Warning:" @@ -3002,10 +3078,10 @@ files_checked: if (!wsrep_recovery) { #endif /* WITH_WSREP */ /* Create the buffer pool dump/load thread */ + srv_buf_dump_thread_active = true; buf_dump_thread_handle= os_thread_create(buf_dump_thread, NULL, NULL); - srv_buf_dump_thread_active = true; buf_dump_thread_started = true; #ifdef WITH_WSREP } else { @@ -3015,26 +3091,19 @@ files_checked: } #endif /* WITH_WSREP */ - /* Create the dict stats gathering thread */ - dict_stats_thread_handle = os_thread_create( - dict_stats_thread, NULL, NULL); - srv_dict_stats_thread_active = true; - dict_stats_thread_started = true; - - /* Create the thread that will optimize the FTS sub-system. */ - fts_optimize_init(); - /* Create thread(s) that handles key rotation */ fil_system_enter(); fil_crypt_threads_init(); fil_system_exit(); - } - /* Init data for datafile scrub threads */ - btr_scrub_init(); + /* Init data for datafile scrub threads */ + btr_scrub_init(); - /* Initialize online defragmentation. */ - btr_defragment_init(); + /* Initialize online defragmentation. */ + btr_defragment_init(); + btr_defragment_thread_active = true; + os_thread_create(btr_defragment_thread, NULL, NULL); + } srv_was_started = TRUE; @@ -3071,13 +3140,10 @@ srv_fts_close(void) } #endif -/****************************************************************//** -Shuts down the InnoDB database. -@return DB_SUCCESS or error code */ +/** Shut down InnoDB. */ UNIV_INTERN -dberr_t -innobase_shutdown_for_mysql(void) -/*=============================*/ +void +innodb_shutdown() { ulint i; @@ -3087,15 +3153,20 @@ innobase_shutdown_for_mysql(void) "Shutting down an improperly started, " "or created database!"); } - - return(DB_SUCCESS); } - if (!srv_read_only_mode) { + if (srv_undo_sources) { + ut_ad(!srv_read_only_mode); /* Shutdown the FTS optimize sub system. */ fts_optimize_start_shutdown(); fts_optimize_end(); + dict_stats_shutdown(); + while (row_get_background_drop_list_len_low()) { + srv_wake_master_thread(); + os_thread_yield(); + } + srv_undo_sources = false; } /* 1. Flush the buffer pool to disk, write the current lsn to @@ -3199,11 +3270,10 @@ innobase_shutdown_for_mysql(void) if (!srv_read_only_mode) { dict_stats_thread_deinit(); fil_crypt_threads_cleanup(); + btr_scrub_cleanup(); + btr_defragment_shutdown(); } - /* Cleanup data for datafile scrubbing */ - btr_scrub_cleanup(); - #ifdef __WIN__ /* MDEV-361: ha_innodb.dll leaks handles on Windows MDEV-7403: should not pass recv_writer_thread_handle to @@ -3311,88 +3381,9 @@ innobase_shutdown_for_mysql(void) srv_start_has_been_called = FALSE; /* reset io_tid_i, in case current process does second innodb start (xtrabackup might do that).*/ io_tid_i = 0; - return(DB_SUCCESS); } #endif /* !UNIV_HOTBACKUP */ - -/******************************************************************** -Signal all per-table background threads to shutdown, and wait for them to do -so. */ -UNIV_INTERN -void -srv_shutdown_table_bg_threads(void) -/*===============================*/ -{ - dict_table_t* table; - dict_table_t* first; - dict_table_t* last = NULL; - - mutex_enter(&dict_sys->mutex); - - /* Signal all threads that they should stop. */ - table = UT_LIST_GET_FIRST(dict_sys->table_LRU); - first = table; - while (table) { - dict_table_t* next; - fts_t* fts = table->fts; - - if (fts != NULL) { - fts_start_shutdown(table, fts); - } - - next = UT_LIST_GET_NEXT(table_LRU, table); - - if (!next) { - last = table; - } - - table = next; - } - - /* We must release dict_sys->mutex here; if we hold on to it in the - loop below, we will deadlock if any of the background threads try to - acquire it (for example, the FTS thread by calling que_eval_sql). - - Releasing it here and going through dict_sys->table_LRU without - holding it is safe because: - - a) MySQL only starts the shutdown procedure after all client - threads have been disconnected and no new ones are accepted, so no - new tables are added or old ones dropped. - - b) Despite its name, the list is not LRU, and the order stays - fixed. - - To safeguard against the above assumptions ever changing, we store - the first and last items in the list above, and then check that - they've stayed the same below. */ - - mutex_exit(&dict_sys->mutex); - - /* Wait for the threads of each table to stop. This is not inside - the above loop, because by signaling all the threads first we can - overlap their shutting down delays. */ - table = UT_LIST_GET_FIRST(dict_sys->table_LRU); - ut_a(first == table); - while (table) { - dict_table_t* next; - fts_t* fts = table->fts; - - if (fts != NULL) { - fts_shutdown(table, fts); - } - - next = UT_LIST_GET_NEXT(table_LRU, table); - - if (table == last) { - ut_a(!next); - } - - table = next; - } -} - /*****************************************************************//** Get the meta-data filename from the table name. */ UNIV_INTERN diff --git a/storage/xtradb/sync/sync0sync.cc b/storage/xtradb/sync/sync0sync.cc index 6692eef9fb0..37ac3c56fff 100644 --- a/storage/xtradb/sync/sync0sync.cc +++ b/storage/xtradb/sync/sync0sync.cc @@ -1236,6 +1236,7 @@ sync_thread_add_level( upgrading in innobase_start_or_create_for_mysql(). */ break; } + /* fall through */ case SYNC_MEM_POOL: case SYNC_MEM_HASH: case SYNC_RECV: @@ -1299,9 +1300,9 @@ sync_thread_add_level( } } ut_ad(found_current); - - /* fallthrough */ } + + /* fall through */ case SYNC_BUF_FLUSH_LIST: case SYNC_BUF_LRU_LIST: case SYNC_BUF_FREE_LIST: diff --git a/storage/xtradb/trx/trx0i_s.cc b/storage/xtradb/trx/trx0i_s.cc index eacd9212d2f..0c9618d98eb 100644 --- a/storage/xtradb/trx/trx0i_s.cc +++ b/storage/xtradb/trx/trx0i_s.cc @@ -507,7 +507,9 @@ fill_trx_row( row->trx_mysql_thread_id = thd_get_thread_id(trx->mysql_thd); - stmt = innobase_get_stmt(trx->mysql_thd, &stmt_len); + stmt = trx->mysql_thd + ? innobase_get_stmt(trx->mysql_thd, &stmt_len) + : NULL; if (stmt != NULL) { char query[TRX_I_S_TRX_QUERY_MAX_LEN + 1]; diff --git a/storage/xtradb/trx/trx0purge.cc b/storage/xtradb/trx/trx0purge.cc index 7d35bb12093..df4a3217820 100644 --- a/storage/xtradb/trx/trx0purge.cc +++ b/storage/xtradb/trx/trx0purge.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under @@ -247,6 +247,19 @@ trx_purge_add_update_undo_to_history( hist_size + undo->size, MLOG_4BYTES, mtr); } + /* Before any transaction-generating background threads or the + purge have been started, recv_recovery_rollback_active() can + start transactions in row_merge_drop_temp_indexes() and + fts_drop_orphaned_tables(), and roll back recovered transactions. + After the purge thread has been given permission to exit, + in fast shutdown, we may roll back transactions (trx->undo_no==0) + in THD::cleanup() invoked from unlink_thd(). */ + ut_ad(srv_undo_sources + || ((srv_startup_is_before_trx_rollback_phase + || trx_rollback_or_clean_is_active) + && purge_sys->state == PURGE_STATE_INIT) + || (trx->undo_no == 0 && srv_fast_shutdown)); + /* Add the log as the first in the history list */ flst_add_first(rseg_header + TRX_RSEG_HISTORY, undo_header + TRX_UNDO_HISTORY_NODE, mtr); @@ -685,7 +698,8 @@ trx_purge_get_rseg_with_min_trx_id( /* We assume in purge of externally stored fields that space id is in the range of UNDO tablespace space ids */ - ut_a(purge_sys->rseg->space <= srv_undo_tablespaces_open); + ut_a(purge_sys->rseg->space == 0 + || srv_is_undo_tablespace(purge_sys->rseg->space)); zip_size = purge_sys->rseg->zip_size; diff --git a/storage/xtradb/trx/trx0rec.cc b/storage/xtradb/trx/trx0rec.cc index 74a63b60286..8c0904dd57b 100644 --- a/storage/xtradb/trx/trx0rec.cc +++ b/storage/xtradb/trx/trx0rec.cc @@ -1186,10 +1186,6 @@ UNIV_INTERN dberr_t trx_undo_report_row_operation( /*==========================*/ - ulint flags, /*!< in: if BTR_NO_UNDO_LOG_FLAG bit is - set, does nothing */ - ulint op_type, /*!< in: TRX_UNDO_INSERT_OP or - TRX_UNDO_MODIFY_OP */ que_thr_t* thr, /*!< in: query thread */ dict_index_t* index, /*!< in: clustered index */ const dtuple_t* clust_entry, /*!< in: in the case of an insert, @@ -1223,16 +1219,8 @@ trx_undo_report_row_operation( ut_a(dict_index_is_clust(index)); ut_ad(!rec || rec_offs_validate(rec, index, offsets)); - if (flags & BTR_NO_UNDO_LOG_FLAG) { - - *roll_ptr = 0; - - return(DB_SUCCESS); - } - ut_ad(thr); - ut_ad((op_type != TRX_UNDO_INSERT_OP) - || (clust_entry && !update && !rec)); + ut_ad(!clust_entry || (!update && !rec)); trx = thr_get_trx(thr); @@ -1253,8 +1241,7 @@ trx_undo_report_row_operation( /* If the undo log is not assigned yet, assign one */ - switch (op_type) { - case TRX_UNDO_INSERT_OP: + if (clust_entry) { undo = trx->insert_undo; if (undo == NULL) { @@ -1270,10 +1257,7 @@ trx_undo_report_row_operation( ut_ad(err == DB_SUCCESS); } - break; - default: - ut_ad(op_type == TRX_UNDO_MODIFY_OP); - + } else { undo = trx->update_undo; if (undo == NULL) { @@ -1297,23 +1281,15 @@ trx_undo_report_row_operation( buf_block_dbg_add_level(undo_block, SYNC_TRX_UNDO_PAGE); do { - page_t* undo_page; - ulint offset; - - undo_page = buf_block_get_frame(undo_block); ut_ad(page_no == buf_block_get_page_no(undo_block)); - switch (op_type) { - case TRX_UNDO_INSERT_OP: - offset = trx_undo_page_report_insert( - undo_page, trx, index, clust_entry, &mtr); - break; - default: - ut_ad(op_type == TRX_UNDO_MODIFY_OP); - offset = trx_undo_page_report_modify( + page_t* undo_page = buf_block_get_frame(undo_block); + ulint offset = clust_entry + ? trx_undo_page_report_insert( + undo_page, trx, index, clust_entry, &mtr) + : trx_undo_page_report_modify( undo_page, trx, index, rec, offsets, update, cmpl_info, &mtr); - } if (UNIV_UNLIKELY(offset == 0)) { /* The record did not fit on the page. We erase the @@ -1364,7 +1340,7 @@ trx_undo_report_row_operation( mutex_exit(&trx->undo_mutex); *roll_ptr = trx_undo_build_roll_ptr( - op_type == TRX_UNDO_INSERT_OP, + clust_entry != NULL, rseg->id, page_no, offset); return(DB_SUCCESS); } diff --git a/storage/xtradb/trx/trx0roll.cc b/storage/xtradb/trx/trx0roll.cc index d228743d300..335ef8859c4 100644 --- a/storage/xtradb/trx/trx0roll.cc +++ b/storage/xtradb/trx/trx0roll.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2016, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -818,6 +818,7 @@ DECLARE_THREAD(trx_rollback_or_clean_all_recovered)( /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); ut_ad(!srv_read_only_mode); #ifdef UNIV_PFS_THREAD @@ -828,6 +829,7 @@ DECLARE_THREAD(trx_rollback_or_clean_all_recovered)( trx_rollback_or_clean_is_active = false; + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ diff --git a/storage/xtradb/trx/trx0rseg.cc b/storage/xtradb/trx/trx0rseg.cc index 003d1036a8c..16fa334872b 100644 --- a/storage/xtradb/trx/trx0rseg.cc +++ b/storage/xtradb/trx/trx0rseg.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -293,14 +294,13 @@ trx_rseg_create_instance( } } -/********************************************************************* -Creates a rollback segment. -@return pointer to new rollback segment if create successful */ +/** Create a rollback segment. +@param[in] space undo tablespace ID +@return pointer to new rollback segment +@retval NULL on failure */ UNIV_INTERN trx_rseg_t* -trx_rseg_create( -/*============*/ - ulint space) /*!< in: id of UNDO tablespace */ +trx_rseg_create(ulint space) { mtr_t mtr; ulint slot_no; @@ -323,22 +323,21 @@ trx_rseg_create( page_no = trx_rseg_header_create( space, 0, ULINT_MAX, slot_no, &mtr); - ut_a(page_no != FIL_NULL); - - sys_header = trx_sysf_get(&mtr); + if (page_no != FIL_NULL) { + sys_header = trx_sysf_get(&mtr); - id = trx_sysf_rseg_get_space(sys_header, slot_no, &mtr); - ut_a(id == space); + id = trx_sysf_rseg_get_space(sys_header, slot_no, &mtr); + ut_a(id == space); - zip_size = space ? fil_space_get_zip_size(space) : 0; + zip_size = space ? fil_space_get_zip_size(space) : 0; - rseg = trx_rseg_mem_create( - slot_no, space, zip_size, page_no, - purge_sys->ib_bh, &mtr); + rseg = trx_rseg_mem_create( + slot_no, space, zip_size, page_no, + purge_sys->ib_bh, &mtr); + } } mtr_commit(&mtr); - return(rseg); } diff --git a/storage/xtradb/trx/trx0sys.cc b/storage/xtradb/trx/trx0sys.cc index 558fe8a2c49..9accb4ef303 100644 --- a/storage/xtradb/trx/trx0sys.cc +++ b/storage/xtradb/trx/trx0sys.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1064,18 +1064,12 @@ trx_sys_create_rsegs( ulint new_rsegs = n_rsegs - n_used; for (i = 0; i < new_rsegs; ++i) { - ulint space; + ulint space_id; + space_id = (n_spaces == 0) ? 0 + : (srv_undo_space_id_start + i % n_spaces); - /* Tablespace 0 is the system tablespace. All UNDO - log tablespaces start from 1. */ - - if (n_spaces > 0) { - space = (i % n_spaces) + 1; - } else { - space = 0; /* System tablespace */ - } - - if (trx_rseg_create(space) != NULL) { + /* Tablespace 0 is the system tablespace. */ + if (trx_rseg_create(space_id) != NULL) { ++n_used; } else { break; diff --git a/storage/xtradb/trx/trx0trx.cc b/storage/xtradb/trx/trx0trx.cc index d0cb4a883cc..1d2f7ada54e 100644 --- a/storage/xtradb/trx/trx0trx.cc +++ b/storage/xtradb/trx/trx0trx.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2015, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2283,6 +2284,7 @@ state_ok: } } #endif /* WITH_WSREP */ + /**********************************************************************//** Prints info about a transaction. Acquires and releases lock_sys->mutex and trx_sys->mutex. */ @@ -2744,4 +2746,3 @@ trx_start_for_ddl_low( ut_error; } - diff --git a/storage/xtradb/usr/usr0sess.cc b/storage/xtradb/usr/usr0sess.cc index ab7ba6bea09..e1bd71ff1a0 100644 --- a/storage/xtradb/usr/usr0sess.cc +++ b/storage/xtradb/usr/usr0sess.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -48,8 +49,6 @@ sess_open(void) sess->trx = trx_allocate_for_background(); sess->trx->sess = sess; - UT_LIST_INIT(sess->graphs); - return(sess); } |