diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2017-05-23 11:09:47 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2017-05-23 11:09:47 +0300 |
commit | 8f643e2063c9890a353149f39ef85b2cf3151fd0 (patch) | |
tree | c5468b905c8858dc1cc80ccb2072a923b28c8783 /storage/xtradb | |
parent | 3aecedb2f8f99c446a5ba0b02efcf422a252c9e8 (diff) | |
parent | b61700c22104b513caa9a85e5c6529aa5f2bf4e4 (diff) | |
download | mariadb-git-8f643e2063c9890a353149f39ef85b2cf3151fd0.tar.gz |
Merge 10.1 into 10.2
Diffstat (limited to 'storage/xtradb')
40 files changed, 1061 insertions, 472 deletions
diff --git a/storage/xtradb/btr/btr0sea.cc b/storage/xtradb/btr/btr0sea.cc index 68dbcdf1fa7..2f0428747d5 100644 --- a/storage/xtradb/btr/btr0sea.cc +++ b/storage/xtradb/btr/btr0sea.cc @@ -192,7 +192,7 @@ btr_search_sys_create( &btr_search_latch_arr[i], SYNC_SEARCH_SYS); btr_search_sys->hash_tables[i] - = ha_create(hash_size, 0, MEM_HEAP_FOR_BTR_SEARCH, 0); + = ib_create(hash_size, 0, MEM_HEAP_FOR_BTR_SEARCH, 0); #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG btr_search_sys->hash_tables[i]->adaptive = TRUE; diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc index 67c265cb827..4678b0eb6a8 100644 --- a/storage/xtradb/buf/buf0buf.cc +++ b/storage/xtradb/buf/buf0buf.cc @@ -65,6 +65,18 @@ Created 11/5/1995 Heikki Tuuri #include "fil0pagecompress.h" #include "ha_prototypes.h" +#ifdef UNIV_LINUX +#include <stdlib.h> +#endif + +#ifdef HAVE_LZO +#include "lzo/lzo1x.h" +#endif + +#ifdef HAVE_SNAPPY +#include "snappy-c.h" +#endif + /** Decrypt a page. @param[in,out] bpage Page control block @param[in,out] space tablespace @@ -77,6 +89,26 @@ buf_page_decrypt_after_read(buf_page_t* bpage, fil_space_t* space) /* prototypes for new functions added to ha_innodb.cc */ trx_t* innobase_get_trx(); +inline void* aligned_malloc(size_t size, size_t align) { + void *result; +#ifdef _MSC_VER + result = _aligned_malloc(size, align); +#else + if(posix_memalign(&result, align, size)) { + result = 0; + } +#endif + return result; +} + +inline void aligned_free(void *ptr) { +#ifdef _MSC_VER + _aligned_free(ptr); +#else + free(ptr); +#endif +} + static inline void _increment_page_get_statistics(buf_block_t* block, trx_t* trx) @@ -108,10 +140,6 @@ _increment_page_get_statistics(buf_block_t* block, trx_t* trx) return; } -#ifdef HAVE_LZO -#include "lzo/lzo1x.h" -#endif - /* IMPLEMENTATION OF THE BUFFER POOL ================================= @@ -1531,7 +1559,7 @@ buf_pool_init_instance( ut_a(srv_n_page_hash_locks != 0); ut_a(srv_n_page_hash_locks <= MAX_PAGE_HASH_LOCKS); - buf_pool->page_hash = ha_create(2 * buf_pool->curr_size, + buf_pool->page_hash = ib_create(2 * buf_pool->curr_size, srv_n_page_hash_locks, MEM_HEAP_FOR_PAGE_HASH, SYNC_BUF_PAGE_HASH); @@ -1640,20 +1668,14 @@ buf_pool_free_instance( if (buf_pool->tmp_arr) { for(ulint i = 0; i < buf_pool->tmp_arr->n_slots; i++) { buf_tmp_buffer_t* slot = &(buf_pool->tmp_arr->slots[i]); -#ifdef HAVE_LZO - if (slot && slot->lzo_mem) { - ut_free(slot->lzo_mem); - slot->lzo_mem = NULL; - } -#endif - if (slot && slot->crypt_buf_free) { - ut_free(slot->crypt_buf_free); - slot->crypt_buf_free = NULL; + if (slot && slot->crypt_buf) { + aligned_free(slot->crypt_buf); + slot->crypt_buf = NULL; } - if (slot && slot->comp_buf_free) { - ut_free(slot->comp_buf_free); - slot->comp_buf_free = NULL; + if (slot && slot->comp_buf) { + aligned_free(slot->comp_buf); + slot->comp_buf = NULL; } } } @@ -6174,22 +6196,27 @@ buf_pool_reserve_tmp_slot( buf_pool_mutex_exit(buf_pool); /* Allocate temporary memory for encryption/decryption */ - if (free_slot->crypt_buf_free == NULL) { - free_slot->crypt_buf_free = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*2)); - free_slot->crypt_buf = static_cast<byte *>(ut_align(free_slot->crypt_buf_free, UNIV_PAGE_SIZE)); - memset(free_slot->crypt_buf_free, 0, UNIV_PAGE_SIZE *2); + if (free_slot->crypt_buf == NULL) { + free_slot->crypt_buf = static_cast<byte*>(aligned_malloc(UNIV_PAGE_SIZE, UNIV_PAGE_SIZE)); + memset(free_slot->crypt_buf, 0, UNIV_PAGE_SIZE); } /* For page compressed tables allocate temporary memory for compression/decompression */ - if (compressed && free_slot->comp_buf_free == NULL) { - free_slot->comp_buf_free = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE*2)); - free_slot->comp_buf = static_cast<byte *>(ut_align(free_slot->comp_buf_free, UNIV_PAGE_SIZE)); - memset(free_slot->comp_buf_free, 0, UNIV_PAGE_SIZE *2); -#ifdef HAVE_LZO - free_slot->lzo_mem = static_cast<byte *>(ut_malloc(LZO1X_1_15_MEM_COMPRESS)); - memset(free_slot->lzo_mem, 0, LZO1X_1_15_MEM_COMPRESS); + if (compressed && free_slot->comp_buf == NULL) { + ulint size = UNIV_PAGE_SIZE; + + /* Both snappy and lzo compression methods require that + output buffer used for compression is bigger than input + buffer. Increase the allocated buffer size accordingly. */ +#if HAVE_SNAPPY + size = snappy_max_compressed_length(size); +#endif +#if HAVE_LZO + size += LZO1X_1_15_MEM_COMPRESS; #endif + free_slot->comp_buf = static_cast<byte*>(aligned_malloc(size, UNIV_PAGE_SIZE)); + memset(free_slot->comp_buf, 0, size); } return (free_slot); @@ -6277,8 +6304,7 @@ buf_page_encrypt_before_write( fsp_flags_get_page_compression_level(space->flags), fil_space_get_block_size(space, bpage->offset), encrypted, - &out_len, - IF_LZO(slot->lzo_mem, NULL)); + &out_len); bpage->real_size = out_len; diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc index 1c9646c0bd6..c0c52deb57f 100644 --- a/storage/xtradb/buf/buf0dblwr.cc +++ b/storage/xtradb/buf/buf0dblwr.cc @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -351,7 +351,7 @@ recovery, this function loads the pages from double write buffer into memory. */ void buf_dblwr_init_or_load_pages( /*=========================*/ - os_file_t file, + pfs_os_file_t file, char* path, bool load_corrupt_pages) { diff --git a/storage/xtradb/buf/buf0dump.cc b/storage/xtradb/buf/buf0dump.cc index e728636042b..71b97b770e1 100644 --- a/storage/xtradb/buf/buf0dump.cc +++ b/storage/xtradb/buf/buf0dump.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2011, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under @@ -612,6 +612,7 @@ buf_load() if (dump_n == 0) { ut_free(dump); + ut_free(dump_tmp); ut_sprintf_timestamp(now); buf_load_status(STATUS_NOTICE, "Buffer pool(s) load completed at %s " @@ -685,6 +686,7 @@ extern "C" UNIV_INTERN os_thread_ret_t DECLARE_THREAD(buf_dump_thread)(void*) { + my_thread_init(); ut_ad(!srv_read_only_mode); buf_dump_status(STATUS_INFO, "Dumping buffer pool(s) not yet started"); @@ -721,6 +723,7 @@ DECLARE_THREAD(buf_dump_thread)(void*) srv_buf_dump_thread_active = false; + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ os_thread_exit(NULL); diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index 1f5c3993be7..64b236d5688 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, 2017, MariaDB Corporation. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2017, MariaDB Corporation. Copyright (c) 2013, 2014, Fusion-io This program is free software; you can redistribute it and/or modify it under @@ -352,6 +352,7 @@ buf_flush_insert_into_flush_list( buf_block_t* block, /*!< in/out: block which is modified */ lsn_t lsn) /*!< in: oldest modification */ { + ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE); ut_ad(log_flush_order_mutex_own()); ut_ad(mutex_own(&block->mutex)); @@ -410,6 +411,7 @@ buf_flush_insert_sorted_into_flush_list( buf_page_t* prev_b; buf_page_t* b; + ut_ad(srv_shutdown_state != SRV_SHUTDOWN_FLUSH_PHASE); ut_ad(log_flush_order_mutex_own()); ut_ad(mutex_own(&block->mutex)); ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); @@ -715,6 +717,7 @@ buf_flush_write_complete( buf_page_set_io_fix(bpage, BUF_IO_NONE); buf_pool->n_flush[flush_type]--; + ut_ad(buf_pool->n_flush[flush_type] != ULINT_MAX); #ifdef UNIV_MTFLUSH_DEBUG fprintf(stderr, "n pending flush %lu\n", @@ -1099,6 +1102,7 @@ buf_flush_page( } ++buf_pool->n_flush[flush_type]; + ut_ad(buf_pool->n_flush[flush_type] != 0); mutex_exit(&buf_pool->flush_state_mutex); @@ -2264,13 +2268,14 @@ Clears up tail of the LRU lists: * Flush dirty pages at the tail of LRU to the disk The depth to which we scan each buffer pool is controlled by dynamic config parameter innodb_LRU_scan_depth. -@return number of pages flushed */ +@return number of flushed and evicted pages */ UNIV_INTERN ulint buf_flush_LRU_tail(void) /*====================*/ { ulint total_flushed = 0; + ulint total_evicted = 0; ulint start_time = ut_time_ms(); ulint scan_depth[MAX_BUFFER_POOLS]; ulint requested_pages[MAX_BUFFER_POOLS]; @@ -2341,6 +2346,7 @@ buf_flush_LRU_tail(void) limited_scan[i] = (previous_evicted[i] > n.evicted); previous_evicted[i] = n.evicted; + total_evicted += n.evicted; requested_pages[i] += lru_chunk_size; @@ -2381,7 +2387,7 @@ buf_flush_LRU_tail(void) } } - return(total_flushed); + return(total_flushed + total_evicted); } /*********************************************************************//** @@ -2682,6 +2688,23 @@ buf_get_total_free_list_length(void) return result; } +/** Returns the aggregate LRU list length over all buffer pool instances. +@return total LRU list length. */ +MY_ATTRIBUTE((warn_unused_result)) +static +ulint +buf_get_total_LRU_list_length(void) +{ + ulint result = 0; + + for (ulint i = 0; i < srv_buf_pool_instances; i++) { + + result += UT_LIST_GET_LEN(buf_pool_from_array(i)->LRU); + } + + return result; +} + /*********************************************************************//** Adjust the desired page cleaner thread sleep time for LRU flushes. */ MY_ATTRIBUTE((nonnull)) @@ -2694,8 +2717,9 @@ page_cleaner_adapt_lru_sleep_time( ulint lru_n_flushed) /*!< in: number of flushed in previous batch */ { - ulint free_len = buf_get_total_free_list_length(); - ulint max_free_len = srv_LRU_scan_depth * srv_buf_pool_instances; + ulint free_len = buf_get_total_free_list_length(); + ulint max_free_len = ut_min(buf_get_total_LRU_list_length(), + srv_LRU_scan_depth * srv_buf_pool_instances); if (free_len < max_free_len / 100 && lru_n_flushed) { @@ -2707,7 +2731,7 @@ page_cleaner_adapt_lru_sleep_time( /* Free lists filled more than 20% or no pages flushed in previous batch, sleep a bit more */ - *lru_sleep_time += 50; + *lru_sleep_time += 1; if (*lru_sleep_time > srv_cleaner_max_lru_time) *lru_sleep_time = srv_cleaner_max_lru_time; } else if (free_len < max_free_len / 20 && *lru_sleep_time >= 50) { @@ -2754,6 +2778,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); ulint next_loop_time = ut_time_ms() + 1000; ulint n_flushed = 0; ulint last_activity = srv_get_activity_count(); @@ -2886,6 +2911,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( thread_exit: buf_page_cleaner_is_active = FALSE; + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ os_thread_exit(NULL); diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc index bc46bcab63b..a1cfeb3860f 100644 --- a/storage/xtradb/dict/dict0dict.cc +++ b/storage/xtradb/dict/dict0dict.cc @@ -888,6 +888,12 @@ dict_index_get_nth_col_or_prefix_pos( ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + ut_ad((inc_prefix && !prefix_col_pos) || (!inc_prefix)); + + if (!prefix_col_pos) { + prefix_col_pos = &prefixed_pos_dummy; + } + *prefix_col_pos = ULINT_UNDEFINED; if (!prefix_col_pos) { prefix_col_pos = &prefixed_pos_dummy; diff --git a/storage/xtradb/dict/dict0mem.cc b/storage/xtradb/dict/dict0mem.cc index fa7177c5137..125d7d78a1f 100644 --- a/storage/xtradb/dict/dict0mem.cc +++ b/storage/xtradb/dict/dict0mem.cc @@ -320,8 +320,8 @@ dict_mem_table_col_rename_low( ut_ad(from_len <= NAME_LEN); ut_ad(to_len <= NAME_LEN); - char from[NAME_LEN]; - strncpy(from, s, NAME_LEN); + char from[NAME_LEN + 1]; + strncpy(from, s, NAME_LEN + 1); if (from_len == to_len) { /* The easy case: simply replace the column name in diff --git a/storage/xtradb/dict/dict0stats.cc b/storage/xtradb/dict/dict0stats.cc index d2e9a2618c0..c1463e98ce0 100644 --- a/storage/xtradb/dict/dict0stats.cc +++ b/storage/xtradb/dict/dict0stats.cc @@ -1168,8 +1168,9 @@ dict_stats_analyze_index_level( leaf-level delete marks because delete marks on non-leaf level do not make sense. */ - if (level == 0 && srv_stats_include_delete_marked? 0: - rec_get_deleted_flag( + if (level == 0 + && !srv_stats_include_delete_marked + && rec_get_deleted_flag( rec, page_is_comp(btr_pcur_get_page(&pcur)))) { diff --git a/storage/xtradb/dict/dict0stats_bg.cc b/storage/xtradb/dict/dict0stats_bg.cc index 55d34ff6ae1..ec5efe0dd7c 100644 --- a/storage/xtradb/dict/dict0stats_bg.cc +++ b/storage/xtradb/dict/dict0stats_bg.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2012, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under @@ -530,6 +530,7 @@ extern "C" UNIV_INTERN os_thread_ret_t DECLARE_THREAD(dict_stats_thread)(void*) { + my_thread_init(); ut_a(!srv_read_only_mode); while (!SHUTTING_DOWN()) { @@ -556,6 +557,7 @@ DECLARE_THREAD(dict_stats_thread)(void*) srv_dict_stats_thread_active = false; + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit instead of return(). */ os_thread_exit(NULL); diff --git a/storage/xtradb/fil/fil0crypt.cc b/storage/xtradb/fil/fil0crypt.cc index e27e93244ae..b40fd1dedaa 100644 --- a/storage/xtradb/fil/fil0crypt.cc +++ b/storage/xtradb/fil/fil0crypt.cc @@ -1294,10 +1294,10 @@ struct rotate_thread_t { bool should_shutdown() const { switch (srv_shutdown_state) { case SRV_SHUTDOWN_NONE: - case SRV_SHUTDOWN_CLEANUP: return thread_no >= srv_n_fil_crypt_threads; - case SRV_SHUTDOWN_FLUSH_PHASE: + case SRV_SHUTDOWN_CLEANUP: return true; + case SRV_SHUTDOWN_FLUSH_PHASE: case SRV_SHUTDOWN_LAST_PHASE: case SRV_SHUTDOWN_EXIT_THREADS: break; diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index f94b8acc715..09ba89459ad 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2014, 2017, MariaDB Corporation. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2014, 2017, MariaDB Corporation. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -166,7 +166,8 @@ UNIV_INTERN extern uint srv_fil_crypt_rotate_key_age; UNIV_INTERN extern ib_mutex_t fil_crypt_threads_mutex; /** Determine if (i) is a user tablespace id or not. */ -# define fil_is_user_tablespace_id(i) ((i) > srv_undo_tablespaces_open) +# define fil_is_user_tablespace_id(i) (i != 0 \ + && !srv_is_undo_tablespace(i)) /** Determine if user has explicitly disabled fsync(). */ #ifndef __WIN__ @@ -621,7 +622,8 @@ fil_node_open_file( size_bytes = os_file_get_size(node->handle); ut_a(size_bytes != (os_offset_t) -1); - node->file_block_size = os_file_get_block_size(node->handle, node->name); + node->file_block_size = os_file_get_block_size( + node->handle, node->name); space->file_block_size = node->file_block_size; #ifdef UNIV_HOTBACKUP @@ -731,7 +733,8 @@ add_size: } if (node->file_block_size == 0) { - node->file_block_size = os_file_get_block_size(node->handle, node->name); + node->file_block_size = os_file_get_block_size( + node->handle, node->name); space->file_block_size = node->file_block_size; } @@ -2416,7 +2419,7 @@ UNIV_INTERN const char* fil_read_first_page( /*================*/ - os_file_t data_file, /*!< in: open data file */ + pfs_os_file_t data_file, /*!< in: open data file */ ibool one_read_already, /*!< in: TRUE if min and max parameters below already contain sensible data */ @@ -3740,7 +3743,7 @@ fil_open_linked_file( /*===============*/ const char* tablename, /*!< in: database/tablename */ char** remote_filepath,/*!< out: remote filepath */ - os_file_t* remote_file, /*!< out: remote file handle */ + pfs_os_file_t* remote_file, /*!< out: remote file handle */ ulint atomic_writes) /*!< in: atomic writes table option value */ { @@ -3803,7 +3806,8 @@ fil_create_new_single_table_tablespace( fil_encryption_t mode, /*!< in: encryption mode */ ulint key_id) /*!< in: encryption key_id */ { - os_file_t file; + pfs_os_file_t file; + ibool ret; dberr_t err; byte* buf2; @@ -6882,7 +6886,7 @@ fil_buf_block_init( } struct fil_iterator_t { - os_file_t file; /*!< File handle */ + pfs_os_file_t file; /*!< File handle */ const char* filepath; /*!< File path name */ os_offset_t start; /*!< From where to start */ os_offset_t end; /*!< Where to stop */ @@ -7086,8 +7090,7 @@ fil_iterate( 0,/* FIXME: compression level */ 512,/* FIXME: use proper block size */ encrypted, - &len, - NULL); + &len); updated = true; } @@ -7155,7 +7158,7 @@ fil_tablespace_iterate( PageCallback& callback) { dberr_t err; - os_file_t file; + pfs_os_file_t file; char* filepath; ut_a(n_io_buffers > 0); diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc index c377c19dd0d..2b6ae95640f 100644 --- a/storage/xtradb/fil/fil0pagecompress.cc +++ b/storage/xtradb/fil/fil0pagecompress.cc @@ -99,17 +99,16 @@ fil_compress_page( ulint level, /* in: compression level */ ulint block_size, /*!< in: block size */ bool encrypted, /*!< in: is page also encrypted */ - ulint* out_len, /*!< out: actual length of compressed + ulint* out_len) /*!< out: actual length of compressed page */ - byte* lzo_mem) /*!< in: temporal memory used by LZO */ { int err = Z_OK; int comp_level = level; ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE; - ulint write_size=0; + ulint write_size = 0; /* Cache to avoid change during function execution */ ulint comp_method = innodb_compression_algorithm; - bool allocated=false; + bool allocated = false; /* page_compression does not apply to tables or tablespaces that use ROW_FORMAT=COMPRESSED */ @@ -121,13 +120,23 @@ fil_compress_page( if (!out_buf) { allocated = true; - out_buf = static_cast<byte *>(ut_malloc(UNIV_PAGE_SIZE)); -#ifdef HAVE_LZO + ulint size = UNIV_PAGE_SIZE; + + /* Both snappy and lzo compression methods require that + output buffer used for compression is bigger than input + buffer. Increase the allocated buffer size accordingly. */ +#if HAVE_SNAPPY + if (comp_method == PAGE_SNAPPY_ALGORITHM) { + size = snappy_max_compressed_length(size); + } +#endif +#if HAVE_LZO if (comp_method == PAGE_LZO_ALGORITHM) { - lzo_mem = static_cast<byte *>(ut_malloc(LZO1X_1_15_MEM_COMPRESS)); - memset(lzo_mem, 0, LZO1X_1_15_MEM_COMPRESS); + size += LZO1X_1_15_MEM_COMPRESS; } #endif + + out_buf = static_cast<byte *>(ut_malloc(size)); } ut_ad(buf); @@ -163,6 +172,7 @@ fil_compress_page( switch(comp_method) { #ifdef HAVE_LZ4 case PAGE_LZ4_ALGORITHM: + #ifdef HAVE_LZ4_COMPRESS_DEFAULT err = LZ4_compress_default((const char *)buf, (char *)out_buf+header_len, len, write_size); @@ -197,7 +207,7 @@ fil_compress_page( #ifdef HAVE_LZO case PAGE_LZO_ALGORITHM: err = lzo1x_1_15_compress( - buf, len, out_buf+header_len, &write_size, lzo_mem); + buf, len, out_buf+header_len, &write_size, out_buf+UNIV_PAGE_SIZE); if (err != LZO_E_OK || write_size > UNIV_PAGE_SIZE-header_len) { if (space && !space->printed_compression_failure) { @@ -288,6 +298,7 @@ fil_compress_page( case PAGE_SNAPPY_ALGORITHM: { snappy_status cstatus; + write_size = snappy_max_compressed_length(UNIV_PAGE_SIZE); cstatus = snappy_compress( (const char *)buf, @@ -443,11 +454,6 @@ fil_compress_page( err_exit: if (allocated) { ut_free(out_buf); -#ifdef HAVE_LZO - if (comp_method == PAGE_LZO_ALGORITHM) { - ut_free(lzo_mem); - } -#endif } return (buf); @@ -509,7 +515,7 @@ fil_decompress_page( ptype != FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED)) { ib_logf(IB_LOG_LEVEL_ERROR, "Corruption: We try to uncompress corrupted page" - " CRC %lu type %lu len %lu.", + " CRC " ULINTPF " type " ULINTPF " len " ULINTPF ".", mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM), mach_read_from_2(buf+FIL_PAGE_TYPE), len); @@ -533,7 +539,7 @@ fil_decompress_page( if (actual_size == 0 || actual_size > UNIV_PAGE_SIZE) { ib_logf(IB_LOG_LEVEL_ERROR, "Corruption: We try to uncompress corrupted page" - " actual size %lu compression %s.", + " actual size " ULINTPF " compression %s.", actual_size, fil_get_compression_alg_name(compression_alg)); fflush(stderr); if (return_error) { @@ -548,12 +554,9 @@ fil_decompress_page( *write_size = actual_size; } -#ifdef UNIV_PAGECOMPRESS_DEBUG - ib_logf(IB_LOG_LEVEL_INFO, - "Preparing for decompress for len %lu\n", - actual_size); -#endif /* UNIV_PAGECOMPRESS_DEBUG */ - + DBUG_PRINT("compress", + ("Preparing for decompress for len " ULINTPF ".", + actual_size)); switch(compression_alg) { case PAGE_ZLIB_ALGORITHM: @@ -565,7 +568,7 @@ fil_decompress_page( ib_logf(IB_LOG_LEVEL_ERROR, "Corruption: Page is marked as compressed" " but uncompress failed with error %d " - " size %lu len %lu.", + " size " ULINTPF " len " ULINTPF ".", err, actual_size, len); fflush(stderr); @@ -584,9 +587,10 @@ fil_decompress_page( if (err != (int)actual_size) { ib_logf(IB_LOG_LEVEL_ERROR, "Corruption: Page is marked as compressed" - " but decompression read only %d bytes " - " size %lu len %lu.", + " but uncompress failed with error %d " + " size " ULINTPF " len " ULINTPF ".", err, actual_size, len); + fflush(stderr); if (return_error) { @@ -598,16 +602,17 @@ fil_decompress_page( #endif /* HAVE_LZ4 */ #ifdef HAVE_LZO case PAGE_LZO_ALGORITHM: { - ulint olen=0; + ulint olen = 0; err = lzo1x_decompress((const unsigned char *)buf+header_len, actual_size,(unsigned char *)in_buf, &olen, NULL); if (err != LZO_E_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) { ib_logf(IB_LOG_LEVEL_ERROR, "Corruption: Page is marked as compressed" - " but decompression read only %ld bytes" - " size %lu len %lu.", - olen, actual_size, len); + " but uncompress failed with error %d " + " size " ULINTPF " len " ULINTPF ".", + err, actual_size, len); + fflush(stderr); if (return_error) { @@ -642,7 +647,7 @@ fil_decompress_page( ib_logf(IB_LOG_LEVEL_ERROR, "Corruption: Page is marked as compressed" " but decompression read only %ld bytes" - " size %lu len %lu.", + " size " ULINTPF "len " ULINTPF ".", dst_pos, actual_size, len); fflush(stderr); @@ -671,7 +676,7 @@ fil_decompress_page( ib_logf(IB_LOG_LEVEL_ERROR, "Corruption: Page is marked as compressed" " but decompression read only %du bytes" - " size %lu len %lu err %d.", + " size " ULINTPF " len " ULINTPF " err %d.", dst_pos, actual_size, len, err); fflush(stderr); @@ -687,7 +692,7 @@ fil_decompress_page( case PAGE_SNAPPY_ALGORITHM: { snappy_status cstatus; - ulint olen = 0; + ulint olen = UNIV_PAGE_SIZE; cstatus = snappy_uncompress( (const char *)(buf+header_len), @@ -695,11 +700,11 @@ fil_decompress_page( (char *)in_buf, (size_t*)&olen); - if (cstatus != SNAPPY_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) { + if (cstatus != SNAPPY_OK || olen != UNIV_PAGE_SIZE) { ib_logf(IB_LOG_LEVEL_ERROR, "Corruption: Page is marked as compressed" - " but decompression read only %lu bytes" - " size %lu len %lu err %d.", + " but decompression read only " ULINTPF " bytes" + " size " ULINTPF " len " ULINTPF " err %d.", olen, actual_size, len, (int)cstatus); fflush(stderr); @@ -708,6 +713,7 @@ fil_decompress_page( } ut_error; } + break; } #endif /* HAVE_SNAPPY */ @@ -733,8 +739,7 @@ fil_decompress_page( memcpy(buf, in_buf, len); error_return: - // Need to free temporal buffer if no buffer was given - if (page_buf == NULL) { + if (page_buf != in_buf) { ut_free(in_buf); } } diff --git a/storage/xtradb/fts/fts0que.cc b/storage/xtradb/fts/fts0que.cc index 2e335c1c255..f24973e26fb 100644 --- a/storage/xtradb/fts/fts0que.cc +++ b/storage/xtradb/fts/fts0que.cc @@ -953,6 +953,18 @@ fts_query_free_doc_ids( query->total_size -= SIZEOF_RBT_CREATE; } +/** +Free the query intersection +@param[in] query query instance */ +static +void +fts_query_free_intersection( + fts_query_t* query) +{ + fts_query_free_doc_ids(query, query->intersection); + query->intersection = NULL; +} + /*******************************************************************//** Add the word to the documents "list" of matching words from the query. We make a copy of the word from the query heap. */ @@ -1311,6 +1323,7 @@ fts_query_intersect( /* error is passed by 'query->error' */ if (query->error != DB_SUCCESS) { ut_ad(query->error == DB_FTS_EXCEED_RESULT_CACHE_LIMIT); + fts_query_free_intersection(query); return(query->error); } @@ -1339,6 +1352,8 @@ fts_query_intersect( ut_a(!query->multi_exist || (query->multi_exist && rbt_size(query->doc_ids) <= n_doc_ids)); + } else if (query->intersection != NULL) { + fts_query_free_intersection(query); } } @@ -1557,6 +1572,11 @@ fts_merge_doc_ids( query, ranking->doc_id, ranking->rank); if (query->error != DB_SUCCESS) { + if (query->intersection != NULL) + { + ut_a(query->oper == FTS_EXIST); + fts_query_free_intersection(query); + } DBUG_RETURN(query->error); } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index cd2d84b4245..7383647496a 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2000, 2016, Oracle and/or its affiliates. +Copyright (c) 2000, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2013, 2017, MariaDB Corporation. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. @@ -876,6 +876,31 @@ innobase_purge_changed_page_bitmaps( /*================================*/ ulonglong lsn) __attribute__((unused)); /*!< in: LSN to purge files up to */ +/** Empty free list algorithm. +Checks if buffer pool is big enough to enable backoff algorithm. +InnoDB empty free list algorithm backoff requires free pages +from LRU for the best performance. +buf_LRU_buf_pool_running_out cancels query if 1/4 of +buffer pool belongs to LRU or freelist. +At the same time buf_flush_LRU_list_batch +keeps up to BUF_LRU_MIN_LEN in LRU. +In order to avoid deadlock baclkoff requires buffer pool +to be at least 4*BUF_LRU_MIN_LEN, +but flush peformance is bad because of trashing +and additional BUF_LRU_MIN_LEN pages are requested. +@param[in] algorithm desired algorithm from srv_empty_free_list_t +@return true if it's possible to enable backoff. */ +static inline +bool +innodb_empty_free_list_algorithm_allowed( + srv_empty_free_list_t algorithm) +{ + long long buf_pool_pages = srv_buf_pool_size / srv_page_size + / srv_buf_pool_instances; + + return(buf_pool_pages >= BUF_LRU_MIN_LEN * (4 + 1) + || algorithm != SRV_EMPTY_FREE_LIST_BACKOFF); +} /** Get the list of foreign keys referencing a specified table table. @@ -1692,28 +1717,6 @@ normalize_table_name_low( ibool set_lower_case); /* in: TRUE if we want to set name to lower case */ -/*************************************************************//** -Checks if buffer pool is big enough to enable backoff algorithm. -InnoDB empty free list algorithm backoff requires free pages -from LRU for the best performance. -buf_LRU_buf_pool_running_out cancels query if 1/4 of -buffer pool belongs to LRU or freelist. -At the same time buf_flush_LRU_list_batch -keeps up to BUF_LRU_MIN_LEN in LRU. -In order to avoid deadlock baclkoff requires buffer pool -to be at least 4*BUF_LRU_MIN_LEN, -but flush peformance is bad because of trashing -and additional BUF_LRU_MIN_LEN pages are requested. -@return true if it's possible to enable backoff. */ -static -bool -innodb_empty_free_list_algorithm_backoff_allowed( - srv_empty_free_list_t - algorithm, /*!< in: desired algorithm - from srv_empty_free_list_t */ - long long buf_pool_pages); /*!< in: total number - of pages inside buffer pool */ - #ifdef NOT_USED /*************************************************************//** Removes old archived transaction log files. @@ -3489,13 +3492,13 @@ innobase_convert_identifier( ibool file_id)/*!< in: TRUE=id is a table or database name; FALSE=id is an UTF-8 string */ { + char nz2[MAX_TABLE_NAME_LEN + 1]; const char* s = id; int q; if (file_id) { char nz[MAX_TABLE_NAME_LEN + 1]; - char nz2[MAX_TABLE_NAME_LEN + 1]; /* Decode the table name. The MySQL function expects a NUL-terminated string. The input and output strings @@ -4358,10 +4361,9 @@ innobase_change_buffering_inited_ok: srv_use_posix_fallocate = (ibool) innobase_use_fallocate; #endif /* Do not enable backoff algorithm for small buffer pool. */ - if (!innodb_empty_free_list_algorithm_backoff_allowed( + if (!innodb_empty_free_list_algorithm_allowed( static_cast<srv_empty_free_list_t>( - srv_empty_free_list_algorithm), - innobase_buffer_pool_size / srv_page_size)) { + srv_empty_free_list_algorithm))) { sql_print_information( "InnoDB: innodb_empty_free_list_algorithm " "has been changed to legacy " @@ -8140,17 +8142,31 @@ build_template_field( templ->rec_field_is_prefix = FALSE; if (dict_index_is_clust(index)) { + templ->rec_field_is_prefix = false; templ->rec_field_no = templ->clust_rec_field_no; templ->rec_prefix_field_no = ULINT_UNDEFINED; } else { - /* If we're in a secondary index, keep track - * of the original index position even if this - * is just a prefix index; we will use this - * later to avoid a cluster index lookup in - * some cases.*/ + /* If we're in a secondary index, keep track of the original + index position even if this is just a prefix index; we will use + this later to avoid a cluster index lookup in some cases.*/ templ->rec_field_no = dict_index_get_nth_col_pos(index, i, &templ->rec_prefix_field_no); + templ->rec_field_is_prefix + = (templ->rec_field_no == ULINT_UNDEFINED) + && (templ->rec_prefix_field_no != ULINT_UNDEFINED); +#ifdef UNIV_DEBUG + if (templ->rec_prefix_field_no != ULINT_UNDEFINED) + { + const dict_field_t* field = dict_index_get_nth_field( + index, + templ->rec_prefix_field_no); + ut_ad(templ->rec_field_is_prefix + == (field->prefix_len != 0)); + } else { + ut_ad(!templ->rec_field_is_prefix); + } +#endif } if (field->real_maybe_null()) { @@ -13080,7 +13096,8 @@ ha_innobase::delete_table( extension, in contrast to ::create */ normalize_table_name(norm_name, name); - if (srv_read_only_mode) { + if (srv_read_only_mode + || srv_force_recovery >= SRV_FORCE_NO_UNDO_LOG_SCAN) { DBUG_RETURN(HA_ERR_TABLE_READONLY); } else if (row_is_magic_monitor_table(norm_name) && check_global_access(thd, PROCESS_ACL)) { @@ -18605,15 +18622,17 @@ innodb_buffer_pool_evict_uncompressed(void) ut_ad(block->page.in_LRU_list); mutex_enter(&block->mutex); - if (!buf_LRU_free_page(&block->page, false)) { - mutex_exit(&block->mutex); - all_evicted = false; - } else { - mutex_exit(&block->mutex); + all_evicted = buf_LRU_free_page(&block->page, false); + mutex_exit(&block->mutex); + + if (all_evicted) { + mutex_enter(&buf_pool->LRU_list_mutex); - } + block = UT_LIST_GET_LAST(buf_pool->unzip_LRU); + } else { - block = prev_block; + block = prev_block; + } } mutex_exit(&buf_pool->LRU_list_mutex); @@ -19827,32 +19846,6 @@ wsrep_fake_trx_id( /*************************************************************//** -Empty free list algorithm. -Checks if buffer pool is big enough to enable backoff algorithm. -InnoDB empty free list algorithm backoff requires free pages -from LRU for the best performance. -buf_LRU_buf_pool_running_out cancels query if 1/4 of -buffer pool belongs to LRU or freelist. -At the same time buf_flush_LRU_list_batch -keeps up to BUF_LRU_MIN_LEN in LRU. -In order to avoid deadlock baclkoff requires buffer pool -to be at least 4*BUF_LRU_MIN_LEN, -but flush peformance is bad because of trashing -and additional BUF_LRU_MIN_LEN pages are requested. -@return true if it's possible to enable backoff. */ -static -bool -innodb_empty_free_list_algorithm_backoff_allowed( - srv_empty_free_list_t algorithm, /*!< in: desired algorithm - from srv_empty_free_list_t */ - long long buf_pool_pages) /*!< in: total number - of pages inside buffer pool */ -{ - return(buf_pool_pages >= BUF_LRU_MIN_LEN * (4 + 1) - || algorithm != SRV_EMPTY_FREE_LIST_BACKOFF); -} - -/*************************************************************//** Empty free list algorithm. This function is registered as a callback with MySQL. @return 0 for valid algorithm */ @@ -19893,13 +19886,11 @@ innodb_srv_empty_free_list_algorithm_validate( return(1); algorithm = static_cast<srv_empty_free_list_t>(algo); - if (!innodb_empty_free_list_algorithm_backoff_allowed( - algorithm, - innobase_buffer_pool_size / srv_page_size)) { + if (!innodb_empty_free_list_algorithm_allowed(algorithm)) { sql_print_warning( "InnoDB: innodb_empty_free_list_algorithm " "= 'backoff' requires at least" - " 20MB buffer pool.\n"); + " 20MB buffer pool instances.\n"); return(1); } diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index 9b4276efaa8..1899165ace0 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -1577,20 +1577,13 @@ directory (buf) to see it. Do not use from outside! */ typedef struct { bool reserved; /*!< true if this slot is reserved */ -#ifdef HAVE_LZO - byte* lzo_mem; /*!< Temporal memory used by LZO */ -#endif byte* crypt_buf; /*!< for encryption the data needs to be copied to a separate buffer before it's encrypted&written. this as a page can be read while it's being flushed */ - byte* crypt_buf_free; /*!< for encryption, allocated buffer - that is then alligned */ byte* comp_buf; /*!< for compression we need temporal buffer because page can be read while it's being flushed */ - byte* comp_buf_free; /*!< for compression, allocated - buffer that is then alligned */ byte* out_buf; /*!< resulting buffer after encryption/compression. This is a pointer and not allocated. */ diff --git a/storage/xtradb/include/buf0dblwr.h b/storage/xtradb/include/buf0dblwr.h index 5582778825c..8e1b00db83c 100644 --- a/storage/xtradb/include/buf0dblwr.h +++ b/storage/xtradb/include/buf0dblwr.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under @@ -56,7 +56,7 @@ recovery, this function loads the pages from double write buffer into memory. */ void buf_dblwr_init_or_load_pages( /*=========================*/ - os_file_t file, + pfs_os_file_t file, char* path, bool load_corrupt_pages); diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index 6da8eb892d9..0290b884ece 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -1192,7 +1192,7 @@ dict_index_get_nth_col_pos( const dict_index_t* index, /*!< in: index */ ulint n, /*!< in: column number */ ulint* prefix_col_pos) /*!< out: col num if prefix */ - __attribute__((nonnull(1), warn_unused_result)); + MY_ATTRIBUTE((nonnull(1), warn_unused_result)); /********************************************************************//** Looks for column n in an index. @return position in internal representation of the index; @@ -1207,7 +1207,7 @@ dict_index_get_nth_col_or_prefix_pos( column prefixes too */ ulint* prefix_col_pos) /*!< out: col num if prefix */ - __attribute__((nonnull(1), warn_unused_result)); + MY_ATTRIBUTE((nonnull(1), warn_unused_result)); /********************************************************************//** Returns TRUE if the index contains a column or a prefix of that column. @return TRUE if contains the column or its prefix */ diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 6b8f164f291..a4e147b9797 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2013, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -210,7 +210,7 @@ struct fsp_open_info { ibool success; /*!< Has the tablespace been opened? */ const char* check_msg; /*!< fil_check_first_page() message */ ibool valid; /*!< Is the tablespace valid? */ - os_file_t file; /*!< File handle */ + pfs_os_file_t file; /*!< File handle */ char* filepath; /*!< File path to open */ lsn_t lsn; /*!< Flushed LSN from header page */ ulint id; /*!< Space ID */ @@ -228,7 +228,7 @@ struct fil_node_t { belongs */ char* name; /*!< path to the file */ ibool open; /*!< TRUE if file open */ - os_file_t handle; /*!< OS handle to the file, if file open */ + pfs_os_file_t handle; /*!< OS handle to the file, if file open */ os_event_t sync_event;/*!< Condition event to group and serialize calls to fsync; os_event_set() and os_event_reset() @@ -808,7 +808,7 @@ UNIV_INTERN const char* fil_read_first_page( /*================*/ - os_file_t data_file, /*!< in: open data file */ + pfs_os_file_t data_file, /*!< in: open data file */ ibool one_read_already, /*!< in: TRUE if min and max parameters below already contain sensible data */ @@ -1340,12 +1340,12 @@ struct PageCallback { Called for every page in the tablespace. If the page was not updated then its state must be set to BUF_PAGE_NOT_USED. For compressed tables the page descriptor memory will be at offset: - block->frame + UNIV_PAGE_SIZE; + block->frame + UNIV_PAGE_SIZE; @param offset - physical offset within the file @param block - block read from file, note it is not from the buffer pool @retval DB_SUCCESS or error code. */ virtual dberr_t operator()( - os_offset_t offset, + os_offset_t offset, buf_block_t* block) UNIV_NOTHROW = 0; /** @@ -1353,7 +1353,7 @@ struct PageCallback { to open it for the file that is being iterated over. @param filename - then physical name of the tablespace file. @param file - OS file handle */ - void set_file(const char* filename, os_file_t file) UNIV_NOTHROW + void set_file(const char* filename, pfs_os_file_t file) UNIV_NOTHROW { m_file = file; m_filepath = filename; @@ -1389,7 +1389,7 @@ struct PageCallback { ulint m_page_size; /** File handle to the tablespace */ - os_file_t m_file; + pfs_os_file_t m_file; /** Physical file path. */ const char* m_filepath; diff --git a/storage/xtradb/include/fil0pagecompress.h b/storage/xtradb/include/fil0pagecompress.h index 73667c5420e..03e16699ce3 100644 --- a/storage/xtradb/include/fil0pagecompress.h +++ b/storage/xtradb/include/fil0pagecompress.h @@ -65,9 +65,8 @@ fil_compress_page( ulint level, /* in: compression level */ ulint block_size, /*!< in: block size */ bool encrypted, /*!< in: is page also encrypted */ - ulint* out_len, /*!< out: actual length of compressed + ulint* out_len); /*!< out: actual length of compressed page */ - byte* lzo_mem); /*!< in: temporal memory used by LZO */ /****************************************************************//** For page compressed pages decompress the page after actual read diff --git a/storage/xtradb/include/ha0ha.h b/storage/xtradb/include/ha0ha.h index 7351b407e8c..58eb581e76a 100644 --- a/storage/xtradb/include/ha0ha.h +++ b/storage/xtradb/include/ha0ha.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -107,7 +107,7 @@ chosen to be a slightly bigger prime number. @param level in: level of the mutexes in the latching order @param n_m in: number of mutexes to protect the hash table; must be a power of 2, or 0 */ -# define ha_create(n_c,n_m,type,level) ha_create_func(n_c,level,n_m,type) +# define ib_create(n_c,n_m,type,level) ha_create_func(n_c,level,n_m,type) #else /* UNIV_SYNC_DEBUG */ /** Creates a hash table. @return own: created table @@ -116,7 +116,7 @@ chosen to be a slightly bigger prime number. @param level in: level of the mutexes in the latching order @param n_m in: number of mutexes to protect the hash table; must be a power of 2, or 0 */ -# define ha_create(n_c,n_m,type,level) ha_create_func(n_c,n_m,type) +# define ib_create(n_c,n_m,type,level) ha_create_func(n_c,n_m,type) #endif /* UNIV_SYNC_DEBUG */ /*************************************************************//** diff --git a/storage/xtradb/include/log0online.h b/storage/xtradb/include/log0online.h index 722336dd6b4..5c3e7d07fd9 100644 --- a/storage/xtradb/include/log0online.h +++ b/storage/xtradb/include/log0online.h @@ -130,7 +130,7 @@ log_online_bitmap_iterator_next( /** Struct for single bitmap file information */ struct log_online_bitmap_file_struct { char name[FN_REFLEN]; /*!< Name with full path */ - os_file_t file; /*!< Handle to opened file */ + pfs_os_file_t file; /*!< Handle to opened file */ ib_uint64_t size; /*!< Size of the file */ os_offset_t offset; /*!< Offset of the next read, or count of already-read bytes diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index 06bb6a6fbac..b17e09cf0fa 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -1,6 +1,6 @@ /*********************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. Copyright (c) 2013, 2017, MariaDB Corporation. @@ -65,26 +65,54 @@ extern ibool os_aio_print_debug; /** File offset in bytes */ typedef ib_uint64_t os_offset_t; -#ifdef __WIN__ -#define SRV_PATH_SEPARATOR '\\' +#ifdef _WIN32 +# define SRV_PATH_SEPARATOR '\\' /** File handle */ -# define os_file_t HANDLE -# define os_file_invalid INVALID_HANDLE_VALUE +typedef HANDLE os_file_t; /** Convert a C file descriptor to a native file handle @param fd file descriptor @return native file handle */ -# define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd) +# define OS_FILE_FROM_FD(fd) reinterpret_cast<HANDLE>(_get_osfhandle(fd)) #else -#define SRV_PATH_SEPARATOR '/' +# define SRV_PATH_SEPARATOR '/' /** File handle */ typedef int os_file_t; -# define os_file_invalid (-1) /** Convert a C file descriptor to a native file handle @param fd file descriptor @return native file handle */ # define OS_FILE_FROM_FD(fd) fd #endif +/** File descriptor with optional PERFORMANCE_SCHEMA instrumentation */ +struct pfs_os_file_t +{ + /** Default constructor */ + pfs_os_file_t() : m_file( +#ifdef _WIN32 + INVALID_HANDLE_VALUE +#else + -1 +#endif + ) +#ifdef UNIV_PFS_IO + , m_psi(NULL) +#endif + {} + + /** The wrapped file handle */ + os_file_t m_file; +#ifdef UNIV_PFS_IO + /** PERFORMANCE_SCHEMA descriptor */ + struct PSI_file *m_psi; +#endif + /** Implicit type conversion. + @return the wrapped file handle */ + operator os_file_t() const { return m_file; } + /** Assignment operator. + @param[in] file file handle to be assigned */ + void operator=(os_file_t file) { m_file = file; } +}; + /** Umask for creating files */ extern ulint os_innodb_umask; @@ -120,6 +148,21 @@ enum os_file_create_t { ON_ERROR_NO_EXIT is set */ }; +/** Options for os_file_advise_func @{ */ +enum os_file_advise_t { + OS_FILE_ADVISE_NORMAL = 1, /*!< no advice on access pattern + (default) */ + OS_FILE_ADVISE_RANDOM = 2, /*!< access in random order */ + OS_FILE_ADVISE_SEQUENTIAL = 4, /*!< access the specified data + sequentially (with lower offsets read + before higher ones) */ + OS_FILE_ADVISE_WILLNEED = 8, /*!< specified data will be accessed + in the near future */ + OS_FILE_ADVISE_DONTNEED = 16, /*!< specified data will not be + accessed in the near future */ + OS_FILE_ADVISE_NOREUSE = 32 /*!< access only once */ +}; + #define OS_FILE_READ_ONLY 333 #define OS_FILE_READ_WRITE 444 #define OS_FILE_READ_ALLOW_DELETE 555 /* for mysqlbackup */ @@ -221,6 +264,8 @@ extern mysql_pfs_key_t innodb_file_bmp_key; various file I/O operations with performance schema. 1) register_pfs_file_open_begin() and register_pfs_file_open_end() are used to register file creation, opening, closing and renaming. +2) register_pfs_file_rename_begin() and register_pfs_file_rename_end() +are used to register file renaming 2) register_pfs_file_io_begin() and register_pfs_file_io_end() are used to register actual file read, write and flush 3) register_pfs_file_close_begin() and register_pfs_file_close_end() @@ -230,17 +275,30 @@ are used to register file deletion operations*/ do { \ locker = PSI_FILE_CALL(get_thread_file_name_locker)( \ state, key, op, name, &locker); \ - if (UNIV_LIKELY(locker != NULL)) { \ + if (locker != NULL) { \ PSI_FILE_CALL(start_file_open_wait)( \ locker, src_file, src_line); \ } \ } while (0) -# define register_pfs_file_open_end(locker, file) \ +# define register_pfs_file_open_end(locker, file, result) \ do { \ - if (UNIV_LIKELY(locker != NULL)) { \ - PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)(\ - locker, file); \ + if (locker != NULL) { \ + file.m_psi = PSI_FILE_CALL( \ + end_file_open_wait)( \ + locker, result); \ + } \ +} while (0) + +# define register_pfs_file_rename_begin(state, locker, key, op, name, \ + src_file, src_line) \ + register_pfs_file_open_begin(state, locker, key, op, name, \ + src_file, src_line) \ + +# define register_pfs_file_rename_end(locker, result) \ +do { \ + if (locker != NULL) { \ + PSI_FILE_CALL(end_file_open_wait)(locker, result); \ } \ } while (0) @@ -266,9 +324,9 @@ do { \ # define register_pfs_file_io_begin(state, locker, file, count, op, \ src_file, src_line) \ do { \ - locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( \ - state, file, op); \ - if (UNIV_LIKELY(locker != NULL)) { \ + locker = PSI_FILE_CALL(get_thread_file_stream_locker)( \ + state, file.m_psi, op); \ + if (locker != NULL) { \ PSI_FILE_CALL(start_file_wait)( \ locker, count, src_file, src_line); \ } \ @@ -276,7 +334,7 @@ do { \ # define register_pfs_file_io_end(locker, count) \ do { \ - if (UNIV_LIKELY(locker != NULL)) { \ + if (locker != NULL) { \ PSI_FILE_CALL(end_file_wait)(locker, count); \ } \ } while (0) @@ -290,11 +348,16 @@ os_file_create os_file_create_simple os_file_create_simple_no_error_handling os_file_close +os_file_close_no_error_handling os_file_rename os_aio os_file_read os_file_read_no_error_handling +os_file_read_no_error_handling_int_fd os_file_write +os_file_write_int_fd +os_file_set_eof_at +os_file_allocate The wrapper functions have the prefix of "innodb_". */ @@ -315,6 +378,9 @@ The wrapper functions have the prefix of "innodb_". */ # define os_file_close(file) \ pfs_os_file_close_func(file, __FILE__, __LINE__) +# define os_file_close_no_error_handling(file) \ + pfs_os_file_close_no_error_handling_func(file, __FILE__, __LINE__) + # define os_aio(type, is_log, mode, name, file, buf, offset, \ n, page_size, message1, message2, space_id, \ trx, write_size) \ @@ -334,9 +400,18 @@ The wrapper functions have the prefix of "innodb_". */ pfs_os_file_read_no_error_handling_func(file, buf, offset, n, \ __FILE__, __LINE__) -# define os_file_write(name, file, buf, offset, n) \ - pfs_os_file_write_func(name, file, buf, offset, n, \ - __FILE__, __LINE__) +# define os_file_read_no_error_handling_int_fd( \ + file, buf, offset, n) \ + pfs_os_file_read_no_error_handling_int_fd_func( \ + file, buf, offset, n, __FILE__, __LINE__) + +# define os_file_write(name, file, buf, offset, n) \ + pfs_os_file_write_func(name, file, buf, offset, \ + n, __FILE__, __LINE__) + +# define os_file_write_int_fd(name, file, buf, offset, n) \ + pfs_os_file_write_int_fd_func(name, file, buf, offset, \ + n, __FILE__, __LINE__) # define os_file_flush(file) \ pfs_os_file_flush_func(file, __FILE__, __LINE__) @@ -349,6 +424,15 @@ The wrapper functions have the prefix of "innodb_". */ # define os_file_delete_if_exists(key, name) \ pfs_os_file_delete_if_exists_func(key, name, __FILE__, __LINE__) + +# define os_file_set_eof_at(file, new_len) \ + pfs_os_file_set_eof_at_func(file, new_len, __FILE__, __LINE__) + +# ifdef HAVE_POSIX_FALLOCATE +# define os_file_allocate(file, offset, len) \ + pfs_os_file_allocate_func(file, offset, len, __FILE__, __LINE__) +# endif + #else /* UNIV_PFS_IO */ /* If UNIV_PFS_IO is not defined, these I/O APIs point @@ -364,7 +448,11 @@ to original un-instrumented file I/O APIs */ os_file_create_simple_no_error_handling_func( \ name, create_mode, access, success, atomic_writes) -# define os_file_close(file) os_file_close_func(file) +# define os_file_close(file) \ + os_file_close_func(file) + +# define os_file_close_no_error_handling(file) \ + os_file_close_no_error_handling_func(file) # define os_aio(type, is_log, mode, name, file, buf, offset, n, page_size, message1, \ message2, space_id, trx, write_size) \ @@ -379,11 +467,17 @@ to original un-instrumented file I/O APIs */ # define os_file_read_no_error_handling(file, buf, offset, n) \ os_file_read_no_error_handling_func(file, buf, offset, n) +# define os_file_read_no_error_handling_int_fd( \ + file, buf, offset, n) \ + os_file_read_no_error_handling_func(file, buf, offset, n) +# define os_file_write_int_fd(name, file, buf, offset, n) \ + os_file_write_func(name, file, buf, offset, n) # define os_file_write(name, file, buf, offset, n) \ os_file_write_func(name, file, buf, offset, n) -# define os_file_flush(file) os_file_flush_func(file) + +# define os_file_flush(file) os_file_flush_func(file) # define os_file_rename(key, oldpath, newpath) \ os_file_rename_func(oldpath, newpath) @@ -393,6 +487,9 @@ to original un-instrumented file I/O APIs */ # define os_file_delete_if_exists(key, name) \ os_file_delete_if_exists_func(name) +# define os_file_set_eof_at(file, new_len) \ + os_file_set_eof_at_func(file, new_len) + #endif /* UNIV_PFS_IO */ /* File types for directory entry data type */ @@ -530,7 +627,7 @@ A simple function to open or create a file. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INTERN -os_file_t +pfs_os_file_t os_file_create_simple_no_error_handling_func( /*=========================================*/ const char* name, /*!< in: name of the file or path as a @@ -565,7 +662,7 @@ Opens an existing file or creates a new. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INTERN -os_file_t +pfs_os_file_t os_file_create_func( /*================*/ const char* name, /*!< in: name of the file or path as a @@ -626,6 +723,42 @@ ibool os_file_close_func( /*===============*/ os_file_t file); /*!< in, own: handle to a file */ +/***********************************************************************//** +NOTE! Use the corresponding macro os_file_close(), not directly this +function! +Closes a file handle. In case of error, error number can be retrieved with +os_file_get_last_error. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_close_no_error_handling_func( +/*===============*/ + os_file_t file); /*!< in, own: handle to a file */ + +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_set_eof_at(), not +directly this function! +Truncates a file at the specified position. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_set_eof_at_func( + os_file_t file, /*!< in: handle to a file */ + ib_uint64_t new_len);/*!< in: new file length */ + +#ifdef HAVE_POSIX_FALLOCATE +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_allocate(), not +directly this function! +Ensures that disk space is allocated for the file. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_allocate_func( + os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len); /*!< in: file region length */ +#endif #ifdef UNIV_PFS_IO /****************************************************************//** @@ -636,7 +769,7 @@ os_file_create_simple() which opens or creates a file. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_simple_func( /*===========================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -661,7 +794,7 @@ monitor file creation/open. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_simple_no_error_handling_func( /*=============================================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -687,7 +820,7 @@ Add instrumentation to monitor file creation/open. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_func( /*====================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -718,7 +851,20 @@ UNIV_INLINE ibool pfs_os_file_close_func( /*===================*/ - os_file_t file, /*!< in, own: handle to a file */ + pfs_os_file_t file, /*!< in, own: handle to a file */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_close_no_error_handling(), +not directly this function! +A performance schema instrumented wrapper function for +os_file_close_no_error_handling(). +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_close_no_error_handling_func( +/*===================*/ + pfs_os_file_t file, /*!< in, own: handle to a file */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ /*******************************************************************//** @@ -731,7 +877,7 @@ UNIV_INLINE ibool pfs_os_file_read_func( /*==================*/ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ @@ -750,7 +896,7 @@ UNIV_INLINE ibool pfs_os_file_read_no_error_handling_func( /*====================================*/ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ @@ -772,7 +918,7 @@ pfs_os_aio_func( ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read or from which to write */ os_offset_t offset, /*!< in: file offset where to read or write */ @@ -807,7 +953,7 @@ pfs_os_file_write_func( /*===================*/ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ const void* buf, /*!< in: buffer from which to write */ os_offset_t offset, /*!< in: file offset where to write */ ulint n, /*!< in: number of bytes to write */ @@ -824,7 +970,7 @@ UNIV_INLINE ibool pfs_os_file_flush_func( /*===================*/ - os_file_t file, /*!< in, own: handle to a file */ + pfs_os_file_t file, /*!< in, own: handle to a file */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ @@ -876,16 +1022,66 @@ pfs_os_file_delete_if_exists_func( string */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line);/*!< in: line where the func invoked */ + +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_set_eof_at(), not +directly this function! +This is the performance schema instrumented wrapper function for +os_file_set_eof_at() +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_set_eof_at_func( + pfs_os_file_t file, /*!< in: handle to a file */ + ib_uint64_t new_len,/*!< in: new file length */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ + +#ifdef HAVE_POSIX_FALLOCATE +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_allocate(), not +directly this function! +Ensures that disk space is allocated for the file. +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_allocate_func( + pfs_os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len, /*!< in: file region length */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line);/*!< in: line where the func invoked */ +#endif + #endif /* UNIV_PFS_IO */ /***********************************************************************//** -Closes a file handle. -@return TRUE if success */ +Checks if the file is marked as invalid. +@return TRUE if invalid */ UNIV_INTERN -ibool -os_file_close_no_error_handling( -/*============================*/ - os_file_t file); /*!< in, own: handle to a file */ +bool +os_file_is_invalid( + pfs_os_file_t file); /*!< in, own: handle to a file */ + +/***********************************************************************//** +Marks the file as invalid. */ +UNIV_INTERN +void +os_file_mark_invalid( + pfs_os_file_t* file); /*!< out: pointer to a handle to a file */ + +/***********************************************************************//** +Announces an intention to access file data in a specific pattern in the +future. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_advise( + pfs_os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len, /*!< in: file region length */ + ulint advice);/*!< in: advice for access pattern */ + /***********************************************************************//** Gets a file size. @return file size, or (os_offset_t) -1 on failure */ @@ -893,7 +1089,7 @@ UNIV_INTERN os_offset_t os_file_get_size( /*=============*/ - os_file_t file) /*!< in: handle to a file */ + pfs_os_file_t file) /*!< in: handle to a file */ MY_ATTRIBUTE((warn_unused_result)); /** Set the size of a newly created file. @param[in] name file name @@ -905,7 +1101,7 @@ UNIV_INTERN bool os_file_set_size( const char* name, - os_file_t file, + pfs_os_file_t file, os_offset_t size, bool is_sparse = false) MY_ATTRIBUTE((nonnull, warn_unused_result)); @@ -918,14 +1114,6 @@ os_file_set_eof( /*============*/ FILE* file); /*!< in: file to be truncated */ /***********************************************************************//** -Truncates a file at the specified position. -@return TRUE if success */ -UNIV_INTERN -ibool -os_file_set_eof_at( - os_file_t file, /*!< in: handle to a file */ - ib_uint64_t new_len);/*!< in: new file length */ -/***********************************************************************//** NOTE! Use the corresponding macro os_file_flush(), not directly this function! Flushes the write buffers of a given file to the disk. @return TRUE if success */ @@ -1155,7 +1343,7 @@ os_aio_func( caution! */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read or from which to write */ os_offset_t offset, /*!< in: file offset where to read or write */ diff --git a/storage/xtradb/include/os0file.ic b/storage/xtradb/include/os0file.ic index b284d7ea9ac..72ac9d9dd6a 100644 --- a/storage/xtradb/include/os0file.ic +++ b/storage/xtradb/include/os0file.ic @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (c) 2010, 2011, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, SkySQL Ab. All Rights Reserved. +Copyright (c) 2010, 2017, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -35,7 +35,7 @@ os_file_create_simple() which opens or creates a file. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_simple_func( /*===========================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -50,7 +50,7 @@ pfs_os_file_create_simple_func( const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { - os_file_t file; + pfs_os_file_t file; struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; @@ -64,8 +64,9 @@ pfs_os_file_create_simple_func( file = os_file_create_simple_func(name, create_mode, access_type, success, atomic_writes); - /* Regsiter the returning "file" value with the system */ - register_pfs_file_open_end(locker, file); + /* Register psi value for the file */ + register_pfs_file_open_end(locker, file, + (*success == TRUE ? success : 0)); return(file); } @@ -79,7 +80,7 @@ monitor file creation/open. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_simple_no_error_handling_func( /*=============================================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -96,7 +97,7 @@ pfs_os_file_create_simple_no_error_handling_func( const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { - os_file_t file; + pfs_os_file_t file; struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; @@ -108,9 +109,10 @@ pfs_os_file_create_simple_no_error_handling_func( name, src_file, src_line); file = os_file_create_simple_no_error_handling_func( - name, create_mode, access_type, success, atomic_writes); + name, create_mode, access_type, success, atomic_writes); - register_pfs_file_open_end(locker, file); + register_pfs_file_open_end(locker, file, + (*success == TRUE ? success : 0)); return(file); } @@ -123,7 +125,7 @@ Add instrumentation to monitor file creation/open. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INLINE -os_file_t +pfs_os_file_t pfs_os_file_create_func( /*====================*/ mysql_pfs_key_t key, /*!< in: Performance Schema Key */ @@ -144,7 +146,7 @@ pfs_os_file_create_func( const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { - os_file_t file; + pfs_os_file_t file; struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; @@ -158,7 +160,8 @@ pfs_os_file_create_func( file = os_file_create_func(name, create_mode, purpose, type, success, atomic_writes); - register_pfs_file_open_end(locker, file); + register_pfs_file_open_end(locker, file, + (*success == TRUE ? success : 0)); return(file); } @@ -172,7 +175,7 @@ UNIV_INLINE ibool pfs_os_file_close_func( /*===================*/ - os_file_t file, /*!< in, own: handle to a file */ + pfs_os_file_t file, /*!< in, own: handle to a file */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -190,6 +193,34 @@ pfs_os_file_close_func( return(result); } +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_close_no_error_handling(), +not directly this function! +A performance schema instrumented wrapper function for +os_file_close_no_error_handling(). +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_close_no_error_handling_func( +/*===================*/ + pfs_os_file_t file, /*!< in, own: handle to a file */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + bool result; + struct PSI_file_locker* locker = NULL; + PSI_file_locker_state state; + + /* register the file close */ + register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CLOSE, + src_file, src_line); + + result = os_file_close_no_error_handling_func(file); + + register_pfs_file_io_end(locker, 0); + + return(result); +} /*******************************************************************//** NOTE! Please use the corresponding macro os_aio(), not directly this @@ -206,7 +237,7 @@ pfs_os_aio_func( ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read or from which to write */ os_offset_t offset, /*!< in: file offset where to read or write */ @@ -260,7 +291,7 @@ UNIV_INLINE ibool pfs_os_file_read_func( /*==================*/ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ @@ -294,7 +325,7 @@ UNIV_INLINE ibool pfs_os_file_read_no_error_handling_func( /*====================================*/ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read */ os_offset_t offset, /*!< in: file offset where to read */ ulint n, /*!< in: number of bytes to read */ @@ -315,6 +346,42 @@ pfs_os_file_read_no_error_handling_func( return(result); } +/** NOTE! Please use the corresponding macro +os_file_read_no_error_handling_int_fd(), not directly this function! +This is the performance schema instrumented wrapper function for +os_file_read_no_error_handling_int_fd_func() which requests a +synchronous read operation. +@return TRUE if request was successful, FALSE if fail */ +UNIV_INLINE +ibool +pfs_os_file_read_no_error_handling_int_fd_func( + int file, /*!< in: handle to a file */ + void* buf, /*!< in: buffer where to read */ + os_offset_t offset, /*!< in: file offset where to read */ + ulint n, /*!< in: number of bytes to read */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + PSI_file_locker_state state; + struct PSI_file_locker* locker; + + locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( + &state, file, PSI_FILE_READ); + if (locker != NULL) { + PSI_FILE_CALL(start_file_wait)( + locker, n, + __FILE__, __LINE__); + } + ibool result = os_file_read_no_error_handling_func( + OS_FILE_FROM_FD(file), buf, offset, n); + + if (locker != NULL) { + PSI_FILE_CALL(end_file_wait)(locker, n); + } + + return(result); +} + /*******************************************************************//** NOTE! Please use the corresponding macro os_file_write(), not directly this function! @@ -327,7 +394,7 @@ pfs_os_file_write_func( /*===================*/ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ const void* buf, /*!< in: buffer from which to write */ os_offset_t offset, /*!< in: file offset where to write */ ulint n, /*!< in: number of bytes to write */ @@ -348,6 +415,43 @@ pfs_os_file_write_func( return(result); } +/** NOTE! Please use the corresponding macro os_file_write(), not +directly this function! +This is the performance schema instrumented wrapper function for +os_file_write() which requests a synchronous write operation. +@return TRUE if request was successful, FALSE if fail */ +UNIV_INLINE +ibool +pfs_os_file_write_int_fd_func( + const char* name, /*!< in: name of the file or path as a + null-terminated string */ + int file, /*!< in: handle to a file */ + const void* buf, /*!< in: buffer from which to write */ + os_offset_t offset, /*!< in: file offset where to write */ + ulint n, /*!< in: number of bytes to write */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + PSI_file_locker_state state; + struct PSI_file_locker* locker = NULL; + + locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( + &state, file, PSI_FILE_WRITE); + if (locker != NULL) { + PSI_FILE_CALL(start_file_wait)( + locker, n, + __FILE__, __LINE__); + } + ibool result = os_file_write_func( + name, OS_FILE_FROM_FD(file), buf, offset, n); + + if (locker != NULL) { + PSI_FILE_CALL(end_file_wait)(locker, n); + } + + return(result); +} + /***********************************************************************//** NOTE! Please use the corresponding macro os_file_flush(), not directly this function! @@ -358,7 +462,7 @@ UNIV_INLINE ibool pfs_os_file_flush_func( /*===================*/ - os_file_t file, /*!< in, own: handle to a file */ + pfs_os_file_t file, /*!< in, own: handle to a file */ const char* src_file,/*!< in: file name where func invoked */ ulint src_line)/*!< in: line where the func invoked */ { @@ -396,12 +500,12 @@ pfs_os_file_rename_func( struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; - register_pfs_file_open_begin(&state, locker, key, PSI_FILE_RENAME, newpath, + register_pfs_file_rename_begin(&state, locker, key, PSI_FILE_RENAME, newpath, src_file, src_line); result = os_file_rename_func(oldpath, newpath); - register_pfs_file_open_end(locker, 0); + register_pfs_file_rename_end(locker, 0); return(result); } @@ -465,4 +569,61 @@ pfs_os_file_delete_if_exists_func( return(result); } + +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_set_eof_at(), not +directly this function! +This is the performance schema instrumented wrapper function for +os_file_set_eof_at() +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_set_eof_at_func( + pfs_os_file_t file, /*!< in: handle to a file */ + ib_uint64_t new_len,/*!< in: new file length */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + bool result; + struct PSI_file_locker* locker = NULL; + PSI_file_locker_state state; + + register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CHSIZE, + src_file, src_line); + result = os_file_set_eof_at_func(file, new_len); + + register_pfs_file_io_end(locker, 0); + + return(result); +} + +#ifdef HAVE_POSIX_FALLOCATE +/***********************************************************************//** +NOTE! Please use the corresponding macro os_file_allocate(), not +directly this function! +Ensures that disk space is allocated for the file. +@return TRUE if success */ +UNIV_INLINE +bool +pfs_os_file_allocate_func( + pfs_os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len, /*!< in: file region length */ + const char* src_file,/*!< in: file name where func invoked */ + ulint src_line)/*!< in: line where the func invoked */ +{ + bool result; + struct PSI_file_locker* locker = NULL; + PSI_file_locker_state state; + + register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CHSIZE, + src_file, src_line); + result = os_file_allocate_func(file, offset, len); + + register_pfs_file_io_end(locker, 0); + + return(result); +} +#endif + #endif /* UNIV_PFS_IO */ diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h index 2bd17980896..a8503a5cfda 100644 --- a/storage/xtradb/include/row0mysql.h +++ b/storage/xtradb/include/row0mysql.h @@ -614,7 +614,7 @@ struct mysql_row_templ_t { Innobase record in the current index; not defined if template_type is ROW_MYSQL_WHOLE_ROW */ - ibool rec_field_is_prefix; /* is this field in a prefix index? */ + bool rec_field_is_prefix; /* is this field in a prefix index? */ ulint rec_prefix_field_no; /* record field, even if just a prefix; same as rec_field_no when not a prefix, otherwise rec_field_no is diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index cf7824d91e7..4e98ce0f1cb 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All rights reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, 2009, Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2013, 2017, MariaDB Corporation. @@ -1096,6 +1096,13 @@ UNIV_INTERN void srv_purge_wakeup(); +/** Check whether given space id is undo tablespace id +@param[in] space_id space id to check +@return true if it is undo tablespace else false. */ +bool +srv_is_undo_tablespace( + ulint space_id); + /** Status variables to be passed to MySQL */ struct export_var_t{ ulint innodb_adaptive_hash_hash_searches; diff --git a/storage/xtradb/include/srv0start.h b/storage/xtradb/include/srv0start.h index 963b767f0fb..a60776a4665 100644 --- a/storage/xtradb/include/srv0start.h +++ b/storage/xtradb/include/srv0start.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -139,6 +139,8 @@ extern ibool srv_startup_is_before_trx_rollback_phase; /** TRUE if a raw partition is in use */ extern ibool srv_start_raw_disk_in_use; +/** Undo tablespaces starts with space_id. */ +extern ulint srv_undo_space_id_start; /** Shutdown state */ enum srv_shutdown_state { diff --git a/storage/xtradb/include/trx0xa.h b/storage/xtradb/include/trx0xa.h index 7caddfb7ba4..4d5adc68dcd 100644 --- a/storage/xtradb/include/trx0xa.h +++ b/storage/xtradb/include/trx0xa.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -24,6 +24,8 @@ this program; if not, write to the Free Software Foundation, Inc., #ifndef XA_H #define XA_H +#include "handler.h" + /* * Transaction branch identification: XID and NULLXID: */ @@ -35,17 +37,6 @@ this program; if not, write to the Free Software Foundation, Inc., #define MAXGTRIDSIZE 64 /*!< maximum size in bytes of gtrid */ #define MAXBQUALSIZE 64 /*!< maximum size in bytes of bqual */ -/** X/Open XA distributed transaction identifier */ -struct xid_t { - long formatID; /*!< format identifier; -1 - means that the XID is null */ - long gtrid_length; /*!< value from 1 through 64 */ - long bqual_length; /*!< value from 1 through 64 */ - char data[XIDDATASIZE]; /*!< distributed transaction - identifier */ -}; -/** X/Open XA distributed transaction identifier */ -typedef struct xid_t XID; #endif /** X/Open XA distributed transaction status codes */ /* @{ */ diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 310053b9145..23c8c0a659d 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -45,10 +45,10 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 5 #define INNODB_VERSION_MINOR 6 -#define INNODB_VERSION_BUGFIX 35 +#define INNODB_VERSION_BUGFIX 36 #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 80.0 +#define PERCONA_INNODB_VERSION 82.0 #endif /* Enable UNIV_LOG_ARCHIVE in XtraDB */ @@ -146,14 +146,8 @@ HAVE_PSI_INTERFACE is defined. */ #if defined HAVE_PSI_INTERFACE && !defined UNIV_HOTBACKUP # define UNIV_PFS_MUTEX # define UNIV_PFS_RWLOCK -/* For I/O instrumentation, performance schema rely -on a native descriptor to identify the file, this -descriptor could conflict with our OS level descriptor. -Disable IO instrumentation on Windows until this is -resolved */ -# ifndef __WIN__ -# define UNIV_PFS_IO -# endif + +# define UNIV_PFS_IO # define UNIV_PFS_THREAD /* There are mutexes/rwlocks that we want to exclude from diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc index 309de7daaf8..d39bcb87117 100644 --- a/storage/xtradb/log/log0log.cc +++ b/storage/xtradb/log/log0log.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Google Inc. Copyright (c) 2014, 2017, MariaDB Corporation. @@ -2804,7 +2804,7 @@ log_group_archive( /*==============*/ log_group_t* group) /*!< in: log group */ { - os_file_t file_handle; + pfs_os_file_t file_handle; lsn_t start_lsn; lsn_t end_lsn; char name[OS_FILE_MAX_PATH]; @@ -3639,8 +3639,8 @@ wait_suspend_loop: switch (srv_get_active_thread_type()) { case SRV_NONE: - srv_shutdown_state = SRV_SHUTDOWN_FLUSH_PHASE; if (!srv_n_fil_crypt_threads_started) { + srv_shutdown_state = SRV_SHUTDOWN_FLUSH_PHASE; break; } os_event_set(fil_crypt_threads_event); @@ -3820,7 +3820,8 @@ wait_suspend_loop: ut_a(freed); ut_a(lsn == log_sys->lsn); - ut_ad(lsn == log_sys->last_checkpoint_lsn); + ut_ad(srv_force_recovery >= SRV_FORCE_NO_LOG_REDO + || lsn == log_sys->last_checkpoint_lsn); if (lsn < srv_start_lsn) { ib_logf(IB_LOG_LEVEL_ERROR, diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc index 74f2e2360a8..27382977e5c 100644 --- a/storage/xtradb/log/log0online.cc +++ b/storage/xtradb/log/log0online.cc @@ -329,7 +329,7 @@ log_online_read_last_tracked_lsn(void) lsn_t result; os_offset_t read_offset = log_bmp_sys->out.offset; - while (!checksum_ok && read_offset > 0 && !is_last_page) + while ((!checksum_ok || !is_last_page) && read_offset > 0) { read_offset -= MODIFIED_PAGE_BLOCK_SIZE; log_bmp_sys->out.offset = read_offset; @@ -554,9 +554,9 @@ log_online_rotate_bitmap_file( lsn_t next_file_start_lsn) /*!<in: the start LSN name part */ { - if (log_bmp_sys->out.file != os_file_invalid) { + if (!os_file_is_invalid(log_bmp_sys->out.file)) { os_file_close(log_bmp_sys->out.file); - log_bmp_sys->out.file = os_file_invalid; + os_file_mark_invalid(&log_bmp_sys->out.file); } log_bmp_sys->out_seq_num++; log_online_make_bitmap_name(next_file_start_lsn); @@ -723,7 +723,11 @@ log_online_read_init(void) } last_tracked_lsn = log_online_read_last_tracked_lsn(); + /* Do not rotate if we truncated the file to zero length - we + can just start writing there */ + const bool need_rotate = (last_tracked_lsn != 0); if (!last_tracked_lsn) { + last_tracked_lsn = last_file_start_lsn; } @@ -735,7 +739,8 @@ log_online_read_init(void) } else { file_start_lsn = tracking_start_lsn; } - ut_a(log_online_rotate_bitmap_file(file_start_lsn)); + ut_a(!need_rotate + || log_online_rotate_bitmap_file(file_start_lsn)); if (last_tracked_lsn < tracking_start_lsn) { @@ -773,9 +778,9 @@ log_online_read_shutdown(void) ib_rbt_node_t *free_list_node = log_bmp_sys->page_free_list; - if (log_bmp_sys->out.file != os_file_invalid) { + if (!os_file_is_invalid(log_bmp_sys->out.file)) { os_file_close(log_bmp_sys->out.file); - log_bmp_sys->out.file = os_file_invalid; + os_file_mark_invalid(&log_bmp_sys->out.file); } rbt_free(log_bmp_sys->modified_pages); @@ -1114,6 +1119,18 @@ log_online_write_bitmap_page( } }); + /* A crash injection site that ensures last checkpoint LSN > last + tracked LSN, so that LSN tracking for this interval is tested. */ + DBUG_EXECUTE_IF("crash_before_bitmap_write", + { + ulint space_id + = mach_read_from_4(block + + MODIFIED_PAGE_SPACE_ID); + if (space_id > 0) + DBUG_SUICIDE(); + }); + + ibool success = os_file_write(log_bmp_sys->out.name, log_bmp_sys->out.file, block, log_bmp_sys->out.offset, @@ -1137,10 +1154,8 @@ log_online_write_bitmap_page( return FALSE; } -#ifdef UNIV_LINUX - posix_fadvise(log_bmp_sys->out.file, log_bmp_sys->out.offset, - MODIFIED_PAGE_BLOCK_SIZE, POSIX_FADV_DONTNEED); -#endif + os_file_advise(log_bmp_sys->out.file, log_bmp_sys->out.offset, + MODIFIED_PAGE_BLOCK_SIZE, OS_FILE_ADVISE_DONTNEED); log_bmp_sys->out.offset += MODIFIED_PAGE_BLOCK_SIZE; return TRUE; @@ -1262,10 +1277,6 @@ log_online_follow_redo_log(void) group = UT_LIST_GET_NEXT(log_groups, group); } - /* A crash injection site that ensures last checkpoint LSN > last - tracked LSN, so that LSN tracking for this interval is tested. */ - DBUG_EXECUTE_IF("crash_before_bitmap_write", DBUG_SUICIDE();); - result = log_online_write_bitmap(); log_bmp_sys->start_lsn = log_bmp_sys->end_lsn; log_set_tracked_lsn(log_bmp_sys->start_lsn); @@ -1433,6 +1444,7 @@ log_online_setup_bitmap_file_range( if (UNIV_UNLIKELY(array_pos >= bitmap_files->count)) { log_online_diagnose_inconsistent_dir(bitmap_files); + os_file_closedir(bitmap_dir); return FALSE; } @@ -1535,10 +1547,8 @@ log_online_open_bitmap_file_read_only( bitmap_file->size = os_file_get_size(bitmap_file->file); bitmap_file->offset = 0; -#ifdef UNIV_LINUX - posix_fadvise(bitmap_file->file, 0, 0, POSIX_FADV_SEQUENTIAL); - posix_fadvise(bitmap_file->file, 0, 0, POSIX_FADV_NOREUSE); -#endif + os_file_advise(bitmap_file->file, 0, 0, OS_FILE_ADVISE_SEQUENTIAL); + os_file_advise(bitmap_file->file, 0, 0, OS_FILE_ADVISE_NOREUSE); return TRUE; } @@ -1624,7 +1634,7 @@ log_online_bitmap_iterator_init( /* Empty range */ i->in_files.count = 0; i->in_files.files = NULL; - i->in.file = os_file_invalid; + os_file_mark_invalid(&i->in.file); i->page = NULL; i->failed = FALSE; return TRUE; @@ -1642,7 +1652,7 @@ log_online_bitmap_iterator_init( if (i->in_files.count == 0) { /* Empty range */ - i->in.file = os_file_invalid; + os_file_mark_invalid(&i->in.file); i->page = NULL; i->failed = FALSE; return TRUE; @@ -1681,10 +1691,10 @@ log_online_bitmap_iterator_release( { ut_a(i); - if (i->in.file != os_file_invalid) { + if (!os_file_is_invalid(i->in.file)) { os_file_close(i->in.file); - i->in.file = os_file_invalid; + os_file_mark_invalid(&i->in.file); } if (i->in_files.files) { @@ -1738,8 +1748,9 @@ log_online_bitmap_iterator_next( /* Advance file */ i->in_i++; - success = os_file_close_no_error_handling(i->in.file); - i->in.file = os_file_invalid; + success = os_file_close_no_error_handling( + i->in.file); + os_file_mark_invalid(&i->in.file); if (UNIV_UNLIKELY(!success)) { os_file_get_last_error(TRUE); @@ -1848,7 +1859,7 @@ log_online_purge_changed_page_bitmaps( /* If we have to delete the current output file, close it first. */ os_file_close(log_bmp_sys->out.file); - log_bmp_sys->out.file = os_file_invalid; + os_file_mark_invalid(&log_bmp_sys->out.file); } for (i = 0; i < bitmap_files.count; i++) { diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc index 978e6051711..0b797fd6c23 100644 --- a/storage/xtradb/log/log0recv.cc +++ b/storage/xtradb/log/log0recv.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. Copyright (c) 2013, 2017, MariaDB Corporation. @@ -332,6 +332,7 @@ DECLARE_THREAD(recv_writer_thread)( /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); ut_ad(!srv_read_only_mode); #ifdef UNIV_PFS_THREAD @@ -362,6 +363,7 @@ DECLARE_THREAD(recv_writer_thread)( recv_writer_thread_active = false; + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 4f219b18428..34336a4bb7d 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -1,6 +1,6 @@ /*********************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. Copyright (c) 2013, 2017, MariaDB Corporation. @@ -114,10 +114,12 @@ my_umask */ #ifndef __WIN__ /** Umask for creating files */ UNIV_INTERN ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; +# define os_file_invalid (-1) #else /** Umask for creating files */ UNIV_INTERN ulint os_innodb_umask = 0; -#define ECANCELED 125 +# define ECANCELED 125 +# define os_file_invalid INVALID_HANDLE_VALUE #endif /* __WIN__ */ #ifndef UNIV_HOTBACKUP @@ -221,7 +223,7 @@ struct os_aio_slot_t{ ulint page_size; /*!< UNIV_PAGE_SIZE or zip_size */ os_offset_t offset; /*!< file offset in bytes */ - os_file_t file; /*!< file where to read or write */ + pfs_os_file_t file; /*!< file where to read or write */ const char* name; /*!< file name or path */ ibool io_already_done;/*!< used only in simulated aio: TRUE if the physical i/o already @@ -1568,7 +1570,7 @@ A simple function to open or create a file. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INTERN -os_file_t +pfs_os_file_t os_file_create_simple_no_error_handling_func( /*=========================================*/ const char* name, /*!< in: name of the file or path as a @@ -1584,7 +1586,7 @@ os_file_create_simple_no_error_handling_func( ulint atomic_writes) /*! in: atomic writes table option value */ { - os_file_t file; + pfs_os_file_t file; atomic_writes_t awrites = (atomic_writes_t) atomic_writes; *success = FALSE; @@ -1593,7 +1595,6 @@ os_file_create_simple_no_error_handling_func( DWORD create_flag; DWORD attributes = 0; DWORD share_mode = FILE_SHARE_READ; - ut_a(name); ut_a(!(create_mode & OS_FILE_ON_ERROR_SILENT)); @@ -1610,8 +1611,8 @@ os_file_create_simple_no_error_handling_func( ib_logf(IB_LOG_LEVEL_ERROR, "Unknown file create mode (%lu) for file '%s'", create_mode, name); - - return((os_file_t) -1); + file = INVALID_HANDLE_VALUE; + return(file); } if (access_type == OS_FILE_READ_ONLY) { @@ -1635,8 +1636,8 @@ os_file_create_simple_no_error_handling_func( ib_logf(IB_LOG_LEVEL_ERROR, "Unknown file access type (%lu) for file '%s'", access_type, name); - - return((os_file_t) -1); + file = INVALID_HANDLE_VALUE; + return(file); } if (IS_XTRABACKUP()) { @@ -1668,11 +1669,10 @@ os_file_create_simple_no_error_handling_func( } } - *success = (file != INVALID_HANDLE_VALUE); + *success = file != INVALID_HANDLE_VALUE; #else /* __WIN__ */ int create_flag; const char* mode_str = NULL; - ut_a(name); if (create_mode != OS_FILE_OPEN && create_mode != OS_FILE_OPEN_RAW) WAIT_ALLOW_WRITES(); @@ -1717,13 +1717,13 @@ os_file_create_simple_no_error_handling_func( ib_logf(IB_LOG_LEVEL_ERROR, "Unknown file create mode (%lu) for file '%s'", create_mode, name); - - return((os_file_t) -1); + file = -1; + return(file); } - file = ::open(name, create_flag, os_innodb_umask); + file = open(name, create_flag, os_innodb_umask); - *success = file == -1 ? FALSE : TRUE; + *success = file != -1; /* This function is always called for data files, we should disable OS caching (O_DIRECT) here as we do in os_file_create_func(), so @@ -1872,7 +1872,7 @@ Opens an existing file or creates a new. @return own: handle to the file, not defined if error, error number can be retrieved with os_file_get_last_error */ UNIV_INTERN -os_file_t +pfs_os_file_t os_file_create_func( /*================*/ const char* name, /*!< in: name of the file or path as a @@ -1890,7 +1890,7 @@ os_file_create_func( ulint atomic_writes) /*! in: atomic writes table option value */ { - os_file_t file; + pfs_os_file_t file; ibool retry; ibool on_error_no_exit; ibool on_error_silent; @@ -1901,14 +1901,16 @@ os_file_create_func( "ib_create_table_fail_disk_full", *success = FALSE; SetLastError(ERROR_DISK_FULL); - return((os_file_t) -1); + file = INVALID_HANDLE_VALUE; + return(file); ); #else /* __WIN__ */ DBUG_EXECUTE_IF( "ib_create_table_fail_disk_full", *success = FALSE; errno = ENOSPC; - return((os_file_t) -1); + file = -1; + return(file); ); #endif /* __WIN__ */ @@ -1962,7 +1964,8 @@ os_file_create_func( "Unknown file create mode (%lu) for file '%s'", create_mode, name); - return((os_file_t) -1); + file = INVALID_HANDLE_VALUE; + return(file); } DWORD attributes = 0; @@ -1986,8 +1989,8 @@ os_file_create_func( ib_logf(IB_LOG_LEVEL_ERROR, "Unknown purpose flag (%lu) while opening file '%s'", purpose, name); - - return((os_file_t)(-1)); + file = INVALID_HANDLE_VALUE; + return(file); } #ifdef UNIV_NON_BUFFERED_IO @@ -2113,7 +2116,8 @@ os_file_create_func( "Unknown file create mode (%lu) for file '%s'", create_mode, name); - return((os_file_t) -1); + file = -1; + return(file); } ut_a(type == OS_LOG_FILE || type == OS_DATA_FILE); @@ -2133,7 +2137,7 @@ os_file_create_func( #endif /* O_SYNC */ do { - file = ::open(name, create_flag, os_innodb_umask); + file = open(name, create_flag, os_innodb_umask); if (file == -1) { const char* operation; @@ -2442,8 +2446,8 @@ os_file_close_func( Closes a file handle. @return TRUE if success */ UNIV_INTERN -ibool -os_file_close_no_error_handling( +bool +os_file_close_no_error_handling_func( /*============================*/ os_file_t file) /*!< in, own: handle to a file */ { @@ -2453,10 +2457,10 @@ os_file_close_no_error_handling( ret = CloseHandle(file); if (ret) { - return(TRUE); + return(true); } - return(FALSE); + return(false); #else int ret; @@ -2464,10 +2468,83 @@ os_file_close_no_error_handling( if (ret == -1) { - return(FALSE); + return(false); } - return(TRUE); + return(true); +#endif /* __WIN__ */ +} + +#ifdef HAVE_POSIX_FALLOCATE +/***********************************************************************//** +Ensures that disk space is allocated for the file. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_allocate_func( + os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len) /*!< in: file region length */ +{ + return(posix_fallocate(file, offset, len) == 0); +} +#endif + +/***********************************************************************//** +Checks if the file is marked as invalid. +@return TRUE if invalid */ +UNIV_INTERN +bool +os_file_is_invalid( + pfs_os_file_t file) /*!< in, own: handle to a file */ +{ + return(file == os_file_invalid); +} + +/***********************************************************************//** +Marks the file as invalid. */ +UNIV_INTERN +void +os_file_mark_invalid( + pfs_os_file_t* file) /*!< out: pointer to a handle to a file */ +{ + file->m_file = os_file_invalid; +} + +/***********************************************************************//** +Announces an intention to access file data in a specific pattern in the +future. +@return TRUE if success */ +UNIV_INTERN +bool +os_file_advise( + pfs_os_file_t file, /*!< in, own: handle to a file */ + os_offset_t offset, /*!< in: file region offset */ + os_offset_t len, /*!< in: file region length */ + ulint advice)/*!< in: advice for access pattern */ +{ +#ifdef __WIN__ + return(true); +#else +#ifdef UNIV_LINUX + int native_advice = 0; + if ((advice & OS_FILE_ADVISE_NORMAL) != 0) + native_advice |= POSIX_FADV_NORMAL; + if ((advice & OS_FILE_ADVISE_RANDOM) != 0) + native_advice |= POSIX_FADV_RANDOM; + if ((advice & OS_FILE_ADVISE_SEQUENTIAL) != 0) + native_advice |= POSIX_FADV_SEQUENTIAL; + if ((advice & OS_FILE_ADVISE_WILLNEED) != 0) + native_advice |= POSIX_FADV_WILLNEED; + if ((advice & OS_FILE_ADVISE_DONTNEED) != 0) + native_advice |= POSIX_FADV_DONTNEED; + if ((advice & OS_FILE_ADVISE_NOREUSE) != 0) + native_advice |= POSIX_FADV_NOREUSE; + + return(posix_fadvise(file, offset, len, native_advice) == 0); +#else + return(true); +#endif #endif /* __WIN__ */ } @@ -2478,7 +2555,7 @@ UNIV_INTERN os_offset_t os_file_get_size( /*=============*/ - os_file_t file) /*!< in: handle to a file */ + pfs_os_file_t file) /*!< in: handle to a file */ { #ifdef __WIN__ os_offset_t offset; @@ -2496,6 +2573,7 @@ os_file_get_size( return(offset); #else return((os_offset_t) lseek(file, 0, SEEK_END)); + #endif /* __WIN__ */ } @@ -2509,7 +2587,7 @@ UNIV_INTERN bool os_file_set_size( const char* name, - os_file_t file, + pfs_os_file_t file, os_offset_t size, bool is_sparse) { @@ -2618,8 +2696,8 @@ os_file_set_eof( Truncates a file at the specified position. @return TRUE if success */ UNIV_INTERN -ibool -os_file_set_eof_at( +bool +os_file_set_eof_at_func( os_file_t file, /*!< in: handle to a file */ ib_uint64_t new_len)/*!< in: new file length */ { @@ -4553,7 +4631,7 @@ os_aio_array_reserve_slot( the aio operation */ void* message2,/*!< in: message to be passed along with the aio operation */ - os_file_t file, /*!< in: file handle */ + pfs_os_file_t file, /*!< in: file handle */ const char* name, /*!< in: name of the file or path as a null-terminated string */ void* buf, /*!< in: buffer where to read or from which @@ -4928,7 +5006,7 @@ os_aio_func( caution! */ const char* name, /*!< in: name of the file or path as a null-terminated string */ - os_file_t file, /*!< in: handle to a file */ + pfs_os_file_t file, /*!< in: handle to a file */ void* buf, /*!< in: buffer where to read or from which to write */ os_offset_t offset, /*!< in: file offset where to read or write */ @@ -4958,7 +5036,6 @@ os_aio_func( BOOL ret; #endif ulint wake_later; - ut_ad(buf); ut_ad(n > 0); ut_ad(n % OS_MIN_LOG_BLOCK_SIZE == 0); @@ -5205,7 +5282,6 @@ os_aio_windows_handle( break; } } - *message1 = slot->message1; *message2 = slot->message2; @@ -5229,12 +5305,14 @@ os_aio_windows_handle( switch (slot->type) { case OS_FILE_WRITE: - ret_val = os_file_write(slot->name, slot->file, slot->buf, - slot->offset, slot->len); + ret_val = os_file_write( + slot->name, slot->file, slot->buf, + slot->offset, slot->len); break; case OS_FILE_READ: - ret_val = os_file_read(slot->file, slot->buf, - slot->offset, slot->len); + ret_val = os_file_read( + slot->file, slot->buf, + slot->offset, slot->len); break; default: ut_error; @@ -5503,12 +5581,14 @@ found: iocb = &(slot->control); if (slot->type == OS_FILE_READ) { - io_prep_pread(&slot->control, slot->file, slot->buf, - slot->len, (off_t) slot->offset); + io_prep_pread(&slot->control, slot->file, + slot->buf, slot->len, + (off_t) slot->offset); } else { ut_a(slot->type == OS_FILE_WRITE); - io_prep_pwrite(&slot->control, slot->file, slot->buf, - slot->len, (off_t) slot->offset); + io_prep_pwrite(&slot->control, slot->file, + slot->buf, slot->len, + (off_t) slot->offset); } /* Resubmit an I/O request */ submit_ret = io_submit(array->aio_ctx[segment], 1, &iocb); @@ -5742,7 +5822,6 @@ consecutive_loop: os_aio_slot_t* slot; slot = os_aio_array_get_nth_slot(array, i + segment * n); - if (slot->reserved && slot != aio_slot && slot->offset == aio_slot->offset + aio_slot->len @@ -6296,7 +6375,9 @@ os_file_trim( #ifdef __linux__ #if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) - int ret = fallocate(slot->file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len); + int ret = fallocate(slot->file, + FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, + off, trim_len); if (ret) { /* After first failure do not try to trim again */ diff --git a/storage/xtradb/row/row0log.cc b/storage/xtradb/row/row0log.cc index 3cb909632dc..2cd663fd600 100644 --- a/storage/xtradb/row/row0log.cc +++ b/storage/xtradb/row/row0log.cc @@ -368,9 +368,9 @@ row_log_online_op( goto err_exit; } - ret = os_file_write( + ret = os_file_write_int_fd( "(modification log)", - OS_FILE_FROM_FD(log->fd), + log->fd, log->tail.block, byte_offset, srv_sort_buf_size); log->tail.blocks++; if (!ret) { @@ -484,9 +484,9 @@ row_log_table_close_func( goto err_exit; } - ret = os_file_write( + ret = os_file_write_int_fd( "(modification log)", - OS_FILE_FROM_FD(log->fd), + log->fd, log->tail.block, byte_offset, srv_sort_buf_size); log->tail.blocks++; if (!ret) { @@ -2619,11 +2619,10 @@ all_done: goto func_exit; } - success = os_file_read_no_error_handling( - OS_FILE_FROM_FD(index->online_log->fd), + success = os_file_read_no_error_handling_int_fd( + index->online_log->fd, index->online_log->head.block, ofs, srv_sort_buf_size); - if (!success) { fprintf(stderr, "InnoDB: unable to read temporary file" " for table %s\n", index->table_name); @@ -3446,8 +3445,8 @@ all_done: goto func_exit; } - success = os_file_read_no_error_handling( - OS_FILE_FROM_FD(index->online_log->fd), + success = os_file_read_no_error_handling_int_fd( + index->online_log->fd, index->online_log->head.block, ofs, srv_sort_buf_size); diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc index 57b08801225..32c63afcc6d 100644 --- a/storage/xtradb/row/row0merge.cc +++ b/storage/xtradb/row/row0merge.cc @@ -967,8 +967,8 @@ row_merge_read( } #endif /* UNIV_DEBUG */ - success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf, - ofs, srv_sort_buf_size); + success = os_file_read_no_error_handling_int_fd(fd, buf, + ofs, srv_sort_buf_size); /* For encrypted tables, decrypt data after reading and copy data */ if (crypt_data && crypt_buf) { @@ -1023,7 +1023,7 @@ row_merge_write( mach_write_to_4((byte *)out_buf, 0); } - ret = os_file_write("(merge)", OS_FILE_FROM_FD(fd), out_buf, ofs, buf_len); + ret = os_file_write_int_fd("(merge)", fd, buf, ofs, buf_len); #ifdef UNIV_DEBUG if (row_merge_print_block_write) { @@ -3427,14 +3427,21 @@ row_merge_file_create_low( performance schema */ struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; - register_pfs_file_open_begin(&state, locker, innodb_file_temp_key, - PSI_FILE_OPEN, - "Innodb Merge Temp File", - __FILE__, __LINE__); + locker = PSI_FILE_CALL(get_thread_file_name_locker)( + &state, innodb_file_temp_key, PSI_FILE_OPEN, + "Innodb Merge Temp File", &locker); + if (locker != NULL) { + PSI_FILE_CALL(start_file_open_wait)(locker, + __FILE__, + __LINE__); + } #endif fd = innobase_mysql_tmpfile(path); #ifdef UNIV_PFS_IO - register_pfs_file_open_end(locker, fd); + if (locker != NULL) { + PSI_FILE_CALL(end_file_open_wait_and_bind_to_descriptor)( + locker, fd); + } #endif if (fd < 0) { @@ -3481,15 +3488,20 @@ row_merge_file_destroy_low( #ifdef UNIV_PFS_IO struct PSI_file_locker* locker = NULL; PSI_file_locker_state state; - register_pfs_file_io_begin(&state, locker, - fd, 0, PSI_FILE_CLOSE, - __FILE__, __LINE__); + locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)( + &state, fd, PSI_FILE_CLOSE); + if (locker != NULL) { + PSI_FILE_CALL(start_file_wait)( + locker, 0, __FILE__, __LINE__); + } #endif if (fd >= 0) { close(fd); } #ifdef UNIV_PFS_IO - register_pfs_file_io_end(locker, 0); + if (locker != NULL) { + PSI_FILE_CALL(end_file_wait)(locker, 0); + } #endif } /*********************************************************************//** diff --git a/storage/xtradb/row/row0sel.cc b/storage/xtradb/row/row0sel.cc index 7d4435eba5b..8e3ed3d1a4e 100644 --- a/storage/xtradb/row/row0sel.cc +++ b/storage/xtradb/row/row0sel.cc @@ -66,6 +66,8 @@ Created 12/19/1997 Heikki Tuuri #include "my_compare.h" /* enum icp_result */ +#include <vector> + /* Maximum number of rows to prefetch; MySQL interface has another parameter */ #define SEL_MAX_N_PREFETCH 16 @@ -2715,7 +2717,8 @@ row_sel_field_store_in_mysql_format_func( || !(templ->mysql_col_len % templ->mbmaxlen)); ut_ad(len * templ->mbmaxlen >= templ->mysql_col_len || (field_no == templ->icp_rec_field_no - && field->prefix_len > 0)); + && field->prefix_len > 0) + || templ->rec_field_is_prefix); ut_ad(!(field->prefix_len % templ->mbmaxlen)); if (templ->mbminlen == 1 && templ->mbmaxlen != 1) { @@ -2757,27 +2760,32 @@ row_sel_field_store_in_mysql_format_func( # define row_sel_store_mysql_field(m,p,r,i,o,f,t) \ row_sel_store_mysql_field_func(m,p,r,o,f,t) #endif /* UNIV_DEBUG */ -/**************************************************************//** -Convert a field in the Innobase format to a field in the MySQL format. */ +/** Convert a field in the Innobase format to a field in the MySQL format. +@param[out] mysql_rec record in the MySQL format +@param[in,out] prebuilt prebuilt struct +@param[in] rec InnoDB record; must be protected + by a page latch +@param[in] index index of rec +@param[in] offsets array returned by rec_get_offsets() +@param[in] field_no templ->rec_field_no or + templ->clust_rec_field_no + or templ->icp_rec_field_no + or sec field no if clust_templ_for_sec + is TRUE +@param[in] templ row template +*/ static MY_ATTRIBUTE((warn_unused_result)) ibool row_sel_store_mysql_field_func( -/*===========================*/ - byte* mysql_rec, /*!< out: record in the - MySQL format */ - row_prebuilt_t* prebuilt, /*!< in/out: prebuilt struct */ - const rec_t* rec, /*!< in: InnoDB record; - must be protected by - a page latch */ + byte* mysql_rec, + row_prebuilt_t* prebuilt, + const rec_t* rec, #ifdef UNIV_DEBUG - const dict_index_t* index, /*!< in: index of rec */ + const dict_index_t* index, #endif - const ulint* offsets, /*!< in: array returned by - rec_get_offsets() */ - ulint field_no, /*!< in: templ->rec_field_no or - templ->clust_rec_field_no or - templ->icp_rec_field_no */ - const mysql_row_templ_t*templ) /*!< in: row template */ + const ulint* offsets, + ulint field_no, + const mysql_row_templ_t*templ) { const byte* data; ulint len; @@ -2906,31 +2914,31 @@ row_sel_store_mysql_field_func( return(TRUE); } -/**************************************************************//** -Convert a row in the Innobase format to a row in the MySQL format. +/** Convert a row in the Innobase format to a row in the MySQL format. Note that the template in prebuilt may advise us to copy only a few columns to mysql_rec, other columns are left blank. All columns may not be needed in the query. +@param[out] mysql_rec row in the MySQL format +@param[in] prebuilt prebuilt structure +@param[in] rec Innobase record in the index + which was described in prebuilt's + template, or in the clustered index; + must be protected by a page latch +@param[in] rec_clust TRUE if the rec in the clustered index +@param[in] index index of rec +@param[in] offsets array returned by rec_get_offsets(rec) @return TRUE on success, FALSE if not all columns could be retrieved */ static MY_ATTRIBUTE((warn_unused_result)) ibool row_sel_store_mysql_rec( -/*====================*/ - byte* mysql_rec, /*!< out: row in the MySQL format */ - row_prebuilt_t* prebuilt, /*!< in: prebuilt struct */ - const rec_t* rec, /*!< in: Innobase record in the index - which was described in prebuilt's - template, or in the clustered index; - must be protected by a page latch */ - ibool rec_clust, /*!< in: TRUE if rec is in the - clustered index instead of - prebuilt->index */ - const dict_index_t* index, /*!< in: index of rec */ - const ulint* offsets) /*!< in: array returned by - rec_get_offsets(rec) */ + byte* mysql_rec, + row_prebuilt_t* prebuilt, + const rec_t* rec, + ibool rec_clust, + const dict_index_t* index, + const ulint* offsets) { ulint i; - ut_ad(rec_clust || index == prebuilt->index); ut_ad(!rec_clust || dict_index_is_clust(index)); @@ -2946,12 +2954,14 @@ row_sel_store_mysql_rec( ? templ->clust_rec_field_no : templ->rec_field_no; /* We should never deliver column prefixes to MySQL, - except for evaluating innobase_index_cond(). */ + except for evaluating innobase_index_cond() and if the prefix + index is longer than the actual row data. */ /* ...actually, we do want to do this in order to support the prefix query optimization. ut_ad(dict_index_get_nth_field(index, field_no)->prefix_len - == 0); + == 0 || templ->rec_field_is_prefix); + ...so we disable this assert. */ @@ -3681,7 +3691,7 @@ row_search_for_mysql( trx_t* trx = prebuilt->trx; dict_index_t* clust_index; que_thr_t* thr; - const rec_t* rec; + const rec_t* rec = NULL; const rec_t* result_rec = NULL; const rec_t* clust_rec; dberr_t err = DB_SUCCESS; @@ -3706,7 +3716,7 @@ row_search_for_mysql( ulint* offsets = offsets_; ibool table_lock_waited = FALSE; byte* next_buf = 0; - ibool use_clustered_index = FALSE; + bool use_clustered_index = false; rec_offs_init(offsets_); @@ -3966,7 +3976,8 @@ row_search_for_mysql( if (!row_sel_store_mysql_rec( buf, prebuilt, - rec, FALSE, index, offsets)) { + rec, FALSE, index, + offsets)) { /* Only fresh inserts may contain incomplete externally stored columns. Pretend that such @@ -4248,7 +4259,6 @@ rec_loop: } if (page_rec_is_supremum(rec)) { - if (set_also_gap_locks && !(srv_locks_unsafe_for_binlog || trx->isolation_level <= TRX_ISO_READ_COMMITTED) @@ -4777,17 +4787,17 @@ locks_ok: indexes are shorter than the prefix size This optimization can avoid many IOs for certain schemas. */ - ibool row_contains_all_values = TRUE; - int i; + bool row_contains_all_values = true; + unsigned int i; for (i = 0; i < prebuilt->n_template; i++) { /* Condition (1) from above: is the field in the index (prefix or not)? */ - mysql_row_templ_t* templ = + const mysql_row_templ_t* templ = prebuilt->mysql_template + i; ulint secondary_index_field_no = templ->rec_prefix_field_no; if (secondary_index_field_no == ULINT_UNDEFINED) { - row_contains_all_values = FALSE; + row_contains_all_values = false; break; } /* Condition (2) from above: if this is a @@ -4802,8 +4812,9 @@ locks_ok: index, secondary_index_field_no); ut_a(field->prefix_len > 0); - if (record_size >= field->prefix_len) { - row_contains_all_values = FALSE; + if (record_size >= field->prefix_len + / templ->mbmaxlen) { + row_contains_all_values = false; break; } } @@ -4819,7 +4830,7 @@ locks_ok: templ->rec_prefix_field_no; ut_a(templ->rec_field_no != ULINT_UNDEFINED); } - use_clustered_index = FALSE; + use_clustered_index = false; srv_stats.n_sec_rec_cluster_reads_avoided.inc(); } } diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index b02a3b8f27e..77e18e43ef1 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2013, 2017, MariaDB Corporation. @@ -171,7 +171,8 @@ UNIV_INTERN unsigned long long srv_online_max_size; OS (provided we compiled Innobase with it in), otherwise we will use simulated aio we build below with threads. Currently we support native aio on windows and linux */ -UNIV_INTERN my_bool srv_use_native_aio = TRUE; +/* make srv_use_native_aio to be visible for other plugins */ +my_bool srv_use_native_aio = TRUE; UNIV_INTERN my_bool srv_numa_interleave = FALSE; /* Default compression level if page compression is used and no compression @@ -2584,7 +2585,8 @@ purge_archived_logs( if (dirnamelen + strlen(fileinfo.name) + 2 > OS_FILE_MAX_PATH) continue; - snprintf(archived_log_filename + dirnamelen, OS_FILE_MAX_PATH, + snprintf(archived_log_filename + dirnamelen, + OS_FILE_MAX_PATH - dirnamelen - 1, "%s", fileinfo.name); if (before_no) { @@ -3148,6 +3150,8 @@ DECLARE_THREAD(srv_master_thread)( /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); + srv_slot_t* slot; ulint old_activity_count = srv_get_activity_count(); ulint old_ibuf_merge_activity_count @@ -3221,6 +3225,7 @@ suspend_thread: srv_resume_thread(slot); if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { + my_thread_end(); os_thread_exit(NULL); } @@ -3307,6 +3312,8 @@ DECLARE_THREAD(srv_worker_thread)( void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); + srv_slot_t* slot; ulint tid_i = os_atomic_increment_ulint(&purge_tid_i, 1); @@ -3372,6 +3379,7 @@ DECLARE_THREAD(srv_worker_thread)( os_thread_pf(os_thread_get_curr_id())); #endif /* UNIV_DEBUG_THREAD_CREATION */ + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ os_thread_exit(NULL); @@ -3540,6 +3548,8 @@ DECLARE_THREAD(srv_purge_coordinator_thread)( void* arg MY_ATTRIBUTE((unused))) /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); + srv_slot_t* slot; ulint n_total_purged = ULINT_UNDEFINED; @@ -3653,6 +3663,7 @@ DECLARE_THREAD(srv_purge_coordinator_thread)( srv_release_threads(SRV_WORKER, srv_n_purge_threads - 1); } + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ os_thread_exit(NULL); @@ -3718,3 +3729,19 @@ srv_purge_wakeup() } } } + +/** Check whether given space id is undo tablespace id +@param[in] space_id space id to check +@return true if it is undo tablespace else false. */ +bool +srv_is_undo_tablespace( + ulint space_id) +{ + if (srv_undo_space_id_start == 0) { + return (false); + } + + return(space_id >= srv_undo_space_id_start + && space_id < (srv_undo_space_id_start + + srv_undo_tablespaces_open)); +} diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index aa51012816d..14c7e05b57c 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All rights reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2013, 2017, MariaDB Corporation @@ -121,6 +121,9 @@ UNIV_INTERN ibool srv_have_fullfsync = FALSE; /** TRUE if a raw partition is in use */ UNIV_INTERN ibool srv_start_raw_disk_in_use = FALSE; +/** UNDO tablespaces starts with space id. */ +ulint srv_undo_space_id_start; + /** TRUE if the server is being started, before rolling back any incomplete transactions */ UNIV_INTERN ibool srv_startup_is_before_trx_rollback_phase = FALSE; @@ -139,8 +142,8 @@ UNIV_INTERN uint srv_sys_space_size_debug; SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */ UNIV_INTERN enum srv_shutdown_state srv_shutdown_state = SRV_SHUTDOWN_NONE; -/** Files comprising the system tablespace */ -os_file_t files[1000]; +/** Files comprising the system tablespace. Also used by Mariabackup. */ +UNIV_INTERN pfs_os_file_t files[1000]; /** io_handler_thread parameters for thread identification */ static ulint n[SRV_MAX_N_IO_THREADS]; @@ -584,7 +587,7 @@ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t create_log_file( /*============*/ - os_file_t* file, /*!< out: file handle */ + pfs_os_file_t* file, /*!< out: file handle */ const char* name) /*!< in: log file name */ { ibool ret; @@ -802,7 +805,7 @@ static MY_ATTRIBUTE((nonnull, warn_unused_result)) dberr_t open_log_file( /*==========*/ - os_file_t* file, /*!< out: file handle */ + pfs_os_file_t* file, /*!< out: file handle */ const char* name, /*!< in: log file name */ os_offset_t* size) /*!< out: file size */ { @@ -919,7 +922,7 @@ open_or_create_data_files( && os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS #ifdef UNIV_AIX - /* AIX 5.1 after security patch ML7 may have + /* AIX 5.1 after security patch ML7 may have errno set to 0 here, which causes our function to return 100; work around that AIX problem */ @@ -1221,7 +1224,7 @@ srv_undo_tablespace_create( const char* name, /*!< in: tablespace name */ ulint size) /*!< in: tablespace size in pages */ { - os_file_t fh; + pfs_os_file_t fh; ibool ret; dberr_t err = DB_SUCCESS; @@ -1299,7 +1302,7 @@ srv_undo_tablespace_open( const char* name, /*!< in: tablespace name */ ulint space) /*!< in: tablespace id */ { - os_file_t fh; + pfs_os_file_t fh; dberr_t err = DB_ERROR; ibool ret; ulint flags; @@ -1404,13 +1407,23 @@ srv_undo_tablespaces_init( for (i = 0; create_new_db && i < n_conf_tablespaces; ++i) { char name[OS_FILE_MAX_PATH]; + ulint space_id = i + 1; + + DBUG_EXECUTE_IF("innodb_undo_upgrade", + space_id = i + 3;); ut_snprintf( name, sizeof(name), "%s%cundo%03lu", - srv_undo_dir, SRV_PATH_SEPARATOR, i + 1); + srv_undo_dir, SRV_PATH_SEPARATOR, space_id); + + if (i == 0) { + srv_undo_space_id_start = space_id; + prev_space_id = srv_undo_space_id_start - 1; + } + + undo_tablespace_ids[i] = space_id; - /* Undo space ids start from 1. */ err = srv_undo_tablespace_create( name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES); @@ -1432,14 +1445,16 @@ srv_undo_tablespaces_init( if (!create_new_db && !backup_mode) { n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces( undo_tablespace_ids); - } else { - n_undo_tablespaces = n_conf_tablespaces; - for (i = 1; i <= n_undo_tablespaces; ++i) { - undo_tablespace_ids[i - 1] = i; + if (n_undo_tablespaces != 0) { + srv_undo_space_id_start = undo_tablespace_ids[0]; + prev_space_id = srv_undo_space_id_start - 1; } - undo_tablespace_ids[i] = ULINT_UNDEFINED; + } else { + n_undo_tablespaces = n_conf_tablespaces; + + undo_tablespace_ids[n_conf_tablespaces] = ULINT_UNDEFINED; } /* Open all the undo tablespaces that are currently in use. If we @@ -1463,8 +1478,6 @@ srv_undo_tablespaces_init( ut_a(undo_tablespace_ids[i] != 0); ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED); - /* Undo space ids start from 1. */ - err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]); if (err != DB_SUCCESS) { @@ -1499,11 +1512,23 @@ srv_undo_tablespaces_init( break; } + /** Note the first undo tablespace id in case of + no active undo tablespace. */ + if (n_undo_tablespaces == 0) { + srv_undo_space_id_start = i; + } + ++n_undo_tablespaces; ++*n_opened; } + /** Explictly specify the srv_undo_space_id_start + as zero when there are no undo tablespaces. */ + if (n_undo_tablespaces == 0) { + srv_undo_space_id_start = 0; + } + /* If the user says that there are fewer than what we find we tolerate that discrepancy but not the inverse. Because there could be unused undo tablespaces for future use. */ @@ -1548,10 +1573,11 @@ srv_undo_tablespaces_init( mtr_start(&mtr); /* The undo log tablespace */ - for (i = 1; i <= n_undo_tablespaces; ++i) { + for (i = 0; i < n_undo_tablespaces; ++i) { fsp_header_init( - i, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr); + undo_tablespace_ids[i], + SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr); } mtr_commit(&mtr); @@ -1665,6 +1691,10 @@ innobase_start_or_create_for_mysql(void) /* This should be initialized early */ ut_init_timer(); + if (srv_force_recovery == SRV_FORCE_NO_LOG_REDO) { + srv_read_only_mode = 1; + } + high_level_read_only = srv_read_only_mode || srv_force_recovery > SRV_FORCE_NO_TRX_UNDO; diff --git a/storage/xtradb/trx/trx0purge.cc b/storage/xtradb/trx/trx0purge.cc index 7d35bb12093..90f22e7ef82 100644 --- a/storage/xtradb/trx/trx0purge.cc +++ b/storage/xtradb/trx/trx0purge.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2017, MariaDB Corporation. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under @@ -685,7 +685,8 @@ trx_purge_get_rseg_with_min_trx_id( /* We assume in purge of externally stored fields that space id is in the range of UNDO tablespace space ids */ - ut_a(purge_sys->rseg->space <= srv_undo_tablespaces_open); + ut_a(purge_sys->rseg->space == 0 + || srv_is_undo_tablespace(purge_sys->rseg->space)); zip_size = purge_sys->rseg->zip_size; diff --git a/storage/xtradb/trx/trx0roll.cc b/storage/xtradb/trx/trx0roll.cc index d228743d300..335ef8859c4 100644 --- a/storage/xtradb/trx/trx0roll.cc +++ b/storage/xtradb/trx/trx0roll.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2016, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under @@ -818,6 +818,7 @@ DECLARE_THREAD(trx_rollback_or_clean_all_recovered)( /*!< in: a dummy parameter required by os_thread_create */ { + my_thread_init(); ut_ad(!srv_read_only_mode); #ifdef UNIV_PFS_THREAD @@ -828,6 +829,7 @@ DECLARE_THREAD(trx_rollback_or_clean_all_recovered)( trx_rollback_or_clean_is_active = false; + my_thread_end(); /* We count the number of threads in os_thread_exit(). A created thread should always use that to exit and not use return() to exit. */ diff --git a/storage/xtradb/trx/trx0sys.cc b/storage/xtradb/trx/trx0sys.cc index 558fe8a2c49..9accb4ef303 100644 --- a/storage/xtradb/trx/trx0sys.cc +++ b/storage/xtradb/trx/trx0sys.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2017, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1064,18 +1064,12 @@ trx_sys_create_rsegs( ulint new_rsegs = n_rsegs - n_used; for (i = 0; i < new_rsegs; ++i) { - ulint space; + ulint space_id; + space_id = (n_spaces == 0) ? 0 + : (srv_undo_space_id_start + i % n_spaces); - /* Tablespace 0 is the system tablespace. All UNDO - log tablespaces start from 1. */ - - if (n_spaces > 0) { - space = (i % n_spaces) + 1; - } else { - space = 0; /* System tablespace */ - } - - if (trx_rseg_create(space) != NULL) { + /* Tablespace 0 is the system tablespace. */ + if (trx_rseg_create(space_id) != NULL) { ++n_used; } else { break; |