diff options
author | Nirbhay Choubey <nirbhay@mariadb.com> | 2015-12-19 14:24:38 -0500 |
---|---|---|
committer | Nirbhay Choubey <nirbhay@mariadb.com> | 2015-12-19 14:24:38 -0500 |
commit | dad555a09c8d590132c77c192a18d7fc1f8fe91e (patch) | |
tree | 38fb545e5df0a24333b8284c816f5bea95d19a03 /storage/xtradb | |
parent | 18173ddfc4081407832d9a6703d1b8356b7defe9 (diff) | |
parent | 90ea0145856338221803ebb9b446ed2a6e082412 (diff) | |
download | mariadb-git-dad555a09c8d590132c77c192a18d7fc1f8fe91e.tar.gz |
Merge tag 'mariadb-10.0.23' into 10.0-galera
Diffstat (limited to 'storage/xtradb')
37 files changed, 981 insertions, 489 deletions
diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt index f5ac3a483f3..a01fa198068 100644 --- a/storage/xtradb/CMakeLists.txt +++ b/storage/xtradb/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -49,6 +49,9 @@ IF(UNIX) LINK_LIBRARIES(${AIO_LIBRARY}) ENDIF() ADD_DEFINITIONS("-DUNIV_LINUX -D_GNU_SOURCE=1") + IF(HAVE_LIBNUMA) + LINK_LIBRARIES(numa) + ENDIF() ELSEIF(CMAKE_SYSTEM_NAME MATCHES "HP*") ADD_DEFINITIONS("-DUNIV_HPUX") ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "AIX") @@ -170,6 +173,18 @@ IF(NOT MSVC) }" HAVE_IB_GCC_ATOMIC_THREAD_FENCE ) + CHECK_C_SOURCE( + "#include<stdint.h> + int main() + { + unsigned char c; + + __atomic_test_and_set(&c, __ATOMIC_ACQUIRE); + __atomic_clear(&c, __ATOMIC_RELEASE); + return(0); + }" + HAVE_IB_GCC_ATOMIC_TEST_AND_SET + ) IF(HAVE_IB_GCC_ATOMIC_BUILTINS) ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS=1) @@ -192,6 +207,10 @@ IF(HAVE_IB_GCC_ATOMIC_THREAD_FENCE) ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_THREAD_FENCE=1) ENDIF() +IF(HAVE_IB_GCC_ATOMIC_TEST_AND_SET) + ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_TEST_AND_SET=1) +ENDIF() + # either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not CHECK_C_SOURCE( " diff --git a/storage/xtradb/btr/btr0cur.cc b/storage/xtradb/btr/btr0cur.cc index 04e5797602c..d0f65c42974 100644 --- a/storage/xtradb/btr/btr0cur.cc +++ b/storage/xtradb/btr/btr0cur.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2012, Facebook Inc. 
@@ -284,8 +284,13 @@ btr_cur_latch_leaves( #ifdef UNIV_BTR_DEBUG ut_a(page_is_comp(get_block->frame) == page_is_comp(page)); - ut_a(btr_page_get_next(get_block->frame, mtr) - == page_get_page_no(page)); + + /* For fake_change mode we avoid a detailed validation + as it operate in tweaked format where-in validation + may fail. */ + ut_a(sibling_mode == RW_NO_LATCH + || btr_page_get_next(get_block->frame, mtr) + == page_get_page_no(page)); #endif /* UNIV_BTR_DEBUG */ if (sibling_mode == RW_NO_LATCH) { /* btr_block_get() called with RW_NO_LATCH will @@ -1383,9 +1388,6 @@ btr_cur_optimistic_insert( } #endif /* UNIV_DEBUG */ - ut_ad((thr && thr_get_trx(thr)->fake_changes) - || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - leaf = page_is_leaf(page); /* Calculate the record size when entry is converted to a record */ @@ -2265,6 +2267,7 @@ btr_cur_optimistic_update( ulint max_size; ulint new_rec_size; ulint old_rec_size; + ulint max_ins_size = 0; dtuple_t* new_entry; roll_ptr_t roll_ptr; ulint i; @@ -2394,6 +2397,10 @@ any_extern: : (old_rec_size + page_get_max_insert_size_after_reorganize(page, 1)); + if (!page_zip) { + max_ins_size = page_get_max_insert_size_after_reorganize(page, 1); + } + if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT) && (max_size >= new_rec_size)) || (page_get_n_recs(page) <= 1))) { @@ -2459,12 +2466,15 @@ any_extern: ut_ad(err == DB_SUCCESS); func_exit: - if (page_zip - && !(flags & BTR_KEEP_IBUF_BITMAP) + if (!(flags & BTR_KEEP_IBUF_BITMAP) && !dict_index_is_clust(index) && page_is_leaf(page)) { - /* Update the free bits in the insert buffer. 
*/ - ibuf_update_free_bits_zip(block, mtr); + + if (page_zip) { + ibuf_update_free_bits_zip(block, mtr); + } else { + ibuf_update_free_bits_low(block, max_ins_size, mtr); + } } return(err); @@ -2600,6 +2610,7 @@ btr_cur_pessimistic_update( ulint n_reserved = 0; ulint n_ext; trx_t* trx; + ulint max_ins_size = 0; *offsets = NULL; *big_rec = NULL; @@ -2800,6 +2811,10 @@ make_external: } } + if (!page_zip) { + max_ins_size = page_get_max_insert_size_after_reorganize(page, 1); + } + /* Store state of explicit locks on rec on the page infimum record, before deleting rec. The page infimum acts as a dummy carrier of the locks, taking care also of lock releases, before we can move the locks @@ -2845,13 +2860,18 @@ make_external: rec_offs_make_valid( page_cursor->rec, index, *offsets); } - } else if (page_zip && - !dict_index_is_clust(index) + } else if (!dict_index_is_clust(index) && page_is_leaf(page)) { + /* Update the free bits in the insert buffer. This is the same block which was skipped by BTR_KEEP_IBUF_BITMAP. 
*/ - ibuf_update_free_bits_zip(block, mtr); + if (page_zip) { + ibuf_update_free_bits_zip(block, mtr); + } else { + ibuf_update_free_bits_low(block, max_ins_size, + mtr); + } } err = DB_SUCCESS; diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc index 895957dbe10..de342c74374 100644 --- a/storage/xtradb/buf/buf0buf.cc +++ b/storage/xtradb/buf/buf0buf.cc @@ -53,6 +53,10 @@ Created 11/5/1995 Heikki Tuuri #include "page0zip.h" #include "srv0mon.h" #include "buf0checksum.h" +#ifdef HAVE_LIBNUMA +#include <numa.h> +#include <numaif.h> +#endif // HAVE_LIBNUMA #include "trx0trx.h" #include "srv0start.h" @@ -1167,8 +1171,7 @@ buf_chunk_init( /*===========*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ buf_chunk_t* chunk, /*!< out: chunk of buffers */ - ulint mem_size, /*!< in: requested size in bytes */ - ibool populate) /*!< in: virtual page preallocation */ + ulint mem_size) /*!< in: requested size in bytes */ { buf_block_t* block; byte* frame; @@ -1184,13 +1187,29 @@ buf_chunk_init( + (UNIV_PAGE_SIZE - 1), UNIV_PAGE_SIZE); chunk->mem_size = mem_size; - chunk->mem = os_mem_alloc_large(&chunk->mem_size, populate); + chunk->mem = os_mem_alloc_large(&chunk->mem_size); if (UNIV_UNLIKELY(chunk->mem == NULL)) { return(NULL); } +#ifdef HAVE_LIBNUMA + if (srv_numa_interleave) { + int st = mbind(chunk->mem, chunk->mem_size, + MPOL_INTERLEAVE, + numa_all_nodes_ptr->maskp, + numa_all_nodes_ptr->size, + MPOL_MF_MOVE); + if (st != 0) { + ib_logf(IB_LOG_LEVEL_WARN, + "Failed to set NUMA memory policy of buffer" + " pool page frames to MPOL_INTERLEAVE" + " (error: %s).", strerror(errno)); + } + } +#endif // HAVE_LIBNUMA + /* Allocate the block descriptors from the start of the memory block. 
*/ chunk->blocks = (buf_block_t*) chunk->mem; @@ -1387,7 +1406,6 @@ buf_pool_init_instance( /*===================*/ buf_pool_t* buf_pool, /*!< in: buffer pool instance */ ulint buf_pool_size, /*!< in: size in bytes */ - ibool populate, /*!< in: virtual page preallocation */ ulint instance_no) /*!< in: id of the instance */ { ulint i; @@ -1416,7 +1434,7 @@ buf_pool_init_instance( UT_LIST_INIT(buf_pool->free); - if (!buf_chunk_init(buf_pool, chunk, buf_pool_size, populate)) { + if (!buf_chunk_init(buf_pool, chunk, buf_pool_size)) { mem_free(chunk); mem_free(buf_pool); @@ -1522,7 +1540,6 @@ dberr_t buf_pool_init( /*==========*/ ulint total_size, /*!< in: size of the total pool in bytes */ - ibool populate, /*!< in: virtual page preallocation */ ulint n_instances) /*!< in: number of instances */ { ulint i; @@ -1532,13 +1549,28 @@ buf_pool_init( ut_ad(n_instances <= MAX_BUFFER_POOLS); ut_ad(n_instances == srv_buf_pool_instances); +#ifdef HAVE_LIBNUMA + if (srv_numa_interleave) { + ib_logf(IB_LOG_LEVEL_INFO, + "Setting NUMA memory policy to MPOL_INTERLEAVE"); + if (set_mempolicy(MPOL_INTERLEAVE, + numa_all_nodes_ptr->maskp, + numa_all_nodes_ptr->size) != 0) { + ib_logf(IB_LOG_LEVEL_WARN, + "Failed to set NUMA memory policy to" + " MPOL_INTERLEAVE (error: %s).", + strerror(errno)); + } + } +#endif // HAVE_LIBNUMA + buf_pool_ptr = (buf_pool_t*) mem_zalloc( n_instances * sizeof *buf_pool_ptr); for (i = 0; i < n_instances; i++) { buf_pool_t* ptr = &buf_pool_ptr[i]; - if (buf_pool_init_instance(ptr, size, populate, i) != DB_SUCCESS) { + if (buf_pool_init_instance(ptr, size, i) != DB_SUCCESS) { /* Free all the instances created so far. 
*/ buf_pool_free(i); @@ -1552,6 +1584,18 @@ buf_pool_init( btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64); +#ifdef HAVE_LIBNUMA + if (srv_numa_interleave) { + ib_logf(IB_LOG_LEVEL_INFO, + "Setting NUMA memory policy to MPOL_DEFAULT"); + if (set_mempolicy(MPOL_DEFAULT, NULL, 0) != 0) { + ib_logf(IB_LOG_LEVEL_WARN, + "Failed to set NUMA memory policy to" + " MPOL_DEFAULT (error: %s).", strerror(errno)); + } + } +#endif // HAVE_LIBNUMA + return(DB_SUCCESS); } diff --git a/storage/xtradb/buf/buf0dump.cc b/storage/xtradb/buf/buf0dump.cc index 090e8cac63b..2b3b506a457 100644 --- a/storage/xtradb/buf/buf0dump.cc +++ b/storage/xtradb/buf/buf0dump.cc @@ -230,6 +230,16 @@ buf_dump( continue; } + if (srv_buf_pool_dump_pct != 100) { + ut_ad(srv_buf_pool_dump_pct < 100); + + n_pages = n_pages * srv_buf_pool_dump_pct / 100; + + if (n_pages == 0) { + n_pages = 1; + } + } + dump = static_cast<buf_dump_t*>( ut_malloc(n_pages * sizeof(*dump))) ; @@ -244,9 +254,9 @@ buf_dump( return; } - for (bpage = UT_LIST_GET_LAST(buf_pool->LRU), j = 0; - bpage != NULL; - bpage = UT_LIST_GET_PREV(LRU, bpage), j++) { + for (bpage = UT_LIST_GET_FIRST(buf_pool->LRU), j = 0; + bpage != NULL && j < n_pages; + bpage = UT_LIST_GET_NEXT(LRU, bpage), j++) { ut_a(buf_page_in_file(bpage)); @@ -361,6 +371,72 @@ buf_dump_sort( } /*****************************************************************//** +Artificially delay the buffer pool loading if necessary. The idea of +this function is to prevent hogging the server with IO and slowing down +too much normal client queries. */ +UNIV_INLINE +void +buf_load_throttle_if_needed( +/*========================*/ + ulint* last_check_time, /*!< in/out: miliseconds since epoch + of the last time we did check if + throttling is needed, we do the check + every srv_io_capacity IO ops. 
*/ + ulint* last_activity_count, + ulint n_io) /*!< in: number of IO ops done since + buffer pool load has started */ +{ + if (n_io % srv_io_capacity < srv_io_capacity - 1) { + return; + } + + if (*last_check_time == 0 || *last_activity_count == 0) { + *last_check_time = ut_time_ms(); + *last_activity_count = srv_get_activity_count(); + return; + } + + /* srv_io_capacity IO operations have been performed by buffer pool + load since the last time we were here. */ + + /* If no other activity, then keep going without any delay. */ + if (srv_get_activity_count() == *last_activity_count) { + return; + } + + /* There has been other activity, throttle. */ + + ulint now = ut_time_ms(); + ulint elapsed_time = now - *last_check_time; + + /* Notice that elapsed_time is not the time for the last + srv_io_capacity IO operations performed by BP load. It is the + time elapsed since the last time we detected that there has been + other activity. This has a small and acceptable deficiency, e.g.: + 1. BP load runs and there is no other activity. + 2. Other activity occurs, we run N IO operations after that and + enter here (where 0 <= N < srv_io_capacity). + 3. last_check_time is very old and we do not sleep at this time, but + only update last_check_time and last_activity_count. + 4. We run srv_io_capacity more IO operations and call this function + again. + 5. There has been more other activity and thus we enter here. + 6. Now last_check_time is recent and we sleep if necessary to prevent + more than srv_io_capacity IO operations per second. + The deficiency is that we could have slept at 3., but for this we + would have to update last_check_time before the + "cur_activity_count == *last_activity_count" check and calling + ut_time_ms() that often may turn out to be too expensive. 
*/ + + if (elapsed_time < 1000 /* 1 sec (1000 mili secs) */) { + os_thread_sleep((1000 - elapsed_time) * 1000 /* micro secs */); + } + + *last_check_time = ut_time_ms(); + *last_activity_count = srv_get_activity_count(); +} + +/*****************************************************************//** Perform a buffer pool load from the file specified by innodb_buffer_pool_filename. If any errors occur then the value of innodb_buffer_pool_load_status will be set accordingly, see buf_load_status(). @@ -521,6 +597,9 @@ buf_load() ut_free(dump_tmp); + ulint last_check_time = 0; + ulint last_activity_cnt = 0; + for (i = 0; i < dump_n && !SHUTTING_DOWN(); i++) { buf_read_page_async(BUF_DUMP_SPACE(dump[i]), @@ -544,6 +623,9 @@ buf_load() "Buffer pool(s) load aborted on request"); return; } + + buf_load_throttle_if_needed( + &last_check_time, &last_activity_cnt, i); } ut_free(dump); diff --git a/storage/xtradb/dict/dict0crea.cc b/storage/xtradb/dict/dict0crea.cc index 1947786c8e3..37ebdbae54a 100644 --- a/storage/xtradb/dict/dict0crea.cc +++ b/storage/xtradb/dict/dict0crea.cc @@ -1183,7 +1183,7 @@ dict_create_index_step( >= UNIV_FORMAT_B); node->index = dict_index_get_if_in_cache_low(index_id); - ut_a(!node->index == (err != DB_SUCCESS)); + ut_a((node->index == 0) == (err != DB_SUCCESS)); if (err != DB_SUCCESS) { diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc index c9180d8285c..a6b107563a8 100644 --- a/storage/xtradb/dict/dict0dict.cc +++ b/storage/xtradb/dict/dict0dict.cc @@ -2,6 +2,7 @@ Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2014, 2015, MariaDB Corporation. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -28,6 +29,7 @@ Created 1/8/1996 Heikki Tuuri #include "fts0fts.h" #include "fil0fil.h" #include <algorithm> +#include <string> #ifdef UNIV_NONINL #include "dict0dict.ic" @@ -212,14 +214,6 @@ dict_index_remove_from_cache_low( dict_index_t* index, /*!< in, own: index */ ibool lru_evict); /*!< in: TRUE if page being evicted to make room in the table LRU list */ -/**********************************************************************//** -Removes a table object from the dictionary cache. */ -static -void -dict_table_remove_from_cache_low( -/*=============================*/ - dict_table_t* table, /*!< in, own: table */ - ibool lru_evict); /*!< in: TRUE if evicting from LRU */ #ifdef UNIV_DEBUG /**********************************************************************//** Validate the dictionary table LRU list. @@ -787,6 +781,45 @@ dict_table_get_all_fts_indexes( return(ib_vector_size(indexes)); } +/** Store autoinc value when the table is evicted. +@param[in] table table evicted */ +UNIV_INTERN +void +dict_table_autoinc_store( + const dict_table_t* table) +{ + ut_ad(mutex_own(&dict_sys->mutex)); + + if (table->autoinc != 0) { + ut_ad(dict_sys->autoinc_map->find(table->id) + == dict_sys->autoinc_map->end()); + + dict_sys->autoinc_map->insert( + std::pair<table_id_t, ib_uint64_t>( + table->id, table->autoinc)); + } +} + +/** Restore autoinc value when the table is loaded. 
+@param[in] table table loaded */ +UNIV_INTERN +void +dict_table_autoinc_restore( + dict_table_t* table) +{ + ut_ad(mutex_own(&dict_sys->mutex)); + + autoinc_map_t::iterator it; + it = dict_sys->autoinc_map->find(table->id); + + if (it != dict_sys->autoinc_map->end()) { + table->autoinc = it->second; + ut_ad(table->autoinc != 0); + + dict_sys->autoinc_map->erase(it); + } +} + /********************************************************************//** Reads the next autoinc value (== autoinc counter value), 0 if not yet initialized. @@ -1080,6 +1113,8 @@ dict_init(void) mutex_create(dict_foreign_err_mutex_key, &dict_foreign_err_mutex, SYNC_NO_ORDER_CHECK); } + + dict_sys->autoinc_map = new autoinc_map_t(); } /**********************************************************************//** @@ -1327,6 +1362,8 @@ dict_table_add_to_cache( UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_non_LRU, table); } + dict_table_autoinc_restore(table); + ut_ad(dict_lru_validate()); dict_sys->size += mem_heap_get_size(table->heap) @@ -2016,7 +2053,6 @@ dict_table_change_id_in_cache( /**********************************************************************//** Removes a table object from the dictionary cache. */ -static void dict_table_remove_from_cache_low( /*=============================*/ @@ -2078,6 +2114,10 @@ dict_table_remove_from_cache_low( ut_ad(dict_lru_validate()); + if (lru_evict) { + dict_table_autoinc_store(table); + } + if (lru_evict && table->drop_aborted) { /* Do as dict_table_try_drop_aborted() does. 
*/ @@ -2773,7 +2813,7 @@ dict_index_find_cols( dict_field_t* field = dict_index_get_nth_field(index, i); for (j = 0; j < table->n_cols; j++) { - if (!strcmp(dict_table_get_col_name(table, j), + if (!innobase_strcasecmp(dict_table_get_col_name(table, j), field->name)) { field->col = dict_table_get_nth_col(table, j); @@ -3448,11 +3488,13 @@ dict_foreign_error_report( dict_foreign_t* fk, /*!< in: foreign key constraint */ const char* msg) /*!< in: the error message */ { + std::string fk_str; mutex_enter(&dict_foreign_err_mutex); dict_foreign_error_report_low(file, fk->foreign_table_name); fputs(msg, file); fputs(" Constraint:\n", file); - dict_print_info_on_foreign_key_in_create_format(file, NULL, fk, TRUE); + fk_str = dict_print_info_on_foreign_key_in_create_format(NULL, fk, TRUE); + fputs(fk_str.c_str(), file); putc('\n', file); if (fk->foreign_index) { fputs("The index in the foreign key in table is ", file); @@ -4273,13 +4315,13 @@ dict_foreign_push_index_error( "%s table '%s' with foreign key constraint" " failed. There is no index in the referenced" " table where the referenced columns appear" - " as the first columns. Error close to %s.\n", + " as the first columns near '%s'.\n", operation, create_name, latest_foreign); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table '%s' with foreign key constraint" " failed. There is no index in the referenced" " table where the referenced columns appear" - " as the first columns. Error close to %s.", + " as the first columns near '%s'.", operation, create_name, latest_foreign); break; } @@ -4288,13 +4330,13 @@ dict_foreign_push_index_error( "%s table '%s' with foreign key constraint" " failed. There is only prefix index in the referenced" " table where the referenced columns appear" - " as the first columns. Error close to %s.\n", + " as the first columns near '%s'.\n", operation, create_name, latest_foreign); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table '%s' with foreign key constraint" " failed. 
There is only prefix index in the referenced" " table where the referenced columns appear" - " as the first columns. Error close to %s.", + " as the first columns near '%s'.", operation, create_name, latest_foreign); break; } @@ -4302,12 +4344,12 @@ dict_foreign_push_index_error( fprintf(ef, "%s table %s with foreign key constraint" " failed. You have defined a SET NULL condition but " - "field %s on index is defined as NOT NULL close to %s\n", + "column '%s' on index is defined as NOT NULL near '%s'.\n", operation, create_name, columns[err_col], latest_foreign); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table %s with foreign key constraint" " failed. You have defined a SET NULL condition but " - "field %s on index is defined as NOT NULL close to %s", + "column '%s' on index is defined as NOT NULL near '%s'.", operation, create_name, columns[err_col], latest_foreign); break; } @@ -4320,13 +4362,13 @@ dict_foreign_push_index_error( table, dict_col_get_no(field->col)); fprintf(ef, "%s table %s with foreign key constraint" - " failed. Field type or character set for column %s " - "does not mach referenced column %s close to %s\n", + " failed. Field type or character set for column '%s' " + "does not mach referenced column '%s' near '%s'.\n", operation, create_name, columns[err_col], col_name, latest_foreign); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table %s with foreign key constraint" - " failed. Field type or character set for column %s " - "does not mach referenced column %s close to %s", + " failed. Field type or character set for column '%s' " + "does not mach referenced column '%s' near '%s'.", operation, create_name, columns[err_col], col_name, latest_foreign); break; } @@ -4660,14 +4702,14 @@ loop: if (!success) { dict_foreign_report_syntax_err( "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.\n", + " failed. 
Parse error in '%s'" + " near '%s'.\n", operation, create_name, start_of_latest_foreign, orig); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.", + " failed. Parse error in '%s'" + " near '%s'.", operation, create_name, start_of_latest_foreign, orig); return(DB_CANNOT_ADD_CONSTRAINT); } @@ -4704,16 +4746,16 @@ col_loop1: dict_foreign_error_report_low(ef, create_name); fprintf(ef, "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.\n", + " failed. Parse error in '%s'" + " near '%s'.\n", operation, create_name, start_of_latest_foreign, orig); mutex_exit(&dict_foreign_err_mutex); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.", + " failed. Parse error in '%s'" + " near '%s'.", operation, create_name, start_of_latest_foreign, orig); return(DB_CANNOT_ADD_CONSTRAINT); @@ -4733,14 +4775,14 @@ col_loop1: if (!success) { dict_foreign_report_syntax_err( "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.\n", + " failed. Parse error in '%s'" + " near '%s'.\n", operation, create_name, start_of_latest_foreign, orig); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.", + " failed. Parse error in '%s'" + " near '%s'.", operation, create_name, start_of_latest_foreign, orig); return(DB_CANNOT_ADD_CONSTRAINT); @@ -4779,14 +4821,14 @@ col_loop1: if (!success || !my_isspace(cs, *ptr)) { dict_foreign_report_syntax_err( "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.\n", + " failed. 
Parse error in '%s'" + " near '%s'.\n", operation, create_name, start_of_latest_foreign, orig); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.", + " failed. Parse error in '%s'" + " near '%s'.", operation, create_name, start_of_latest_foreign, orig); return(DB_CANNOT_ADD_CONSTRAINT); } @@ -4866,13 +4908,13 @@ col_loop1: ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table %s with foreign key constraint failed. Referenced table %s not found in the data dictionary " - "close to %s.", + "near '%s'.", operation, create_name, buf, start_of_latest_foreign); mutex_enter(&dict_foreign_err_mutex); dict_foreign_error_report_low(ef, create_name); fprintf(ef, "%s table %s with foreign key constraint failed. Referenced table %s not found in the data dictionary " - "close to %s.\n", + "near '%s'.\n", operation, create_name, buf, start_of_latest_foreign); mutex_exit(&dict_foreign_err_mutex); @@ -4886,14 +4928,14 @@ col_loop1: if (!success) { dict_foreign_report_syntax_err( "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.\n", + " failed. Parse error in '%s'" + " near '%s'.\n", operation, create_name, start_of_latest_foreign, orig); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.", + " failed. Parse error in '%s'" + " near '%s'.", operation, create_name, start_of_latest_foreign, orig); return(DB_CANNOT_ADD_CONSTRAINT); @@ -4914,14 +4956,14 @@ col_loop2: dict_foreign_error_report_low(ef, create_name); fprintf(ef, "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.\n", + " failed. 
Parse error in '%s'" + " near '%s'.\n", operation, create_name, start_of_latest_foreign, orig); mutex_exit(&dict_foreign_err_mutex); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.", + " failed. Parse error in '%s'" + " near '%s'.", operation, create_name, start_of_latest_foreign, orig); return(DB_CANNOT_ADD_CONSTRAINT); @@ -4941,14 +4983,12 @@ col_loop2: dict_foreign_report_syntax_err( "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s. Too few referenced columns.\n", + " failed. Parse error in '%s' near '%s'. Referencing column count does not match referenced column count.\n", operation, create_name, start_of_latest_foreign, orig); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s. Too few referenced columns, you have %d when you should have %d.", + " failed. Parse error in '%s' near '%s'. Referencing column count %d does not match referenced column count %d.\n", operation, create_name, start_of_latest_foreign, orig, i, foreign->n_fields); return(DB_CANNOT_ADD_CONSTRAINT); @@ -4979,14 +5019,14 @@ scan_on_conditions: dict_foreign_report_syntax_err( "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.\n", + " failed. Parse error in '%s'" + " near '%s'.\n", operation, create_name, start_of_latest_foreign, start_of_latest_set); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.", + " failed. 
Parse error in '%s'" + " near '%s'.", operation, create_name, start_of_latest_foreign, start_of_latest_set); return(DB_CANNOT_ADD_CONSTRAINT); @@ -5029,14 +5069,14 @@ scan_on_conditions: if (!success) { dict_foreign_report_syntax_err( "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.\n", + " failed. Parse error in '%s'" + " near '%s'.\n", operation, create_name, start_of_latest_foreign, start_of_latest_set); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.", + " failed. Parse error in '%s'" + " near '%s'.", operation, create_name, start_of_latest_foreign, start_of_latest_set); return(DB_CANNOT_ADD_CONSTRAINT); @@ -5057,14 +5097,14 @@ scan_on_conditions: if (!success) { dict_foreign_report_syntax_err( "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.\n", + " failed. Parse error in '%s'" + " near '%s'.\n", operation, create_name, start_of_latest_foreign, start_of_latest_set); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.", + " failed. Parse error in '%s'" + " near '%s'.", operation, create_name, start_of_latest_foreign, start_of_latest_set); return(DB_CANNOT_ADD_CONSTRAINT); } @@ -5075,14 +5115,14 @@ scan_on_conditions: if (!success) { dict_foreign_report_syntax_err( "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.\n", + " failed. Parse error in '%s'" + " near '%s'.\n", operation, create_name, start_of_latest_foreign, start_of_latest_set); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table %s with foreign key constraint" - " failed. Foreign key constraint parse error in %s" - " close to %s.", + " failed. 
Parse error in '%s'" + " near '%s'.", operation, create_name, start_of_latest_foreign, start_of_latest_set); return(DB_CANNOT_ADD_CONSTRAINT); @@ -5103,15 +5143,15 @@ scan_on_conditions: dict_foreign_error_report_low(ef, create_name); fprintf(ef, "%s table %s with foreign key constraint" - " failed. You have defined a SET NULL condition but column %s is defined as NOT NULL" - " in %s close to %s.\n", + " failed. You have defined a SET NULL condition but column '%s' is defined as NOT NULL" + " in '%s' near '%s'.\n", operation, create_name, col_name, start_of_latest_foreign, start_of_latest_set); mutex_exit(&dict_foreign_err_mutex); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table %s with foreign key constraint" - " failed. You have defined a SET NULL condition but column %s is defined as NOT NULL" - " in %s close to %s.", + " failed. You have defined a SET NULL condition but column '%s' is defined as NOT NULL" + " in '%s' near '%s'.", operation, create_name, col_name, start_of_latest_foreign, start_of_latest_set); return(DB_CANNOT_ADD_CONSTRAINT); @@ -5135,13 +5175,13 @@ try_find_index: fprintf(ef, "%s table %s with foreign key constraint" " failed. You have more than one on delete or on update clause" - " in %s close to %s.\n", + " in '%s' near '%s'.\n", operation, create_name, start_of_latest_foreign, start_of_latest_set); ib_push_warning(trx, DB_CANNOT_ADD_CONSTRAINT, "%s table %s with foreign key constraint" " failed. You have more than one on delete or on update clause" - " in %s close to %s.", + " in '%s' near '%s'.", operation, create_name, start_of_latest_foreign, start_of_latest_set); dict_foreign_free(foreign); @@ -5839,16 +5879,16 @@ dict_field_print_low( Outputs info on a foreign key of a table in a format suitable for CREATE TABLE. 
*/ UNIV_INTERN -void +std::string dict_print_info_on_foreign_key_in_create_format( /*============================================*/ - FILE* file, /*!< in: file where to print */ trx_t* trx, /*!< in: transaction */ dict_foreign_t* foreign, /*!< in: foreign key constraint */ ibool add_newline) /*!< in: whether to add a newline */ { const char* stripped_id; ulint i; + std::string str; if (strchr(foreign->id, '/')) { /* Strip the preceding database name from the constraint id */ @@ -5858,96 +5898,99 @@ dict_print_info_on_foreign_key_in_create_format( stripped_id = foreign->id; } - putc(',', file); + str.append(","); if (add_newline) { /* SHOW CREATE TABLE wants constraints each printed nicely on its own line, while error messages want no newlines inserted. */ - fputs("\n ", file); + str.append("\n "); } - fputs(" CONSTRAINT ", file); - ut_print_name(file, trx, FALSE, stripped_id); - fputs(" FOREIGN KEY (", file); + str.append(" CONSTRAINT "); + + str.append(ut_get_name(trx, FALSE, stripped_id)); + str.append(" FOREIGN KEY ("); for (i = 0;;) { - ut_print_name(file, trx, FALSE, foreign->foreign_col_names[i]); + str.append(ut_get_name(trx, FALSE, foreign->foreign_col_names[i])); if (++i < foreign->n_fields) { - fputs(", ", file); + str.append(", "); } else { break; } } - fputs(") REFERENCES ", file); + str.append(") REFERENCES "); if (dict_tables_have_same_db(foreign->foreign_table_name_lookup, foreign->referenced_table_name_lookup)) { /* Do not print the database name of the referenced table */ - ut_print_name(file, trx, TRUE, + str.append(ut_get_name(trx, TRUE, dict_remove_db_name( - foreign->referenced_table_name)); + foreign->referenced_table_name))); } else { - ut_print_name(file, trx, TRUE, - foreign->referenced_table_name); + str.append(ut_get_name(trx, TRUE, + foreign->referenced_table_name)); } - putc(' ', file); - putc('(', file); + str.append(" ("); for (i = 0;;) { - ut_print_name(file, trx, FALSE, - foreign->referenced_col_names[i]); + 
str.append(ut_get_name(trx, FALSE, + foreign->referenced_col_names[i])); + if (++i < foreign->n_fields) { - fputs(", ", file); + str.append(", "); } else { break; } } - putc(')', file); + str.append(")"); if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE) { - fputs(" ON DELETE CASCADE", file); + str.append(" ON DELETE CASCADE"); } if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) { - fputs(" ON DELETE SET NULL", file); + str.append(" ON DELETE SET NULL"); } if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) { - fputs(" ON DELETE NO ACTION", file); + str.append(" ON DELETE NO ACTION"); } if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) { - fputs(" ON UPDATE CASCADE", file); + str.append(" ON UPDATE CASCADE"); } if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) { - fputs(" ON UPDATE SET NULL", file); + str.append(" ON UPDATE SET NULL"); } if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) { - fputs(" ON UPDATE NO ACTION", file); + str.append(" ON UPDATE NO ACTION"); } + + return str; } /**********************************************************************//** Outputs info on foreign keys of a table. 
*/ UNIV_INTERN -void +std::string dict_print_info_on_foreign_keys( /*============================*/ ibool create_table_format, /*!< in: if TRUE then print in a format suitable to be inserted into a CREATE TABLE, otherwise in the format of SHOW TABLE STATUS */ - FILE* file, /*!< in: file where to print */ trx_t* trx, /*!< in: transaction */ dict_table_t* table) /*!< in: table */ { dict_foreign_t* foreign; + std::string str; mutex_enter(&(dict_sys->mutex)); @@ -5958,64 +6001,67 @@ dict_print_info_on_foreign_keys( foreign = *it; if (create_table_format) { - dict_print_info_on_foreign_key_in_create_format( - file, trx, foreign, TRUE); + str.append( + dict_print_info_on_foreign_key_in_create_format( + trx, foreign, TRUE)); } else { ulint i; - fputs("; (", file); + str.append("; ("); for (i = 0; i < foreign->n_fields; i++) { if (i) { - putc(' ', file); + str.append(" "); } - ut_print_name(file, trx, FALSE, - foreign->foreign_col_names[i]); + str.append(ut_get_name(trx, FALSE, + foreign->foreign_col_names[i])); } - fputs(") REFER ", file); - ut_print_name(file, trx, TRUE, - foreign->referenced_table_name); - putc('(', file); + str.append(") REFER "); + str.append(ut_get_name(trx, TRUE, + foreign->referenced_table_name)); + str.append(")"); for (i = 0; i < foreign->n_fields; i++) { if (i) { - putc(' ', file); + str.append(" "); } - ut_print_name( - file, trx, FALSE, - foreign->referenced_col_names[i]); + str.append(ut_get_name( + trx, FALSE, + foreign->referenced_col_names[i])); } - putc(')', file); + str.append(")"); if (foreign->type == DICT_FOREIGN_ON_DELETE_CASCADE) { - fputs(" ON DELETE CASCADE", file); + str.append(" ON DELETE CASCADE"); } if (foreign->type == DICT_FOREIGN_ON_DELETE_SET_NULL) { - fputs(" ON DELETE SET NULL", file); + str.append(" ON DELETE SET NULL"); } if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) { - fputs(" ON DELETE NO ACTION", file); + str.append(" ON DELETE NO ACTION"); } if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) { - fputs(" 
ON UPDATE CASCADE", file); + str.append(" ON UPDATE CASCADE"); } if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) { - fputs(" ON UPDATE SET NULL", file); + str.append(" ON UPDATE SET NULL"); } if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) { - fputs(" ON UPDATE NO ACTION", file); + str.append(" ON UPDATE NO ACTION"); } } } mutex_exit(&(dict_sys->mutex)); + + return str; } /********************************************************************//** @@ -6872,6 +6918,8 @@ dict_close(void) mutex_free(&dict_foreign_err_mutex); } + delete dict_sys->autoinc_map; + mem_free(dict_sys); dict_sys = NULL; } diff --git a/storage/xtradb/fsp/fsp0fsp.cc b/storage/xtradb/fsp/fsp0fsp.cc index cb51366df16..ef27e688a3f 100644 --- a/storage/xtradb/fsp/fsp0fsp.cc +++ b/storage/xtradb/fsp/fsp0fsp.cc @@ -2810,8 +2810,8 @@ try_to_extend: if (rounds > 50) { ib_logf(IB_LOG_LEVEL_INFO, "Space id %lu trying to reserve %lu extents actually reserved %lu " - " reserve %lu free %lu size %lu rounds %lu total_reserved %lu", - space, n_ext, n_pages_added, reserve, n_free, size, rounds, total_reserved); + " reserve %lu free %lu size %lu rounds %lu total_reserved %llu", + space, n_ext, n_pages_added, reserve, n_free, size, rounds, (ullint) total_reserved); } goto try_again; diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 878d567c9b2..2f088f106fc 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -116,6 +116,9 @@ this program; if not, write to the Free Software Foundation, Inc., #include "i_s.h" #include "xtradb_i_s.h" +#include <string> +#include <sstream> + # ifndef MYSQL_PLUGIN_IMPORT # define MYSQL_PLUGIN_IMPORT /* nothing */ # endif /* MYSQL_PLUGIN_IMPORT */ @@ -1618,6 +1621,19 @@ thd_supports_xa( } /******************************************************************//** +Check the status of fake changes mode (innodb_fake_changes) +@return true if fake change mode is enabled. 
*/ +UNIV_INTERN +ibool +thd_fake_changes( +/*=============*/ + THD* thd) /*!< in: thread handle, or NULL to query + the global innodb_supports_xa */ +{ + return(THDVAR((THD*) thd, fake_changes)); +} + +/******************************************************************//** Returns the lock wait timeout for the current connection. @return the lock wait timeout, in seconds */ UNIV_INTERN @@ -2385,10 +2401,11 @@ innobase_next_autoinc( if (next_value == 0) { ulonglong next; - if (current > offset) { + if (current >= offset) { next = (current - offset) / step; } else { - next = (offset - current) / step; + next = 0; + block -= step; } ut_a(max_value > next); @@ -2436,7 +2453,15 @@ innobase_trx_init( trx->check_unique_secondary = !thd_test_options( thd, OPTION_RELAXED_UNIQUE_CHECKS); - trx->fake_changes = THDVAR(thd, fake_changes); + /* Transaction on start caches the fake_changes state and uses it for + complete transaction lifetime. + There are some APIs that doesn't need an active transaction object + but transaction object are just use as a cache object/data carrier. + Before using transaction object for such APIs refresh the state of + fake_changes. */ + if (trx->state == TRX_STATE_NOT_STARTED) { + trx->fake_changes = thd_fake_changes(thd); + } #ifdef EXTENDED_SLOWLOG if (thd_log_slow_verbosity(thd) & (1ULL << SLOG_V_INNODB)) { @@ -4415,15 +4440,26 @@ innobase_commit( /* No-op in XtraDB */ trx_search_latch_release_if_reserved(trx); - if (UNIV_UNLIKELY(trx->fake_changes && - (commit_trx || - (!thd_test_options(thd, + /* If fake-changes mode = ON then allow + SELECT (they are read-only) and + CREATE ... SELECT * from table (Well this doesn't open up DDL for InnoDB + as ha_innobase::create will return appropriate error if fake-change = ON + but if create is trying to use other SE and SELECT is executing on + InnoDB table then we allow SELECT to proceed. + Ideally, statement like this should be marked CREATE_SELECT like + INSERT_SELECT but unfortunately it doesn't). 
*/ + if (UNIV_UNLIKELY(trx->fake_changes + && (thd_sql_command(thd) != SQLCOM_SELECT + && thd_sql_command(thd) != SQLCOM_CREATE_TABLE) + && (commit_trx || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))))) { /* rollback implicitly */ innobase_rollback(hton, thd, commit_trx); + /* because debug assertion code complains, if something left */ thd->get_stmt_da()->reset_diagnostics_area(); + DBUG_RETURN(HA_ERR_WRONG_COMMAND); } /* Transaction is deregistered only in a commit or a rollback. If @@ -5934,6 +5970,10 @@ ha_innobase::open( "current file system\n", norm_name); #endif + /* We allow use of table if it is found. + this is consistent to current behavior + to innodb_plugin */ + share->ib_table = ib_table; goto table_opened; } } @@ -8031,7 +8071,7 @@ ha_innobase::write_row( DBUG_ENTER("ha_innobase::write_row"); - if (srv_read_only_mode) { + if (high_level_read_only) { ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } else if (prebuilt->trx != trx) { @@ -8788,7 +8828,7 @@ ha_innobase::update_row( ut_a(prebuilt->trx == trx); - if (srv_read_only_mode) { + if (high_level_read_only) { ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } else if (!trx_is_started(trx)) { @@ -8952,7 +8992,7 @@ ha_innobase::delete_row( ut_a(prebuilt->trx == trx); - if (srv_read_only_mode) { + if (high_level_read_only) { ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } else if (!trx_is_started(trx)) { @@ -11779,7 +11819,7 @@ ha_innobase::create( if (form->s->stored_fields > REC_MAX_N_USER_FIELDS) { DBUG_RETURN(HA_ERR_TOO_MANY_FIELDS); - } else if (srv_read_only_mode) { + } else if (high_level_read_only) { DBUG_RETURN(HA_ERR_TABLE_READONLY); } @@ -12115,7 +12155,7 @@ ha_innobase::discard_or_import_tablespace( ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N); ut_a(prebuilt->trx == thd_to_trx(ha_thd())); - if (srv_read_only_mode) { + if 
(high_level_read_only) { DBUG_RETURN(HA_ERR_TABLE_READONLY); } @@ -12213,7 +12253,7 @@ ha_innobase::truncate() DBUG_ENTER("ha_innobase::truncate"); - if (srv_read_only_mode) { + if (high_level_read_only) { DBUG_RETURN(HA_ERR_TABLE_READONLY); } @@ -12589,7 +12629,7 @@ ha_innobase::rename_table( DBUG_ENTER("ha_innobase::rename_table"); - if (srv_read_only_mode) { + if (high_level_read_only) { ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } @@ -13838,8 +13878,9 @@ ha_innobase::update_table_comment( const char* comment)/*!< in: table comment defined by user */ { uint length = (uint) strlen(comment); - char* str; + char* str=0; long flen; + std::string fk_str; /* We do not know if MySQL can call this function before calling external_lock(). To be safe, update the thd of the current table @@ -13857,50 +13898,40 @@ ha_innobase::update_table_comment( possible adaptive hash latch to avoid deadlocks of threads */ trx_search_latch_release_if_reserved(prebuilt->trx); - str = NULL; - /* output the data to a temporary file */ - - if (!srv_read_only_mode) { +#define SSTR( x ) reinterpret_cast< std::ostringstream & >( \ + ( std::ostringstream() << std::dec << x ) ).str() - mutex_enter(&srv_dict_tmpfile_mutex); + fk_str.append("InnoDB free: "); + fk_str.append(SSTR(fsp_get_available_space_in_free_extents( + prebuilt->table->space))); - rewind(srv_dict_tmpfile); + fk_str.append(dict_print_info_on_foreign_keys( + FALSE, prebuilt->trx, + prebuilt->table)); - fprintf(srv_dict_tmpfile, "InnoDB free: %llu kB", - fsp_get_available_space_in_free_extents( - prebuilt->table->space)); + flen = fk_str.length(); - dict_print_info_on_foreign_keys( - FALSE, srv_dict_tmpfile, prebuilt->trx, - prebuilt->table); - - flen = ftell(srv_dict_tmpfile); - - if (flen < 0) { - flen = 0; - } else if (length + flen + 3 > 64000) { - flen = 64000 - 3 - length; - } + if (flen < 0) { + flen = 0; + } else if (length + flen + 3 > 64000) { + flen = 64000 - 3 - length; + 
} - /* allocate buffer for the full string, and - read the contents of the temporary file */ + /* allocate buffer for the full string */ - str = (char*) my_malloc(length + flen + 3, MYF(0)); + str = (char*) my_malloc(length + flen + 3, MYF(0)); - if (str) { - char* pos = str + length; - if (length) { - memcpy(str, comment, length); - *pos++ = ';'; - *pos++ = ' '; - } - rewind(srv_dict_tmpfile); - flen = (uint) fread(pos, 1, flen, srv_dict_tmpfile); - pos[flen] = 0; + if (str) { + char* pos = str + length; + if (length) { + memcpy(str, comment, length); + *pos++ = ';'; + *pos++ = ' '; } - mutex_exit(&srv_dict_tmpfile_mutex); + memcpy(pos, fk_str.c_str(), flen); + pos[flen] = 0; } prebuilt->trx->op_info = (char*)""; @@ -13918,8 +13949,7 @@ char* ha_innobase::get_foreign_key_create_info(void) /*==========================================*/ { - long flen; - char* str = 0; + char* fk_str = 0; ut_a(prebuilt != NULL); @@ -13937,38 +13967,22 @@ ha_innobase::get_foreign_key_create_info(void) trx_search_latch_release_if_reserved(prebuilt->trx); - if (!srv_read_only_mode) { - mutex_enter(&srv_dict_tmpfile_mutex); - rewind(srv_dict_tmpfile); - - /* Output the data to a temporary file */ - dict_print_info_on_foreign_keys( - TRUE, srv_dict_tmpfile, prebuilt->trx, + /* Output the data to a temporary file */ + std::string str = dict_print_info_on_foreign_keys( + TRUE, prebuilt->trx, prebuilt->table); - prebuilt->trx->op_info = (char*)""; - - flen = ftell(srv_dict_tmpfile); - - if (flen < 0) { - flen = 0; - } - - /* Allocate buffer for the string, and - read the contents of the temporary file */ + prebuilt->trx->op_info = (char*)""; - str = (char*) my_malloc(flen + 1, MYF(0)); + /* Allocate buffer for the string */ + fk_str = (char*) my_malloc(str.length() + 1, MYF(0)); - if (str) { - rewind(srv_dict_tmpfile); - flen = (uint) fread(str, 1, flen, srv_dict_tmpfile); - str[flen] = 0; - } - - mutex_exit(&srv_dict_tmpfile_mutex); + if (fk_str) { + memcpy(fk_str, str.c_str(), 
str.length()); + fk_str[str.length()]='\0'; } - return(str); + return(fk_str); } @@ -15608,10 +15622,7 @@ ha_innobase::get_auto_increment( current = *first_value; - /* If the increment step of the auto increment column - decreases then it is not affecting the immediate - next value in the series. */ - if (prebuilt->autoinc_increment > increment) { + if (prebuilt->autoinc_increment != increment) { #ifdef WITH_WSREP WSREP_DEBUG("autoinc decrease: %llu -> %llu\n" @@ -15629,7 +15640,7 @@ ha_innobase::get_auto_increment( #endif /* WITH_WSREP */ current = innobase_next_autoinc( - current, 1, increment, 1, col_max_value); + current, 1, increment, offset, col_max_value); dict_table_autoinc_initialize(prebuilt->table, current); @@ -16755,6 +16766,12 @@ innodb_internal_table_validate( } dict_table_close(user_table, FALSE, TRUE); + + DBUG_EXECUTE_IF("innodb_evict_autoinc_table", + mutex_enter(&dict_sys->mutex); + dict_table_remove_from_cache_low(user_table, TRUE); + mutex_exit(&dict_sys->mutex); + ); } return(ret); @@ -17910,6 +17927,46 @@ innodb_sched_priority_master_update( #endif /* UNIV_LINUX */ +#ifdef UNIV_DEBUG +/*************************************************************//** +Check if it is a valid value of innodb_track_changed_pages. +Changed pages tracking is not working correctly without initialization +procedure on server startup. The function allows to temporary +disable tracking, but only if the feature was enabled on startup. +This function is registered as a callback with MySQL. +@return 0 for valid innodb_track_changed_pages */ +static +int +innodb_track_changed_pages_validate( + THD* thd, /*!< in: thread handle */ + struct st_mysql_sys_var* var, /*!< in: pointer to system + variable */ + void* save, /*!< out: immediate result + for update function */ + struct st_mysql_value* value) /*!< in: incoming bool */ +{ + static bool enabled_on_startup = false; + long long intbuf = 0; + + if (value->val_int(value, &intbuf)) { + /* The value is NULL. That is invalid. 
*/ + return 1; + } + + if (srv_track_changed_pages || enabled_on_startup) { + enabled_on_startup = true; + *reinterpret_cast<ulong*>(save) + = static_cast<ulong>(intbuf); + return 0; + } + + if (intbuf == srv_track_changed_pages) + return 0; + + return 1; +} +#endif + /****************************************************************//** Callback function for accessing the InnoDB variables from MySQL: SHOW VARIABLES. */ @@ -19123,10 +19180,9 @@ static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size, "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", NULL, NULL, 128*1024*1024L, 5*1024*1024L, LONGLONG_MAX, 1024*1024L); -static MYSQL_SYSVAR_BOOL(buffer_pool_populate, srv_buf_pool_populate, +static MYSQL_SYSVAR_BOOL(buffer_pool_populate, srv_numa_interleave, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Preallocate (pre-fault) the page frames required for the mapping " - "established by the buffer pool memory region. Disabled by default.", + "Depricated. 
This option is temporary alias of --innodb-numa-interleave.", NULL, NULL, FALSE); static MYSQL_SYSVAR_ENUM(foreground_preflush, srv_foreground_preflush, @@ -19274,6 +19330,11 @@ static MYSQL_SYSVAR_BOOL(buffer_pool_dump_at_shutdown, srv_buffer_pool_dump_at_s "Dump the buffer pool into a file named @@innodb_buffer_pool_filename", NULL, NULL, FALSE); +static MYSQL_SYSVAR_ULONG(buffer_pool_dump_pct, srv_buf_pool_dump_pct, + PLUGIN_VAR_RQCMDARG, + "Dump only the hottest N% of each buffer pool, defaults to 100", + NULL, NULL, 100, 1, 100, 0); + #ifdef UNIV_DEBUG static MYSQL_SYSVAR_STR(buffer_pool_evict, srv_buffer_pool_evict, PLUGIN_VAR_RQCMDARG, @@ -19565,6 +19626,13 @@ static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio, "Use native AIO if supported on this platform.", NULL, NULL, TRUE); +#ifdef HAVE_LIBNUMA +static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Use NUMA interleave memory policy to allocate InnoDB buffer pool.", + NULL, NULL, FALSE); +#endif // HAVE_LIBNUMA + static MYSQL_SYSVAR_BOOL(api_enable_binlog, ib_binlog_enabled, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, "Enable binlog for applications direct access InnoDB through InnoDB APIs", @@ -19622,13 +19690,16 @@ static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method, /* Make this variable dynamic for debug builds to provide a testcase sync facility */ #define track_changed_pages_flags PLUGIN_VAR_NOCMDARG +#define track_changed_pages_check innodb_track_changed_pages_validate #else #define track_changed_pages_flags PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY +#define track_changed_pages_check NULL #endif static MYSQL_SYSVAR_BOOL(track_changed_pages, srv_track_changed_pages, track_changed_pages_flags, "Track the redo log for changed pages and output a changed page bitmap", - NULL, NULL, FALSE); + track_changed_pages_check, + NULL, FALSE); static MYSQL_SYSVAR_ULONGLONG(max_bitmap_file_size, srv_max_bitmap_file_size, 
PLUGIN_VAR_RQCMDARG, @@ -19840,6 +19911,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(buffer_pool_filename), MYSQL_SYSVAR(buffer_pool_dump_now), MYSQL_SYSVAR(buffer_pool_dump_at_shutdown), + MYSQL_SYSVAR(buffer_pool_dump_pct), #ifdef UNIV_DEBUG MYSQL_SYSVAR(buffer_pool_evict), #endif /* UNIV_DEBUG */ @@ -19951,6 +20023,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(version), MYSQL_SYSVAR(use_sys_malloc), MYSQL_SYSVAR(use_native_aio), +#ifdef HAVE_LIBNUMA + MYSQL_SYSVAR(numa_interleave), +#endif // HAVE_LIBNUMA MYSQL_SYSVAR(change_buffering), MYSQL_SYSVAR(change_buffer_max_size), MYSQL_SYSVAR(track_changed_pages), diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index 8134ec9b59d..ebe2e0faa4b 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -244,7 +244,7 @@ ha_innobase::check_if_supported_inplace_alter( { DBUG_ENTER("check_if_supported_inplace_alter"); - if (srv_read_only_mode) { + if (high_level_read_only) { ha_alter_info->unsupported_reason = innobase_get_err_msg(ER_READ_ONLY_MODE); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); @@ -2704,15 +2704,10 @@ prepare_inplace_alter_table_dict( /* Create a background transaction for the operations on the data dictionary tables. */ ctx->trx = innobase_trx_allocate(ctx->prebuilt->trx->mysql_thd); - - if (UNIV_UNLIKELY(ctx->trx->fake_changes)) { - trx_rollback_to_savepoint(ctx->trx, NULL); - trx_free_for_mysql(ctx->trx); - DBUG_RETURN(true); - } - trx_start_for_ddl(ctx->trx, TRX_DICT_OP_INDEX); + DBUG_ASSERT(!ctx->trx->fake_changes); + /* Create table containing all indexes to be built in this ALTER TABLE ADD INDEX so that they are in the correct order in the table. 
*/ diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index 7d388f90bb5..d5a25e276e5 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -214,7 +214,6 @@ dberr_t buf_pool_init( /*=========*/ ulint size, /*!< in: Size of the total pool in bytes */ - ibool populate, /*!< in: Force virtual page preallocation */ ulint n_instances); /*!< in: Number of instances */ /********************************************************************//** Frees the buffer pool at shutdown. This must not be invoked before diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index 5643892bffc..0e8498e6210 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -2,6 +2,7 @@ Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2014, 2015, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -312,6 +313,21 @@ dict_table_autoinc_initialize( dict_table_t* table, /*!< in/out: table */ ib_uint64_t value) /*!< in: next value to assign to a row */ __attribute__((nonnull)); + +/** Store autoinc value when the table is evicted. +@param[in] table table evicted */ +UNIV_INTERN +void +dict_table_autoinc_store( + const dict_table_t* table); + +/** Restore autoinc value when the table is loaded. +@param[in] table table loaded */ +UNIV_INTERN +void +dict_table_autoinc_restore( + dict_table_t* table); + /********************************************************************//** Reads the next autoinc value (== autoinc counter value), 0 if not yet initialized. 
@@ -371,6 +387,15 @@ dict_table_remove_from_cache( dict_table_t* table) /*!< in, own: table */ __attribute__((nonnull)); /**********************************************************************//** +Removes a table object from the dictionary cache. */ +UNIV_INTERN +void +dict_table_remove_from_cache_low( +/*=============================*/ + dict_table_t* table, /*!< in, own: table */ + ibool lru_evict); /*!< in: TRUE if table being evicted + to make room in the table LRU list */ +/**********************************************************************//** Renames a table object. @return TRUE if success */ UNIV_INTERN @@ -600,29 +625,25 @@ dict_table_print( /**********************************************************************//** Outputs info on foreign keys of a table. */ UNIV_INTERN -void +std::string dict_print_info_on_foreign_keys( /*============================*/ ibool create_table_format, /*!< in: if TRUE then print in a format suitable to be inserted into a CREATE TABLE, otherwise in the format of SHOW TABLE STATUS */ - FILE* file, /*!< in: file where to print */ trx_t* trx, /*!< in: transaction */ - dict_table_t* table) /*!< in: table */ - __attribute__((nonnull)); + dict_table_t* table); /*!< in: table */ /**********************************************************************//** Outputs info on a foreign key of a table in a format suitable for CREATE TABLE. */ UNIV_INTERN -void +std::string dict_print_info_on_foreign_key_in_create_format( /*============================================*/ - FILE* file, /*!< in: file where to print */ trx_t* trx, /*!< in: transaction */ dict_foreign_t* foreign, /*!< in: foreign key constraint */ - ibool add_newline) /*!< in: whether to add a newline */ - __attribute__((nonnull(1,3))); + ibool add_newline); /*!< in: whether to add a newline */ /********************************************************************//** Displays the names of the index and the table. 
*/ UNIV_INTERN @@ -1572,6 +1593,8 @@ extern dict_sys_t* dict_sys; /** the data dictionary rw-latch protecting dict_sys */ extern rw_lock_t dict_operation_lock; +typedef std::map<table_id_t, ib_uint64_t> autoinc_map_t; + /* Dictionary system struct */ struct dict_sys_t{ ib_prio_mutex_t mutex; /*!< mutex protecting the data @@ -1606,6 +1629,8 @@ struct dict_sys_t{ UT_LIST_BASE_NODE_T(dict_table_t) table_non_LRU; /*!< List of tables that can't be evicted from the cache */ + autoinc_map_t* autoinc_map; /*!< Map to store table id and autoinc + when table is evicted */ }; #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h index 0ce310a7cec..b10b8219f22 100644 --- a/storage/xtradb/include/ha_prototypes.h +++ b/storage/xtradb/include/ha_prototypes.h @@ -364,6 +364,16 @@ thd_supports_xa( the global innodb_supports_xa */ /******************************************************************//** +Check the status of fake changes mode (innodb_fake_changes) +@return true if fake change mode is enabled. */ +UNIV_INTERN +ibool +thd_fake_changes( +/*=============*/ + THD* thd); /*!< in: thread handle, or NULL to query + the global innodb_supports_xa */ + +/******************************************************************//** Returns the lock wait timeout for the current connection. @return the lock wait timeout, in seconds */ UNIV_INTERN diff --git a/storage/xtradb/include/ibuf0ibuf.ic b/storage/xtradb/include/ibuf0ibuf.ic index 21747fdceac..a5df9f7b6b4 100644 --- a/storage/xtradb/include/ibuf0ibuf.ic +++ b/storage/xtradb/include/ibuf0ibuf.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -128,7 +128,8 @@ ibuf_should_try( && ibuf->max_size != 0 && !dict_index_is_clust(index) && index->table->quiesce == QUIESCE_NONE - && (ignore_sec_unique || !dict_index_is_unique(index))); + && (ignore_sec_unique || !dict_index_is_unique(index)) + && srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE); } /******************************************************************//** diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index 24f4d2b78aa..93e1b3edef9 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -132,6 +132,10 @@ enum os_file_create_t { #define OS_FILE_READ_ONLY 333 #define OS_FILE_READ_WRITE 444 #define OS_FILE_READ_ALLOW_DELETE 555 /* for mysqlbackup */ +#define OS_FILE_READ_WRITE_CACHED 666 /* OS_FILE_READ_WRITE but never + O_DIRECT. Only for + os_file_create_simple_no_error_handling + currently. 
*/ /* Options for file_create */ #define OS_FILE_AIO 61 @@ -541,9 +545,11 @@ os_file_create_simple_no_error_handling_func( null-terminated string */ ulint create_mode,/*!< in: create mode */ ulint access_type,/*!< in: OS_FILE_READ_ONLY, - OS_FILE_READ_WRITE, or - OS_FILE_READ_ALLOW_DELETE; the last option is - used by a backup program reading the file */ + OS_FILE_READ_WRITE, + OS_FILE_READ_ALLOW_DELETE (used by a backup + program reading the file), or + OS_FILE_READ_WRITE_CACHED (disable O_DIRECT + if it would be enabled otherwise) */ ibool* success)/*!< out: TRUE if succeed, FALSE if error */ __attribute__((nonnull, warn_unused_result)); /****************************************************************//** @@ -552,7 +558,7 @@ UNIV_INTERN void os_file_set_nocache( /*================*/ - int fd, /*!< in: file descriptor to alter */ + os_file_t fd, /*!< in: file descriptor to alter */ const char* file_name, /*!< in: file name, used in the diagnostic message */ const char* operation_name);/*!< in: "open" or "create"; used in the diff --git a/storage/xtradb/include/os0proc.h b/storage/xtradb/include/os0proc.h index f9e88ff1a28..613e3bd6947 100644 --- a/storage/xtradb/include/os0proc.h +++ b/storage/xtradb/include/os0proc.h @@ -58,8 +58,7 @@ UNIV_INTERN void* os_mem_alloc_large( /*===============*/ - ulint* n, /*!< in/out: number of bytes */ - ibool populate); /*!< in: virtual page preallocation */ + ulint* n); /*!< in/out: number of bytes */ /****************************************************************//** Frees large pages memory. */ UNIV_INTERN diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h index 3a7707ee130..56c094547b1 100644 --- a/storage/xtradb/include/os0sync.h +++ b/storage/xtradb/include/os0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. 
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -38,6 +38,27 @@ Created 9/6/1995 Heikki Tuuri #include "ut0lst.h" #include "sync0types.h" +#if defined __i386__ || defined __x86_64__ || defined _M_IX86 \ + || defined _M_X64 || defined __WIN__ + +#define IB_STRONG_MEMORY_MODEL +#undef HAVE_IB_GCC_ATOMIC_TEST_AND_SET // Quick-and-dirty fix for bug 1519094 + +#endif /* __i386__ || __x86_64__ || _M_IX86 || _M_X64 || __WIN__ */ + +#ifdef HAVE_WINDOWS_ATOMICS +typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates + on LONG variable */ +#elif defined(HAVE_ATOMIC_BUILTINS) && !defined(HAVE_ATOMIC_BUILTINS_BYTE) +typedef ulint lock_word_t; +#else + +#define IB_LOCK_WORD_IS_BYTE + +typedef byte lock_word_t; + +#endif /* HAVE_WINDOWS_ATOMICS */ + #ifdef __WIN__ /** Native event (slow)*/ typedef HANDLE os_native_event_t; @@ -446,11 +467,61 @@ amount to decrement. */ # define os_atomic_decrement_uint64(ptr, amount) \ os_atomic_decrement(ptr, amount) -/**********************************************************//** -Returns the old value of *ptr, atomically sets *ptr to new_val */ - -# define os_atomic_test_and_set_ulint(ptr, new_val) \ - __sync_lock_test_and_set(ptr, new_val) +# if defined(HAVE_IB_GCC_ATOMIC_TEST_AND_SET) + +/** Do an atomic test-and-set. +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(__atomic_test_and_set(ptr, __ATOMIC_ACQUIRE)); +} + +/** Do an atomic clear. +@param[in,out] ptr Memory location to set to zero */ +inline +void +os_atomic_clear(volatile lock_word_t* ptr) +{ + __atomic_clear(ptr, __ATOMIC_RELEASE); +} + +# elif defined(IB_STRONG_MEMORY_MODEL) + +/** Do an atomic test and set. 
+@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(__sync_lock_test_and_set(ptr, 1)); +} + +/** Do an atomic release. + +In theory __sync_lock_release should be used to release the lock. +Unfortunately, it does not work properly alone. The workaround is +that more conservative __sync_lock_test_and_set is used instead. + +Performance regression was observed at some conditions for Intel +architecture. Disable release barrier on Intel architecture for now. +@param[in,out] ptr Memory location to write to +@return the previous value */ +inline +lock_word_t +os_atomic_clear(volatile lock_word_t* ptr) +{ + return(__sync_lock_test_and_set(ptr, 0)); +} + +# else + +# error "Unsupported platform" + +# endif /* HAVE_IB_GCC_ATOMIC_TEST_AND_SET */ #if defined(__powerpc__) || defined(__aarch64__) /* @@ -548,11 +619,51 @@ amount to decrement. */ # define os_atomic_decrement_uint64(ptr, amount) \ os_atomic_increment_uint64(ptr, -(amount)) -/**********************************************************//** -Returns the old value of *ptr, atomically sets *ptr to new_val */ - -# define os_atomic_test_and_set_ulint(ptr, new_val) \ - atomic_swap_ulong(ptr, new_val) +# ifdef IB_LOCK_WORD_IS_BYTE + +/** Do an atomic xchg and set to non-zero. +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(atomic_swap_uchar(ptr, 1)); +} + +/** Do an atomic xchg and set to zero. +@param[in,out] ptr Memory location to set to zero +@return the previous value */ +inline +lock_word_t +os_atomic_clear(volatile lock_word_t* ptr) +{ + return(atomic_swap_uchar(ptr, 0)); +} + +# else + +/** Do an atomic xchg and set to non-zero. 
+@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(atomic_swap_ulong(ptr, 1)); +} + +/** Do an atomic xchg and set to zero. +@param[in,out] ptr Memory location to set to zero +@return the previous value */ +inline +lock_word_t +os_atomic_clear(volatile lock_word_t* ptr) +{ + return(atomic_swap_ulong(ptr, 0)); +} + +# endif /* IB_LOCK_WORD_IS_BYTE */ # define os_atomic_test_and_set_byte_acquire(ptr, new_val) \ atomic_swap_uchar(ptr, new_val) @@ -673,18 +784,27 @@ amount to decrement. There is no atomic substract function on Windows */ (ib_int64_t*) ptr, \ -(ib_int64_t) amount) - amount)) -/**********************************************************//** -Returns the old value of *ptr, atomically sets *ptr to new_val. -InterlockedExchange() operates on LONG, and the LONG will be -clobbered */ - -# define os_atomic_test_and_set_byte_acquire(ptr, new_val) \ - ((byte) InterlockedExchange(ptr, new_val)) -# define os_atomic_test_and_set_byte_release(ptr, new_val) \ - ((byte) InterlockedExchange(ptr, new_val)) - -# define os_atomic_test_and_set_ulong(ptr, new_val) \ - InterlockedExchange(ptr, new_val) +/** Do an atomic test and set. +InterlockedExchange() operates on LONG, and the LONG will be clobbered +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(InterlockedExchange(ptr, 1)); +} + +/** Do an atomic release. 
+InterlockedExchange() operates on LONG, and the LONG will be clobbered +@param[in,out] ptr Memory location to set to zero +@return the previous value */ +inline +lock_word_t +os_atomic_clear(volatile lock_word_t* ptr) +{ + return(InterlockedExchange(ptr, 0)); +} # define os_atomic_lock_release_byte(ptr) \ (void) InterlockedExchange(ptr, 0) @@ -737,7 +857,7 @@ for synchronization */ } while (0); /** barrier definitions for memory ordering */ -#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined __WIN__ +#ifdef IB_STRONG_MEMORY_MODEL /* Performance regression was observed at some conditions for Intel architecture. Disable memory barrier for Intel architecture for now. */ # define os_rmb do { } while(0) diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index 31cda7d3238..567e5dc5f16 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, 2009, Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2013, 2015, MariaDB Corporation. @@ -229,6 +229,9 @@ extern char* srv_arch_dir; recovery and open all tables in RO mode instead of RW mode. We don't sync the max trx id to disk either. */ extern my_bool srv_read_only_mode; +/** Set if InnoDB operates in read-only mode or innodb-force-recovery +is greater than SRV_FORCE_NO_TRX_UNDO. */ +extern my_bool high_level_read_only; /** store to its own file each table created by an user; data dictionary tables are in the system tablespace 0 */ extern my_bool srv_file_per_table; @@ -259,6 +262,7 @@ OS (provided we compiled Innobase with it in), otherwise we will use simulated aio we build below with threads. 
Currently we support native aio on windows and linux */ extern my_bool srv_use_native_aio; +extern my_bool srv_numa_interleave; #ifdef __WIN__ extern ibool srv_use_native_conditions; #endif /* __WIN__ */ @@ -326,7 +330,6 @@ extern my_bool srv_use_sys_malloc; extern ibool srv_use_sys_malloc; #endif /* UNIV_HOTBACKUP */ extern ulint srv_buf_pool_size; /*!< requested size in bytes */ -extern my_bool srv_buf_pool_populate; /*!< virtual page preallocation */ extern ulint srv_buf_pool_instances; /*!< requested number of buffer pool instances */ extern ulong srv_n_page_hash_locks; /*!< number of locks to protect buf_pool->page_hash */ @@ -336,6 +339,8 @@ extern ulong srv_flush_neighbors; /*!< whether or not to flush neighbors of a block */ extern ulint srv_buf_pool_old_size; /*!< previously requested size */ extern ulint srv_buf_pool_curr_size; /*!< current size in bytes */ +extern ulong srv_buf_pool_dump_pct; /*!< dump that may % of each buffer + pool during BP dump */ extern ulint srv_mem_pool_size; extern ulint srv_lock_table_size; @@ -1132,6 +1137,7 @@ struct srv_slot_t{ #else /* !UNIV_HOTBACKUP */ # define srv_use_adaptive_hash_indexes FALSE # define srv_use_native_aio FALSE +# define srv_numa_interleave FALSE # define srv_force_recovery 0UL # define srv_set_io_thread_op_info(t,info) ((void) 0) # define srv_reset_io_thread_op_info() ((void) 0) diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h index ffe2d635fbd..152a05231ef 100644 --- a/storage/xtradb/include/sync0sync.h +++ b/storage/xtradb/include/sync0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2012, Facebook Inc. 
@@ -47,13 +47,6 @@ Created 9/5/1995 Heikki Tuuri extern "C" my_bool timed_mutexes; #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ -#ifdef _WIN32 -typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates - on LONG variable */ -#else -typedef byte lock_word_t; -#endif - #if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK /* By default, buffer mutexes and rwlocks will be excluded from diff --git a/storage/xtradb/include/sync0sync.ic b/storage/xtradb/include/sync0sync.ic index 8176ae55fd8..44891f91165 100644 --- a/storage/xtradb/include/sync0sync.ic +++ b/storage/xtradb/include/sync0sync.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -77,13 +77,13 @@ Performs an atomic test-and-set instruction to the lock_word field of a mutex. @return the previous value of lock_word: 0 or 1 */ UNIV_INLINE -byte +lock_word_t ib_mutex_test_and_set( -/*===============*/ +/*==================*/ ib_mutex_t* mutex) /*!< in: mutex */ { #if defined(HAVE_ATOMIC_BUILTINS) - return(os_atomic_test_and_set_byte_acquire(&mutex->lock_word, 1)); + return(os_atomic_test_and_set(&mutex->lock_word)); #else ibool ret; @@ -98,7 +98,7 @@ ib_mutex_test_and_set( } return((byte) ret); -#endif +#endif /* HAVE_ATOMIC_BUILTINS */ } /******************************************************************//** @@ -111,15 +111,12 @@ mutex_reset_lock_word( ib_mutex_t* mutex) /*!< in: mutex */ { #if defined(HAVE_ATOMIC_BUILTINS) - /* In theory __sync_lock_release should be used to release the lock. - Unfortunately, it does not work properly alone. The workaround is - that more conservative __sync_lock_test_and_set is used instead. 
*/ - os_atomic_test_and_set_byte_release(&mutex->lock_word, 0); + os_atomic_clear(&mutex->lock_word); #else mutex->lock_word = 0; - os_fast_mutex_unlock_full_barrier(&(mutex->os_fast_mutex)); -#endif + os_fast_mutex_unlock(&(mutex->os_fast_mutex)); +#endif /* HAVE_ATOMIC_BUILTINS */ } /******************************************************************//** diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 1c52bd0925b..ac0039309e5 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -47,7 +47,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_BUGFIX 26 #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 74.0 +#define PERCONA_INNODB_VERSION 76.0 #endif /* Enable UNIV_LOG_ARCHIVE in XtraDB */ diff --git a/storage/xtradb/include/ut0ut.h b/storage/xtradb/include/ut0ut.h index 0caf379d8fa..9228c25d8be 100644 --- a/storage/xtradb/include/ut0ut.h +++ b/storage/xtradb/include/ut0ut.h @@ -43,6 +43,8 @@ Created 1/20/1994 Heikki Tuuri #include <stdarg.h> /* for va_list */ +#include <string> + /** Index name prefix in fast index creation */ #define TEMP_INDEX_PREFIX '\377' /** Index name prefix in fast index creation, as a string constant */ @@ -390,7 +392,19 @@ ut_print_namel( FALSE=print other identifier */ const char* name, /*!< in: name to print */ ulint namelen);/*!< in: length of name */ - +/**********************************************************************//** +Outputs a fixed-length string, quoted as an SQL identifier. +If the string contains a slash '/', the string will be +output as two identifiers separated by a period (.), +as in SQL database_name.identifier. 
*/ +UNIV_INTERN +std::string +ut_get_name( +/*=========*/ + const trx_t* trx, /*!< in: transaction (NULL=no quotes) */ + ibool table_id,/*!< in: TRUE=print a table name, + FALSE=print other identifier */ + const char* name); /*!< in: name to print */ /**********************************************************************//** Formats a table or index name, quoted as an SQL identifier. If the name contains a slash '/', the result will contain two identifiers separated by diff --git a/storage/xtradb/lock/lock0lock.cc b/storage/xtradb/lock/lock0lock.cc index a41f60c24cd..f73c6b504dd 100644 --- a/storage/xtradb/lock/lock0lock.cc +++ b/storage/xtradb/lock/lock0lock.cc @@ -3020,8 +3020,8 @@ lock_rec_inherit_to_gap( /* If srv_locks_unsafe_for_binlog is TRUE or session is using READ COMMITTED isolation level, we do not want locks set by an UPDATE or a DELETE to be inherited as gap type locks. But we - DO want S-locks set by a consistency constraint to be inherited also - then. */ + DO want S-locks/X-locks(taken for replace) set by a consistency + constraint to be inherited also then */ for (lock = lock_rec_get_first(block, heap_no); lock != NULL; diff --git a/storage/xtradb/log/log0log.cc b/storage/xtradb/log/log0log.cc index 73551fd72f2..327bbc2f93b 100644 --- a/storage/xtradb/log/log0log.cc +++ b/storage/xtradb/log/log0log.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Google Inc. 
Portions of this file contain modifications contributed and copyrighted by @@ -256,7 +256,7 @@ log_buffer_extend( { ulint move_start; ulint move_end; - byte* tmp_buf = static_cast<byte *>(alloca(OS_FILE_LOG_BLOCK_SIZE)); + byte* tmp_buf = reinterpret_cast<byte *>(alloca(OS_FILE_LOG_BLOCK_SIZE)); mutex_enter(&(log_sys->mutex)); @@ -3728,12 +3728,7 @@ loop: lsn = log_sys->lsn; - ut_ad(srv_force_recovery != SRV_FORCE_NO_LOG_REDO - || lsn == log_sys->last_checkpoint_lsn + LOG_BLOCK_HDR_SIZE); - - - if ((srv_force_recovery != SRV_FORCE_NO_LOG_REDO - && lsn != log_sys->last_checkpoint_lsn) + if (lsn != log_sys->last_checkpoint_lsn || (srv_track_changed_pages && (tracked_lsn != log_sys->last_checkpoint_lsn)) #ifdef UNIV_LOG_ARCHIVE diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc index 0b9b9aa3205..75c2a49fa01 100644 --- a/storage/xtradb/log/log0online.cc +++ b/storage/xtradb/log/log0online.cc @@ -538,7 +538,7 @@ log_online_start_bitmap_file(void) innodb_file_bmp_key, log_bmp_sys->out.name, OS_FILE_CREATE, - OS_FILE_READ_WRITE, + OS_FILE_READ_WRITE_CACHED, &success); } if (UNIV_UNLIKELY(!success)) { @@ -699,7 +699,7 @@ log_online_read_init(void) log_bmp_sys->out.file = os_file_create_simple_no_error_handling (innodb_file_bmp_key, log_bmp_sys->out.name, OS_FILE_OPEN, - OS_FILE_READ_WRITE, &success); + OS_FILE_READ_WRITE_CACHED, &success); if (!success) { @@ -1482,10 +1482,20 @@ log_online_open_bitmap_file_read_only( file */ { ibool success = FALSE; + size_t srv_data_home_len; ut_ad(name[0] != '\0'); - ut_snprintf(bitmap_file->name, FN_REFLEN, "%s%s", srv_data_home, name); + srv_data_home_len = strlen(srv_data_home); + if (srv_data_home_len + && srv_data_home[srv_data_home_len-1] + != SRV_PATH_SEPARATOR) { + ut_snprintf(bitmap_file->name, FN_REFLEN, "%s%c%s", + srv_data_home, SRV_PATH_SEPARATOR, name); + } else { + ut_snprintf(bitmap_file->name, FN_REFLEN, "%s%s", + srv_data_home, name); + } bitmap_file->file = 
os_file_create_simple_no_error_handling(innodb_file_bmp_key, bitmap_file->name, diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc index c7482e93c25..7d12c2fb678 100644 --- a/storage/xtradb/log/log0recv.cc +++ b/storage/xtradb/log/log0recv.cc @@ -1933,7 +1933,7 @@ loop: goto loop; } - ut_ad((!allow_ibuf) == mutex_own(&log_sys->mutex)); + ut_ad((allow_ibuf == 0) == (mutex_own(&log_sys->mutex) != 0)); if (!allow_ibuf) { recv_no_ibuf_operations = TRUE; @@ -3097,7 +3097,7 @@ recv_recovery_from_checkpoint_start_func( #endif /* UNIV_LOG_ARCHIVE */ byte* buf; byte* log_hdr_buf; - byte* log_hdr_buf_base = static_cast<byte *> + byte* log_hdr_buf_base = reinterpret_cast<byte *> (alloca(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE)); dberr_t err; ut_when_dtor<recv_dblwr_t> tmp(recv_sys->dblwr); diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index d5bd6b429ea..757e541962e 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -1413,6 +1413,31 @@ os_file_create_simple_func( return(file); } +/** Disable OS I/O caching on the file if the file type and server +configuration requires it. 
+@param file handle to the file +@param name name of the file, for diagnostics +@param mode_str operation on the file, for diagnostics +@param type OS_LOG_FILE or OS_DATA_FILE +@param access_type if OS_FILE_READ_WRITE_CACHED, then caching will be disabled +unconditionally, ignored otherwise */ +static +void +os_file_set_nocache_if_needed(os_file_t file, const char* name, + const char *mode_str, ulint type, + ulint access_type) +{ + if (srv_read_only_mode || access_type == OS_FILE_READ_WRITE_CACHED) + return; + + if (srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT + || (type != OS_LOG_FILE + && (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT + || (srv_unix_file_flush_method + == SRV_UNIX_O_DIRECT_NO_FSYNC)))) + os_file_set_nocache(file, name, mode_str); +} + /****************************************************************//** NOTE! Use the corresponding macro os_file_create_simple_no_error_handling(), not directly this function! @@ -1427,9 +1452,11 @@ os_file_create_simple_no_error_handling_func( null-terminated string */ ulint create_mode,/*!< in: create mode */ ulint access_type,/*!< in: OS_FILE_READ_ONLY, - OS_FILE_READ_WRITE, or - OS_FILE_READ_ALLOW_DELETE; the last option is - used by a backup program reading the file */ + OS_FILE_READ_WRITE, + OS_FILE_READ_ALLOW_DELETE (used by a backup + program reading the file), or + OS_FILE_READ_WRITE_CACHED (disable O_DIRECT + if it would be enabled otherwise) */ ibool* success)/*!< out: TRUE if succeed, FALSE if error */ { os_file_t file; @@ -1465,7 +1492,8 @@ os_file_create_simple_no_error_handling_func( access = GENERIC_READ; } else if (srv_read_only_mode) { access = GENERIC_READ; - } else if (access_type == OS_FILE_READ_WRITE) { + } else if (access_type == OS_FILE_READ_WRITE + || access_type == OS_FILE_READ_WRITE_CACHED) { access = GENERIC_READ | GENERIC_WRITE; } else if (access_type == OS_FILE_READ_ALLOW_DELETE) { @@ -1520,7 +1548,8 @@ os_file_create_simple_no_error_handling_func( } else { ut_a(access_type == 
OS_FILE_READ_WRITE - || access_type == OS_FILE_READ_ALLOW_DELETE); + || access_type == OS_FILE_READ_ALLOW_DELETE + || access_type == OS_FILE_READ_WRITE_CACHED); create_flag = O_RDWR; } @@ -1552,18 +1581,16 @@ os_file_create_simple_no_error_handling_func( /* This function is always called for data files, we should disable OS caching (O_DIRECT) here as we do in os_file_create_func(), so we open the same file in the same mode, see man page of open(2). */ - if (!srv_read_only_mode - && *success - && (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT - || srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)) { - - os_file_set_nocache(file, name, mode_str); + if (*success) { + os_file_set_nocache_if_needed(file, name, mode_str, + OS_DATA_FILE, access_type); } #ifdef USE_FILE_LOCK if (!srv_read_only_mode && *success - && access_type == OS_FILE_READ_WRITE + && (access_type == OS_FILE_READ_WRITE + || access_type == OS_FILE_READ_WRITE_CACHED) && os_file_lock(file, name)) { *success = FALSE; @@ -1584,7 +1611,7 @@ UNIV_INTERN void os_file_set_nocache( /*================*/ - int fd /*!< in: file descriptor to alter */ + os_file_t fd /*!< in: file descriptor to alter */ __attribute__((unused)), const char* file_name /*!< in: used in the diagnostic message */ @@ -1948,17 +1975,9 @@ os_file_create_func( } while (retry); - if (!srv_read_only_mode - && *success - && type != OS_LOG_FILE - && (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT - || srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)) { + if (*success) { - os_file_set_nocache(file, name, mode_str); - } else if (!srv_read_only_mode - && *success - && srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT) { - os_file_set_nocache(file, name, mode_str); + os_file_set_nocache_if_needed(file, name, mode_str, type, 0); } #ifdef USE_FILE_LOCK @@ -5046,53 +5065,25 @@ os_aio_windows_handle( } if (retry) { - /* retry failed read/write operation synchronously. - No need to hold array->mutex. 
*/ - -#ifdef UNIV_PFS_IO - /* This read/write does not go through os_file_read - and os_file_write APIs, need to register with - performance schema explicitly here. */ - struct PSI_file_locker* locker = NULL; - register_pfs_file_io_begin(locker, slot->file, slot->len, - (slot->type == OS_FILE_WRITE) - ? PSI_FILE_WRITE - : PSI_FILE_READ, - __FILE__, __LINE__); -#endif + LARGE_INTEGER li; + li.LowPart = slot->control.Offset; + li.HighPart = slot->control.OffsetHigh; ut_a((slot->len & 0xFFFFFFFFUL) == slot->len); switch (slot->type) { case OS_FILE_WRITE: ret_val = os_file_write(slot->name, slot->file, slot->buf, - slot->control.Offset, slot->control.OffsetHigh, slot->len); + li.QuadPart, slot->len); break; case OS_FILE_READ: ret_val = os_file_read(slot->file, slot->buf, - slot->control.Offset, slot->control.OffsetHigh, slot->len); + li.QuadPart, slot->len); break; default: ut_error; } -#ifdef UNIV_PFS_IO - register_pfs_file_io_end(locker, len); -#endif - - if (!ret && GetLastError() == ERROR_IO_PENDING) { - /* aio was queued successfully! 
- We want a synchronous i/o operation on a - file where we also use async i/o: in Windows - we must use the same wait mechanism as for - async i/o */ - - ret = GetOverlappedResult(slot->file, - &(slot->control), - &len, TRUE); - } - - ret_val = ret && len == slot->len; } os_aio_array_free_slot((os_aio_array_t *)slot->arr, slot); diff --git a/storage/xtradb/os/os0proc.cc b/storage/xtradb/os/os0proc.cc index ec629430baf..ff6d65e4ae6 100644 --- a/storage/xtradb/os/os0proc.cc +++ b/storage/xtradb/os/os0proc.cc @@ -32,12 +32,6 @@ Created 9/30/1995 Heikki Tuuri #include "ut0mem.h" #include "ut0byte.h" -/* Linux release version */ -#if defined(UNIV_LINUX) && defined(_GNU_SOURCE) -#include <string.h> /* strverscmp() */ -#include <sys/utsname.h> /* uname() */ -#endif - /* FreeBSD for example has only MAP_ANON, Linux has MAP_ANONYMOUS and MAP_ANON but MAP_ANON is marked as deprecated */ #if defined(MAP_ANONYMOUS) @@ -46,13 +40,6 @@ MAP_ANON but MAP_ANON is marked as deprecated */ #define OS_MAP_ANON MAP_ANON #endif -/* Linux's MAP_POPULATE */ -#if defined(MAP_POPULATE) -#define OS_MAP_POPULATE MAP_POPULATE -#else -#define OS_MAP_POPULATE 0 -#endif - UNIV_INTERN ibool os_use_large_pages; /* Large page size. This may be a boot-time option on some platforms */ UNIV_INTERN ulint os_large_page_size; @@ -76,32 +63,13 @@ os_proc_get_number(void) } /****************************************************************//** -Retrieve and compare operating system release. -@return TRUE if the OS release is equal to, or later than release. */ -UNIV_INTERN -ibool -os_compare_release( -/*===============*/ - const char* release /*!< in: OS release */ - __attribute__((unused))) -{ -#if defined(UNIV_LINUX) && defined(_GNU_SOURCE) - struct utsname name; - return uname(&name) == 0 && strverscmp(name.release, release) >= 0; -#else - return 0; -#endif -} - -/****************************************************************//** Allocates large pages memory. 
@return allocated memory */ UNIV_INTERN void* os_mem_alloc_large( /*===============*/ - ulint* n, /*!< in/out: number of bytes */ - ibool populate) /*!< in: virtual page preallocation */ + ulint* n) /*!< in/out: number of bytes */ { void* ptr; ulint size; @@ -187,13 +155,12 @@ skip: ut_ad(ut_is_2pow(size)); size = *n = ut_2pow_round(*n + (size - 1), size); ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | OS_MAP_ANON | - (populate ? OS_MAP_POPULATE : 0), -1, 0); + MAP_PRIVATE | OS_MAP_ANON, -1, 0); if (UNIV_UNLIKELY(ptr == (void*) -1)) { fprintf(stderr, "InnoDB: mmap(%lu bytes) failed;" " errno %lu\n", (ulong) size, (ulong) errno); - return(NULL); + ptr = NULL; } else { os_fast_mutex_lock(&ut_list_mutex); ut_total_allocated_memory += size; @@ -201,25 +168,6 @@ skip: UNIV_MEM_ALLOC(ptr, size); } #endif - -#if OS_MAP_ANON && OS_MAP_POPULATE - /* MAP_POPULATE is only supported for private mappings - since Linux 2.6.23. */ - populate = populate && !os_compare_release("2.6.23"); - - if (populate) { - fprintf(stderr, "InnoDB: Warning: mmap(MAP_POPULATE) " - "is not supported for private mappings. " - "Forcing preallocation by faulting in pages.\n"); - } -#endif - - /* Initialize the entire buffer to force the allocation - of physical memory page frames. */ - if (populate) { - memset(ptr, '\0', size); - } - return(ptr); } diff --git a/storage/xtradb/row/row0ins.cc b/storage/xtradb/row/row0ins.cc index 934a91edc5c..823b169e6b3 100644 --- a/storage/xtradb/row/row0ins.cc +++ b/storage/xtradb/row/row0ins.cc @@ -261,7 +261,13 @@ row_ins_sec_index_entry_by_modify( update = row_upd_build_sec_rec_difference_binary( rec, cursor->index, *offsets, entry, heap); - if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets))) { + /* If operating in fake_change mode then flow will not mark the record + deleted but will still assume it and take delete-mark path. 
Condition + below has a different path if record is not marked deleted but we need + to still by-pass it given that original flow has taken this path for + fake_change mode execution assuming record is delete-marked. */ + if (!rec_get_deleted_flag(rec, rec_offs_comp(*offsets)) + && UNIV_UNLIKELY(!thr_get_trx(thr)->fake_changes)) { /* We should never insert in place of a record that has not been delete-marked. The only exception is when online CREATE INDEX copied the changes that we already @@ -730,10 +736,12 @@ row_ins_set_detailed( rewind(srv_misc_tmpfile); if (os_file_set_eof(srv_misc_tmpfile)) { + std::string fk_str; ut_print_name(srv_misc_tmpfile, trx, TRUE, foreign->foreign_table_name); - dict_print_info_on_foreign_key_in_create_format( - srv_misc_tmpfile, trx, foreign, FALSE); + fk_str = dict_print_info_on_foreign_key_in_create_format( + trx, foreign, FALSE); + fputs(fk_str.c_str(), srv_misc_tmpfile); trx_set_detailed_error_from_file(trx, srv_misc_tmpfile); } else { trx_set_detailed_error(trx, "temp file operation failed"); @@ -798,6 +806,8 @@ row_ins_foreign_report_err( const dtuple_t* entry) /*!< in: index entry in the parent table */ { + std::string fk_str; + if (srv_read_only_mode) { return; } @@ -812,8 +822,9 @@ row_ins_foreign_report_err( fputs("Foreign key constraint fails for table ", ef); ut_print_name(ef, trx, TRUE, foreign->foreign_table_name); fputs(":\n", ef); - dict_print_info_on_foreign_key_in_create_format(ef, trx, foreign, + fk_str = dict_print_info_on_foreign_key_in_create_format(trx, foreign, TRUE); + fputs(fk_str.c_str(), ef); putc('\n', ef); fputs(errstr, ef); fputs(" in parent table, in index ", ef); @@ -853,6 +864,8 @@ row_ins_foreign_report_add_err( const dtuple_t* entry) /*!< in: index entry to insert in the child table */ { + std::string fk_str; + if (srv_read_only_mode) { return; } @@ -866,8 +879,9 @@ row_ins_foreign_report_add_err( fputs("Foreign key constraint fails for table ", ef); ut_print_name(ef, trx, TRUE, 
foreign->foreign_table_name); fputs(":\n", ef); - dict_print_info_on_foreign_key_in_create_format(ef, trx, foreign, + fk_str = dict_print_info_on_foreign_key_in_create_format(trx, foreign, TRUE); + fputs(fk_str.c_str(), ef); fputs("\nTrying to add in child table, in index ", ef); ut_print_name(ef, trx, FALSE, foreign->foreign_index->name); if (entry) { @@ -1509,6 +1523,7 @@ run_again: if (!srv_read_only_mode && check_ref) { FILE* ef = dict_foreign_err_file; + std::string fk_str; row_ins_set_detailed(trx, foreign); @@ -1518,8 +1533,9 @@ run_again: ut_print_name(ef, trx, TRUE, foreign->foreign_table_name); fputs(":\n", ef); - dict_print_info_on_foreign_key_in_create_format( - ef, trx, foreign, TRUE); + fk_str = dict_print_info_on_foreign_key_in_create_format( + trx, foreign, TRUE); + fputs(fk_str.c_str(), ef); fputs("\nTrying to add to index ", ef); ut_print_name(ef, trx, FALSE, foreign->foreign_index->name); @@ -2509,9 +2525,14 @@ err_exit: effectively "roll back" the operation. */ ut_a(err == DB_SUCCESS); dtuple_big_rec_free(big_rec); + } else if (big_rec != NULL + && UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) { + dtuple_big_rec_free(big_rec); } - if (err == DB_SUCCESS && dict_index_is_online_ddl(index)) { + if (err == DB_SUCCESS + && dict_index_is_online_ddl(index) + && UNIV_LIKELY(!thr_get_trx(thr)->fake_changes)) { row_log_table_insert(rec, index, offsets); } @@ -2555,8 +2576,8 @@ err_exit: if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) { - /* skip store extern */ - mem_heap_free(big_rec->heap); + dtuple_convert_back_big_rec( + index, entry, big_rec); goto func_exit; } @@ -2576,7 +2597,8 @@ err_exit: dtuple_convert_back_big_rec(index, entry, big_rec); } else { if (err == DB_SUCCESS - && dict_index_is_online_ddl(index)) { + && dict_index_is_online_ddl(index) + && !UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) { row_log_table_insert( insert_rec, index, offsets); } diff --git a/storage/xtradb/row/row0log.cc b/storage/xtradb/row/row0log.cc index 
84992ed08cc..7c7ae7be2e0 100644 --- a/storage/xtradb/row/row0log.cc +++ b/storage/xtradb/row/row0log.cc @@ -208,8 +208,7 @@ row_log_block_allocate( DBUG_ENTER("row_log_block_allocate"); if (log_buf.block == NULL) { log_buf.size = srv_sort_buf_size; - log_buf.block = (byte*) os_mem_alloc_large(&log_buf.size, - FALSE); + log_buf.block = (byte*) os_mem_alloc_large(&log_buf.size); DBUG_EXECUTE_IF("simulate_row_log_allocation_failure", if (log_buf.block) os_mem_free_large(log_buf.block, log_buf.size); diff --git a/storage/xtradb/row/row0merge.cc b/storage/xtradb/row/row0merge.cc index 3447d15ed1c..1c6cbdbd427 100644 --- a/storage/xtradb/row/row0merge.cc +++ b/storage/xtradb/row/row0merge.cc @@ -2312,7 +2312,6 @@ row_merge_sort( { const ulint half = file->offset / 2; ulint num_runs; - ulint cur_run = 0; ulint* run_offset; dberr_t error = DB_SUCCESS; DBUG_ENTER("row_merge_sort"); @@ -2336,18 +2335,23 @@ row_merge_sort( of file marker). Thus, it must be at least one block. */ ut_ad(file->offset > 0); - thd_progress_init(trx->mysql_thd, num_runs); + /* Progress report only for "normal" indexes. */ + if (!(dup->index->type & DICT_FTS)) { + thd_progress_init(trx->mysql_thd, 1); + } /* Merge the runs until we have one big run */ do { - cur_run++; - error = row_merge(trx, dup, file, block, tmpfd, &num_runs, run_offset); /* Report progress of merge sort to MySQL for - show processlist progress field */ - thd_progress_report(trx->mysql_thd, cur_run, num_runs); + show processlist progress field only for + "normal" indexes. 
*/ + if (!(dup->index->type & DICT_FTS)) { + thd_progress_report(trx->mysql_thd, file->offset - num_runs, file->offset); + + } if (error != DB_SUCCESS) { break; @@ -2358,7 +2362,9 @@ row_merge_sort( mem_free(run_offset); - thd_progress_end(trx->mysql_thd); + if (!(dup->index->type & DICT_FTS)) { + thd_progress_end(trx->mysql_thd); + } DBUG_RETURN(error); } @@ -3120,7 +3126,7 @@ row_merge_file_create( if (merge_file->fd >= 0) { if (srv_disable_sort_file_cache) { - os_file_set_nocache(merge_file->fd, + os_file_set_nocache(OS_FILE_FROM_FD(merge_file->fd), "row0merge.cc", "sort"); } } @@ -3614,7 +3620,7 @@ row_merge_build_indexes( block_size = 3 * srv_sort_buf_size; block = static_cast<row_merge_block_t*>( - os_mem_alloc_large(&block_size, FALSE)); + os_mem_alloc_large(&block_size)); if (block == NULL) { DBUG_RETURN(DB_OUT_OF_MEMORY); diff --git a/storage/xtradb/row/row0mysql.cc b/storage/xtradb/row/row0mysql.cc index 17358570ae0..6bb5a61b635 100644 --- a/storage/xtradb/row/row0mysql.cc +++ b/storage/xtradb/row/row0mysql.cc @@ -1389,7 +1389,8 @@ error_exit: return(err); } - if (dict_table_has_fts_index(table)) { + if (dict_table_has_fts_index(table) + && UNIV_LIKELY(!thr_get_trx(thr)->fake_changes)) { doc_id_t doc_id; /* Extract the doc id from the hidden FTS column */ @@ -1869,6 +1870,12 @@ run_again: columns would not affect statistics. */ if (node->is_delete || !(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { row_update_statistics_if_needed(prebuilt->table); + } else { + /* Update the table modification counter even when + non-indexed columns change if statistics is initialized. 
*/ + if (prebuilt->table->stat_initialized) { + prebuilt->table->stat_modified_counter++; + } } trx->op_info = ""; diff --git a/storage/xtradb/row/row0upd.cc b/storage/xtradb/row/row0upd.cc index 30b7dc17d9c..16795aed0db 100644 --- a/storage/xtradb/row/row0upd.cc +++ b/storage/xtradb/row/row0upd.cc @@ -2624,6 +2624,7 @@ row_upd_clust_step( ut_a(pcur->rel_pos == BTR_PCUR_ON); ulint mode; + ulint search_mode; #ifdef UNIV_DEBUG /* Work around Bug#14626800 ASSERTION FAILURE IN DEBUG_SYNC(). @@ -2635,17 +2636,29 @@ row_upd_clust_step( } #endif /* UNIV_DEBUG */ + /* If running with fake_changes mode on then switch from modify to + search so that code takes only s-latch and not x-latch. + For dry-run (fake-changes) s-latch is acceptable. Taking x-latch will + make it more restrictive and will block real changes/workflow. */ if (UNIV_UNLIKELY(thr_get_trx(thr)->fake_changes)) { - mode = BTR_SEARCH_LEAF; - } else if (dict_index_is_online_ddl(index)) { - ut_ad(node->table->id != DICT_INDEXES_ID); - mode = BTR_MODIFY_LEAF | BTR_ALREADY_S_LATCHED; - mtr_s_lock(dict_index_get_lock(index), &mtr); + mode = BTR_MODIFY_LEAF; + search_mode = BTR_SEARCH_LEAF; } else { mode = BTR_MODIFY_LEAF; + search_mode = BTR_MODIFY_LEAF; + } + + if (dict_index_is_online_ddl(index)) { + + ut_ad(node->table->id != DICT_INDEXES_ID); + + mode |= BTR_ALREADY_S_LATCHED; + search_mode |= BTR_ALREADY_S_LATCHED; + + mtr_s_lock(dict_index_get_lock(index), &mtr); } - success = btr_pcur_restore_position(mode, pcur, &mtr); + success = btr_pcur_restore_position(search_mode, pcur, &mtr); if (!success) { err = DB_RECORD_NOT_FOUND; @@ -2663,6 +2676,10 @@ row_upd_clust_step( ut_ad(!dict_index_is_online_ddl(index)); + /* Action in fake change mode shouldn't cause changes + in system tables. 
*/ + ut_ad(UNIV_LIKELY(!thr_get_trx(thr)->fake_changes)); + dict_drop_index_tree(btr_pcur_get_rec(pcur), &mtr); mtr_commit(&mtr); @@ -2694,6 +2711,8 @@ row_upd_clust_step( } } + /* This check passes as the function manipulates x-lock to s-lock + if operating in fake-change mode. */ ut_ad(lock_trx_has_rec_x_lock(thr_get_trx(thr), index->table, btr_pcur_get_block(pcur), page_rec_get_heap_no(rec))); diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index 769942ce245..8b6b79f7e63 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2013, 2015, MariaDB Corporation. @@ -150,6 +150,9 @@ UNIV_INTERN ulint srv_file_format = 0; UNIV_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to set it to the highest format we support. */ UNIV_INTERN ulint srv_max_file_format_at_startup = UNIV_FORMAT_MAX; +/** Set if InnoDB operates in read-only mode or innodb-force-recovery +is greater than SRV_FORCE_NO_TRX_UNDO. */ +UNIV_INTERN my_bool high_level_read_only; #if UNIV_FORMAT_A # error "UNIV_FORMAT_A must be 0!" @@ -168,6 +171,7 @@ OS (provided we compiled Innobase with it in), otherwise we will use simulated aio we build below with threads. 
Currently we support native aio on windows and linux */ UNIV_INTERN my_bool srv_use_native_aio = TRUE; +UNIV_INTERN my_bool srv_numa_interleave = FALSE; UNIV_INTERN my_bool srv_lock_timeout_active = FALSE; @@ -258,8 +262,6 @@ UNIV_INTERN const byte* srv_latin1_ordering; UNIV_INTERN my_bool srv_use_sys_malloc = TRUE; /* requested size in kilobytes */ UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX; -/* force virtual page preallocation (prefault) */ -UNIV_INTERN my_bool srv_buf_pool_populate = FALSE; /* requested number of buffer pool instances */ UNIV_INTERN ulint srv_buf_pool_instances = 1; /* number of locks to protect buf_pool->page_hash */ @@ -272,6 +274,8 @@ UNIV_INTERN ulong srv_flush_neighbors = 1; UNIV_INTERN ulint srv_buf_pool_old_size; /* current size in kilobytes */ UNIV_INTERN ulint srv_buf_pool_curr_size = 0; +/* dump this percentage of each buffer pool during BP dump */ +UNIV_INTERN ulong srv_buf_pool_dump_pct; /* size in bytes */ UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX; UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX; @@ -3313,13 +3317,8 @@ srv_do_purge( } n_pages_purged = trx_purge( - n_use_threads, srv_purge_batch_size, false); - - if (!(count++ % TRX_SYS_N_RSEGS)) { - /* Force a truncate of the history list. */ - n_pages_purged += trx_purge( - 1, srv_purge_batch_size, true); - } + n_use_threads, srv_purge_batch_size, + (++count % TRX_SYS_N_RSEGS) == 0); *n_total_purged += n_pages_purged; @@ -3519,8 +3518,17 @@ DECLARE_THREAD(srv_purge_coordinator_thread)( n_pages_purged = trx_purge(1, srv_purge_batch_size, false); } - /* Force a truncate of the history list. */ - n_pages_purged = trx_purge(1, srv_purge_batch_size, true); + /* This trx_purge is called to remove any undo records (added by + background threads) after completion of the above loop. 
When + srv_fast_shutdown != 0, a large batch size can cause significant + delay in shutdown, so reduce the batch size to the magic number 20 + (which was the default in 5.5), which we hope will be sufficient to + remove all the undo records. */ + const uint temp_batch_size = 20; + + n_pages_purged = trx_purge(1, srv_purge_batch_size <= temp_batch_size + ? srv_purge_batch_size : temp_batch_size, + true); ut_a(n_pages_purged == 0 || srv_fast_shutdown != 0); /* The task queue should always be empty, independent of fast diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index de51ff18ef3..c2994c57ae7 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -1617,9 +1617,8 @@ innobase_start_or_create_for_mysql(void) size_t dirnamelen; bool sys_datafiles_created = false; - if (srv_force_recovery > SRV_FORCE_NO_TRX_UNDO) { - srv_read_only_mode = true; - } + high_level_read_only = srv_read_only_mode + || srv_force_recovery > SRV_FORCE_NO_TRX_UNDO; if (srv_read_only_mode) { ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode"); @@ -2071,8 +2070,7 @@ innobase_start_or_create_for_mysql(void) ib_logf(IB_LOG_LEVEL_INFO, "Initializing buffer pool, size = %.1f%c", size, unit); - err = buf_pool_init(srv_buf_pool_size, (ibool) srv_buf_pool_populate, - srv_buf_pool_instances); + err = buf_pool_init(srv_buf_pool_size, srv_buf_pool_instances); if (err != DB_SUCCESS) { ib_logf(IB_LOG_LEVEL_ERROR, diff --git a/storage/xtradb/trx/trx0sys.cc b/storage/xtradb/trx/trx0sys.cc index e26d23e5892..6f524718052 100644 --- a/storage/xtradb/trx/trx0sys.cc +++ b/storage/xtradb/trx/trx0sys.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -29,7 +29,10 @@ Created 3/26/1996 Heikki Tuuri #include "trx0sys.ic" #endif -#ifndef UNIV_HOTBACKUP +#ifdef UNIV_HOTBACKUP +#include "fsp0types.h" + +#else /* !UNIV_HOTBACKUP */ #include "fsp0fsp.h" #include "mtr0log.h" #include "mtr0log.h" @@ -1258,18 +1261,15 @@ trx_sys_read_pertable_file_format_id( /* get the file format from the page */ ptr = page + 54; flags = mach_read_from_4(ptr); - if (flags == 0) { - /* file format is Antelope */ - *format_id = 0; - return(TRUE); - } else if (flags & 1) { - /* tablespace flags are ok */ - *format_id = (flags / 32) % 128; - return(TRUE); - } else { + + if (!fsp_flags_is_valid(flags)) { /* bad tablespace flags */ return(FALSE); } + + *format_id = FSP_FLAGS_GET_POST_ANTELOPE(flags); + + return(TRUE); } diff --git a/storage/xtradb/trx/trx0trx.cc b/storage/xtradb/trx/trx0trx.cc index b69e8aec30b..85b85b772e7 100644 --- a/storage/xtradb/trx/trx0trx.cc +++ b/storage/xtradb/trx/trx0trx.cc @@ -1072,6 +1072,12 @@ trx_start_low( trx->id = trx_sys_get_new_trx_id(); + /* Cache the state of fake_changes that the transaction will use for + its lifetime. Any change in the session/global fake_changes setting during + the lifetime of the transaction will not be honored by an already started + transaction. */ + trx->fake_changes = thd_fake_changes(trx->mysql_thd); + ut_ad(!trx->in_rw_trx_list); ut_ad(!trx->in_ro_trx_list); diff --git a/storage/xtradb/ut/ut0ut.cc b/storage/xtradb/ut/ut0ut.cc index 121cbdb7bc0..3de3d6e5ee6 100644 --- a/storage/xtradb/ut/ut0ut.cc +++ b/storage/xtradb/ut/ut0ut.cc @@ -43,6 +43,7 @@ Created 5/11/1994 Heikki Tuuri # include "trx0trx.h" # include "ha_prototypes.h" # include "mysql_com.h" /* NAME_LEN */ +# include <string> #endif /* UNIV_HOTBACKUP */ /** A constant to prevent the compiler from optimizing ut_delay() away. 
*/ @@ -563,6 +564,35 @@ ut_print_namel( } /**********************************************************************//** +Returns a NUL-terminated name converted by innobase_convert_name(). +If the name contains a slash '/', the result holds two identifiers +separated by a period (.), as in SQL database_name.identifier. +@return the converted name as a std::string */ +UNIV_INTERN +std::string +ut_get_name( +/*=========*/ + const trx_t* trx, /*!< in: transaction (NULL=no quotes) */ + ibool table_id,/*!< in: TRUE=convert a table name, + FALSE=convert other identifier */ + const char* name) /*!< in: NUL-terminated name to convert */ +{ + /* 2 * NAME_LEN for database and table name, + and some slack for the #mysql50# prefix and quotes */ + char buf[3 * NAME_LEN]; + const char* bufend; + ulint namelen = strlen(name); + + bufend = innobase_convert_name(buf, sizeof buf, + name, namelen, + trx ? trx->mysql_thd : NULL, + table_id); + /* Copy exactly the bytes written; storing a terminator at + buf[bufend - buf] could land one past the end of a full buffer. */ + return(std::string(buf, bufend - buf)); +} + +/**********************************************************************//** Formats a table or index name, quoted as an SQL identifier. If the name contains a slash '/', the result will contain two identifiers separated by a period (.), as in SQL database_name.identifier. |