diff options
author | Nirbhay Choubey <nirbhay@mariadb.com> | 2015-10-31 18:07:02 -0400 |
---|---|---|
committer | Nirbhay Choubey <nirbhay@mariadb.com> | 2015-10-31 18:07:02 -0400 |
commit | 4d1511296288782df0e3d9373396724e250b24c1 (patch) | |
tree | 9fa47d141b71933859d6c2da585e5bb5e52db212 /storage/innobase | |
parent | 17b0b45b1de41a1b188c5de6c3e9d8e6ecc48a72 (diff) | |
parent | d775ecdd010daad4dc6147fba58acd006bf2c60c (diff) | |
download | mariadb-git-4d1511296288782df0e3d9373396724e250b24c1.tar.gz |
Merge tag 'mariadb-10.0.22' into 10.0-galera
Diffstat (limited to 'storage/innobase')
23 files changed, 541 insertions, 159 deletions
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index 8d5d626fb28..66c67c14314 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -19,6 +19,20 @@ INCLUDE(CheckFunctionExists) INCLUDE(CheckCSourceCompiles) INCLUDE(CheckCSourceRuns) +IF(CMAKE_CROSSCOMPILING) + # Use CHECK_C_SOURCE_COMPILES instead of CHECK_C_SOURCE_RUNS when + # cross-compiling. Not as precise, but usually good enough. + # This only make sense for atomic tests in this file, this trick doesn't + # work in a general case. + MACRO(CHECK_C_SOURCE SOURCE VAR) + CHECK_C_SOURCE_COMPILES("${SOURCE}" "${VAR}") + ENDMACRO() +ELSE() + MACRO(CHECK_C_SOURCE SOURCE VAR) + CHECK_C_SOURCE_RUNS("${SOURCE}" "${VAR}") + ENDMACRO() +ENDIF() + # OS tests IF(UNIX) IF(CMAKE_SYSTEM_NAME STREQUAL "Linux") @@ -29,6 +43,9 @@ IF(UNIX) ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1) LINK_LIBRARIES(aio) ENDIF() + IF(HAVE_LIBNUMA) + LINK_LIBRARIES(numa) + ENDIF() ELSEIF(CMAKE_SYSTEM_NAME MATCHES "HP*") ADD_DEFINITIONS("-DUNIV_HPUX") ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "AIX") @@ -57,15 +74,14 @@ SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DUNIV_DEBUG -DUNIV_SYNC_DEB CHECK_FUNCTION_EXISTS(sched_getcpu HAVE_SCHED_GETCPU) IF(NOT MSVC) -# either define HAVE_IB_GCC_ATOMIC_BUILTINS or not -IF(NOT CMAKE_CROSSCOMPILING) + # either define HAVE_IB_GCC_ATOMIC_BUILTINS or not # workaround for gcc 4.1.2 RHEL5/x86, gcc atomic ops only work under -march=i686 IF(CMAKE_SYSTEM_PROCESSOR STREQUAL "i686" AND CMAKE_COMPILER_IS_GNUCC AND CMAKE_C_COMPILER_VERSION VERSION_LESS "4.1.3") SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=i686") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=i686") ENDIF() - CHECK_C_SOURCE_RUNS( + CHECK_C_SOURCE( " int main() { @@ -96,7 +112,7 @@ IF(NOT CMAKE_CROSSCOMPILING) }" HAVE_IB_GCC_ATOMIC_BUILTINS ) - CHECK_C_SOURCE_RUNS( + CHECK_C_SOURCE( " int main() { @@ -112,7 +128,7 @@ IF(NOT CMAKE_CROSSCOMPILING) }" HAVE_IB_GCC_ATOMIC_BUILTINS_BYTE ) - CHECK_C_SOURCE_RUNS( + CHECK_C_SOURCE( "#include<stdint.h> int main() { @@ -132,7 +148,7 @@ IF(NOT CMAKE_CROSSCOMPILING) }" HAVE_IB_GCC_ATOMIC_BUILTINS_64 ) - CHECK_C_SOURCE_RUNS( + CHECK_C_SOURCE( "#include<stdint.h> int main() { @@ -141,7 +157,7 @@ IF(NOT CMAKE_CROSSCOMPILING) }" HAVE_IB_GCC_SYNC_SYNCHRONISE ) - CHECK_C_SOURCE_RUNS( + CHECK_C_SOURCE( "#include<stdint.h> int main() { @@ -151,7 +167,18 @@ IF(NOT CMAKE_CROSSCOMPILING) }" HAVE_IB_GCC_ATOMIC_THREAD_FENCE ) -ENDIF() + CHECK_C_SOURCE( + "#include<stdint.h> + int main() + { + unsigned char c; + + __atomic_test_and_set(&c, __ATOMIC_ACQUIRE); + __atomic_clear(&c, __ATOMIC_RELEASE); + return(0); + }" + HAVE_IB_GCC_ATOMIC_TEST_AND_SET + ) IF(HAVE_IB_GCC_ATOMIC_BUILTINS) ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_BUILTINS=1) @@ -173,28 +200,31 @@ IF(HAVE_IB_GCC_ATOMIC_THREAD_FENCE) ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_THREAD_FENCE=1) ENDIF() - # either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not -IF(NOT CMAKE_CROSSCOMPILING) - CHECK_C_SOURCE_RUNS( - " - #include <pthread.h> - #include <string.h> +IF(HAVE_IB_GCC_ATOMIC_TEST_AND_SET) + ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_TEST_AND_SET=1) +ENDIF() - int main() { - pthread_t x1; - pthread_t x2; - pthread_t x3; +# either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not +CHECK_C_SOURCE( +" +#include <pthread.h> +#include <string.h> - memset(&x1, 0x0, sizeof(x1)); - memset(&x2, 0x0, sizeof(x2)); - memset(&x3, 0x0, sizeof(x3)); +int main() { + pthread_t x1; + pthread_t x2; + pthread_t x3; - __sync_bool_compare_and_swap(&x1, x2, x3); + memset(&x1, 0x0, sizeof(x1)); + memset(&x2, 0x0, sizeof(x2)); + memset(&x3, 0x0, sizeof(x3)); + + __sync_bool_compare_and_swap(&x1, x2, x3); + + return(0); +}" +HAVE_IB_ATOMIC_PTHREAD_T_GCC) - return(0); - }" - HAVE_IB_ATOMIC_PTHREAD_T_GCC) -ENDIF() IF(HAVE_IB_ATOMIC_PTHREAD_T_GCC) ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_GCC=1) ENDIF() @@ -225,7 +255,6 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") ADD_DEFINITIONS(-DHAVE_IB_SOLARIS_ATOMICS=1) ENDIF() - IF(NOT CMAKE_CROSSCOMPILING) # either define HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS or not CHECK_C_SOURCE_COMPILES( " #include <pthread.h> @@ -264,7 +293,7 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") return(0); }" HAVE_IB_MACHINE_BARRIER_SOLARIS) - ENDIF() + IF(HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS) ADD_DEFINITIONS(-DHAVE_IB_ATOMIC_PTHREAD_T_SOLARIS=1) ENDIF() diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index c8dd4fae0a9..7dfcd79cfdb 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2012, Facebook Inc. @@ -2118,6 +2118,7 @@ btr_cur_optimistic_update( ulint max_size; ulint new_rec_size; ulint old_rec_size; + ulint max_ins_size = 0; dtuple_t* new_entry; roll_ptr_t roll_ptr; ulint i; @@ -2246,6 +2247,10 @@ any_extern: : (old_rec_size + page_get_max_insert_size_after_reorganize(page, 1)); + if (!page_zip) { + max_ins_size = page_get_max_insert_size_after_reorganize(page, 1); + } + if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT) && (max_size >= new_rec_size)) || (page_get_n_recs(page) <= 1))) { @@ -2305,12 +2310,15 @@ any_extern: ut_ad(err == DB_SUCCESS); func_exit: - if (page_zip - && !(flags & BTR_KEEP_IBUF_BITMAP) + if (!(flags & BTR_KEEP_IBUF_BITMAP) && !dict_index_is_clust(index) && page_is_leaf(page)) { - /* Update the free bits in the insert buffer. */ - ibuf_update_free_bits_zip(block, mtr); + + if (page_zip) { + ibuf_update_free_bits_zip(block, mtr); + } else { + ibuf_update_free_bits_low(block, max_ins_size, mtr); + } } return(err); @@ -2445,6 +2453,7 @@ btr_cur_pessimistic_update( ibool was_first; ulint n_reserved = 0; ulint n_ext; + ulint max_ins_size = 0; *offsets = NULL; *big_rec = NULL; @@ -2623,6 +2632,10 @@ make_external: } } + if (!page_zip) { + max_ins_size = page_get_max_insert_size_after_reorganize(page, 1); + } + /* Store state of explicit locks on rec on the page infimum record, before deleting rec. The page infimum acts as a dummy carrier of the locks, taking care also of lock releases, before we can move the locks @@ -2668,13 +2681,18 @@ make_external: rec_offs_make_valid( page_cursor->rec, index, *offsets); } - } else if (page_zip && - !dict_index_is_clust(index) + } else if (!dict_index_is_clust(index) && page_is_leaf(page)) { + /* Update the free bits in the insert buffer. This is the same block which was skipped by BTR_KEEP_IBUF_BITMAP. */ - ibuf_update_free_bits_zip(block, mtr); + if (page_zip) { + ibuf_update_free_bits_zip(block, mtr); + } else { + ibuf_update_free_bits_low(block, max_ins_size, + mtr); + } } err = DB_SUCCESS; diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index e5800ef30c0..6d8edfca5c0 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -53,6 +53,10 @@ Created 11/5/1995 Heikki Tuuri #include "page0zip.h" #include "srv0mon.h" #include "buf0checksum.h" +#ifdef HAVE_LIBNUMA +#include <numa.h> +#include <numaif.h> +#endif // HAVE_LIBNUMA /* IMPLEMENTATION OF THE BUFFER POOL @@ -1112,6 +1116,22 @@ buf_chunk_init( return(NULL); } +#ifdef HAVE_LIBNUMA + if (srv_numa_interleave) { + int st = mbind(chunk->mem, chunk->mem_size, + MPOL_INTERLEAVE, + numa_all_nodes_ptr->maskp, + numa_all_nodes_ptr->size, + MPOL_MF_MOVE); + if (st != 0) { + ib_logf(IB_LOG_LEVEL_WARN, + "Failed to set NUMA memory policy of buffer" + " pool page frames to MPOL_INTERLEAVE" + " (error: %s).", strerror(errno)); + } + } +#endif // HAVE_LIBNUMA + /* Allocate the block descriptors from the start of the memory block. */ chunk->blocks = (buf_block_t*) chunk->mem; @@ -1442,6 +1462,21 @@ buf_pool_init( ut_ad(n_instances <= MAX_BUFFER_POOLS); ut_ad(n_instances == srv_buf_pool_instances); +#ifdef HAVE_LIBNUMA + if (srv_numa_interleave) { + ib_logf(IB_LOG_LEVEL_INFO, + "Setting NUMA memory policy to MPOL_INTERLEAVE"); + if (set_mempolicy(MPOL_INTERLEAVE, + numa_all_nodes_ptr->maskp, + numa_all_nodes_ptr->size) != 0) { + ib_logf(IB_LOG_LEVEL_WARN, + "Failed to set NUMA memory policy to" + " MPOL_INTERLEAVE (error: %s).", + strerror(errno)); + } + } +#endif // HAVE_LIBNUMA + buf_pool_ptr = (buf_pool_t*) mem_zalloc( n_instances * sizeof *buf_pool_ptr); @@ -1462,6 +1497,18 @@ buf_pool_init( btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64); +#ifdef HAVE_LIBNUMA + if (srv_numa_interleave) { + ib_logf(IB_LOG_LEVEL_INFO, + "Setting NUMA memory policy to MPOL_DEFAULT"); + if (set_mempolicy(MPOL_DEFAULT, NULL, 0) != 0) { + ib_logf(IB_LOG_LEVEL_WARN, + "Failed to set NUMA memory policy to" + " MPOL_DEFAULT (error: %s).", strerror(errno)); + } + } +#endif // HAVE_LIBNUMA + return(DB_SUCCESS); } @@ -5252,11 +5299,19 @@ buf_print_io_instance( pool_info->pages_written_rate); if (pool_info->n_page_get_delta) { + double hit_rate = ((1000 * pool_info->page_read_delta) + / pool_info->n_page_get_delta); + + if (hit_rate > 1000) { + hit_rate = 1000; + } + + hit_rate = 1000 - hit_rate; + fprintf(file, "Buffer pool hit rate %lu / 1000," " young-making rate %lu / 1000 not %lu / 1000\n", - (ulong) (1000 - (1000 * pool_info->page_read_delta - / pool_info->n_page_get_delta)), + (ulong) hit_rate, (ulong) (1000 * pool_info->young_making_delta / pool_info->n_page_get_delta), (ulong) (1000 * pool_info->not_young_making_delta diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 9f05ddf190b..fdfbb549640 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -212,14 +212,6 @@ dict_index_remove_from_cache_low( dict_index_t* index, /*!< in, own: index */ ibool lru_evict); /*!< in: TRUE if page being evicted to make room in the table LRU list */ -/**********************************************************************//** -Removes a table object from the dictionary cache. */ -static -void -dict_table_remove_from_cache_low( -/*=============================*/ - dict_table_t* table, /*!< in, own: table */ - ibool lru_evict); /*!< in: TRUE if evicting from LRU */ #ifdef UNIV_DEBUG /**********************************************************************//** Validate the dictionary table LRU list. @@ -787,6 +779,45 @@ dict_table_get_all_fts_indexes( return(ib_vector_size(indexes)); } +/** Store autoinc value when the table is evicted. +@param[in] table table evicted */ +UNIV_INTERN +void +dict_table_autoinc_store( + const dict_table_t* table) +{ + ut_ad(mutex_own(&dict_sys->mutex)); + + if (table->autoinc != 0) { + ut_ad(dict_sys->autoinc_map->find(table->id) + == dict_sys->autoinc_map->end()); + + dict_sys->autoinc_map->insert( + std::pair<table_id_t, ib_uint64_t>( + table->id, table->autoinc)); + } +} + +/** Restore autoinc value when the table is loaded. +@param[in] table table loaded */ +UNIV_INTERN +void +dict_table_autoinc_restore( + dict_table_t* table) +{ + ut_ad(mutex_own(&dict_sys->mutex)); + + autoinc_map_t::iterator it; + it = dict_sys->autoinc_map->find(table->id); + + if (it != dict_sys->autoinc_map->end()) { + table->autoinc = it->second; + ut_ad(table->autoinc != 0); + + dict_sys->autoinc_map->erase(it); + } +} + /********************************************************************//** Reads the next autoinc value (== autoinc counter value), 0 if not yet initialized. @@ -1080,6 +1111,8 @@ dict_init(void) mutex_create(dict_foreign_err_mutex_key, &dict_foreign_err_mutex, SYNC_NO_ORDER_CHECK); } + + dict_sys->autoinc_map = new autoinc_map_t(); } /**********************************************************************//** @@ -1327,6 +1360,8 @@ dict_table_add_to_cache( UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_non_LRU, table); } + dict_table_autoinc_restore(table); + ut_ad(dict_lru_validate()); dict_sys->size += mem_heap_get_size(table->heap) @@ -2016,7 +2051,6 @@ dict_table_change_id_in_cache( /**********************************************************************//** Removes a table object from the dictionary cache. */ -static void dict_table_remove_from_cache_low( /*=============================*/ @@ -2078,6 +2112,10 @@ dict_table_remove_from_cache_low( ut_ad(dict_lru_validate()); + if (lru_evict) { + dict_table_autoinc_store(table); + } + if (lru_evict && table->drop_aborted) { /* Do as dict_table_try_drop_aborted() does. */ @@ -6842,6 +6880,8 @@ dict_close(void) mutex_free(&dict_foreign_err_mutex); } + delete dict_sys->autoinc_map; + mem_free(dict_sys); dict_sys = NULL; } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 0b483d3745d..3df10f55893 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -4,7 +4,7 @@ Copyright (c) 2000, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, 2015, MariaDB Corporation. All Rights Reserved. +Copyright (c) 2013, 2015, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -683,6 +683,12 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_os_log_pending_writes, SHOW_LONG}, {"os_log_written", (char*) &export_vars.innodb_os_log_written, SHOW_LONGLONG}, + {"os_merge_buffers_written", + (char*) &export_vars.innodb_merge_buffers_written, SHOW_LONGLONG}, + {"os_merge_buffers_read", + (char*) &export_vars.innodb_merge_buffers_read, SHOW_LONGLONG}, + {"os_merge_buffers_merged", + (char*) &export_vars.innodb_merge_buffers_merged, SHOW_LONGLONG}, {"page_size", (char*) &export_vars.innodb_page_size, SHOW_LONG}, {"pages_created", @@ -7419,7 +7425,7 @@ ha_innobase::write_row( DBUG_ENTER("ha_innobase::write_row"); - if (srv_read_only_mode) { + if (high_level_read_only) { ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } else if (prebuilt->trx != trx) { @@ -8159,7 +8165,7 @@ ha_innobase::update_row( ut_a(prebuilt->trx == trx); - if (srv_read_only_mode) { + if (high_level_read_only) { ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } else if (!trx_is_started(trx)) { @@ -8310,7 +8316,7 @@ ha_innobase::delete_row( ut_a(prebuilt->trx == trx); - if (srv_read_only_mode) { + if (high_level_read_only) { ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } else if (!trx_is_started(trx)) { @@ -11102,7 +11108,7 @@ ha_innobase::create( if (form->s->stored_fields > REC_MAX_N_USER_FIELDS) { DBUG_RETURN(HA_ERR_TOO_MANY_FIELDS); - } else if (srv_read_only_mode) { + } else if (high_level_read_only) { DBUG_RETURN(HA_ERR_TABLE_READONLY); } @@ -11432,7 +11438,7 @@ ha_innobase::discard_or_import_tablespace( ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N); ut_a(prebuilt->trx == thd_to_trx(ha_thd())); - if (srv_read_only_mode) { + if (high_level_read_only) { DBUG_RETURN(HA_ERR_TABLE_READONLY); } @@ -11526,7 +11532,7 @@ ha_innobase::truncate() DBUG_ENTER("ha_innobase::truncate"); - if (srv_read_only_mode) { + if (high_level_read_only) { DBUG_RETURN(HA_ERR_TABLE_READONLY); } @@ -11877,7 +11883,7 @@ ha_innobase::rename_table( DBUG_ENTER("ha_innobase::rename_table"); - if (srv_read_only_mode) { + if (high_level_read_only) { ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } @@ -15929,6 +15935,12 @@ innodb_internal_table_validate( } dict_table_close(user_table, FALSE, TRUE); + + DBUG_EXECUTE_IF("innodb_evict_autoinc_table", + mutex_enter(&dict_sys->mutex); + dict_table_remove_from_cache_low(user_table, TRUE); + mutex_exit(&dict_sys->mutex); + ); } return(ret); @@ -18282,6 +18294,13 @@ static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio, "Use native AIO if supported on this platform.", NULL, NULL, TRUE); +#ifdef HAVE_LIBNUMA +static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Use NUMA interleave memory policy to allocate InnoDB buffer pool.", + NULL, NULL, FALSE); +#endif // HAVE_LIBNUMA + static MYSQL_SYSVAR_BOOL(api_enable_binlog, ib_binlog_enabled, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, "Enable binlog for applications direct access InnoDB through InnoDB APIs", @@ -18603,6 +18622,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(version), MYSQL_SYSVAR(use_sys_malloc), MYSQL_SYSVAR(use_native_aio), +#ifdef HAVE_LIBNUMA + MYSQL_SYSVAR(numa_interleave), +#endif // HAVE_LIBNUMA MYSQL_SYSVAR(change_buffering), MYSQL_SYSVAR(change_buffer_max_size), #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index e7ff88bb093..be3fb48f4f1 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -244,7 +244,7 @@ ha_innobase::check_if_supported_inplace_alter( { DBUG_ENTER("check_if_supported_inplace_alter"); - if (srv_read_only_mode) { + if (high_level_read_only) { ha_alter_info->unsupported_reason = innobase_get_err_msg(ER_READ_ONLY_MODE); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index cb49d532972..5266cf7d245 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -312,6 +312,21 @@ dict_table_autoinc_initialize( dict_table_t* table, /*!< in/out: table */ ib_uint64_t value) /*!< in: next value to assign to a row */ __attribute__((nonnull)); + +/** Store autoinc value when the table is evicted. +@param[in] table table evicted */ +UNIV_INTERN +void +dict_table_autoinc_store( + const dict_table_t* table); + +/** Restore autoinc value when the table is loaded. +@param[in] table table loaded */ +UNIV_INTERN +void +dict_table_autoinc_restore( + dict_table_t* table); + /********************************************************************//** Reads the next autoinc value (== autoinc counter value), 0 if not yet initialized. @@ -371,6 +386,15 @@ dict_table_remove_from_cache( dict_table_t* table) /*!< in, own: table */ __attribute__((nonnull)); /**********************************************************************//** +Removes a table object from the dictionary cache. */ +UNIV_INTERN +void +dict_table_remove_from_cache_low( +/*=============================*/ + dict_table_t* table, /*!< in, own: table */ + ibool lru_evict); /*!< in: TRUE if table being evicted + to make room in the table LRU list */ +/**********************************************************************//** Renames a table object. @return TRUE if success */ UNIV_INTERN @@ -1572,6 +1596,8 @@ extern dict_sys_t* dict_sys; /** the data dictionary rw-latch protecting dict_sys */ extern rw_lock_t dict_operation_lock; +typedef std::map<table_id_t, ib_uint64_t> autoinc_map_t; + /* Dictionary system struct */ struct dict_sys_t{ ib_mutex_t mutex; /*!< mutex protecting the data @@ -1606,6 +1632,8 @@ struct dict_sys_t{ UT_LIST_BASE_NODE_T(dict_table_t) table_non_LRU; /*!< List of tables that can't be evicted from the cache */ + autoinc_map_t* autoinc_map; /*!< Map to store table id and autoinc + when table is evicted */ }; #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic index 21747fdceac..a5df9f7b6b4 100644 --- a/storage/innobase/include/ibuf0ibuf.ic +++ b/storage/innobase/include/ibuf0ibuf.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -128,7 +128,8 @@ ibuf_should_try( && ibuf->max_size != 0 && !dict_index_is_clust(index) && index->table->quiesce == QUIESCE_NONE - && (ignore_sec_unique || !dict_index_is_unique(index))); + && (ignore_sec_unique || !dict_index_is_unique(index)) + && srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE); } /******************************************************************//** diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h index feb64fb1e41..8e4b4f41aae 100644 --- a/storage/innobase/include/os0sync.h +++ b/storage/innobase/include/os0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -38,6 +38,26 @@ Created 9/6/1995 Heikki Tuuri #include "ut0lst.h" #include "sync0types.h" +#if defined __i386__ || defined __x86_64__ || defined _M_IX86 \ + || defined _M_X64 || defined __WIN__ + +#define IB_STRONG_MEMORY_MODEL + +#endif /* __i386__ || __x86_64__ || _M_IX86 || _M_X64 || __WIN__ */ + +#ifdef HAVE_WINDOWS_ATOMICS +typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates + on LONG variable */ +#elif defined(HAVE_ATOMIC_BUILTINS) && !defined(HAVE_ATOMIC_BUILTINS_BYTE) +typedef ulint lock_word_t; +#else + +#define IB_LOCK_WORD_IS_BYTE + +typedef byte lock_word_t; + +#endif /* HAVE_WINDOWS_ATOMICS */ + #ifdef __WIN__ /** Native event (slow)*/ typedef HANDLE os_native_event_t; @@ -446,11 +466,61 @@ amount to decrement. */ # define os_atomic_decrement_uint64(ptr, amount) \ os_atomic_decrement(ptr, amount) -/**********************************************************//** -Returns the old value of *ptr, atomically sets *ptr to new_val */ - -# define os_atomic_test_and_set_ulint(ptr, new_val) \ - __sync_lock_test_and_set(ptr, new_val) +# if defined(HAVE_IB_GCC_ATOMIC_TEST_AND_SET) + +/** Do an atomic test-and-set. +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(__atomic_test_and_set(ptr, __ATOMIC_ACQUIRE)); +} + +/** Do an atomic clear. +@param[in,out] ptr Memory location to set to zero */ +inline +void +os_atomic_clear(volatile lock_word_t* ptr) +{ + __atomic_clear(ptr, __ATOMIC_RELEASE); +} + +# elif defined(IB_STRONG_MEMORY_MODEL) + +/** Do an atomic test and set. +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(__sync_lock_test_and_set(ptr, 1)); +} + +/** Do an atomic release. + +In theory __sync_lock_release should be used to release the lock. +Unfortunately, it does not work properly alone. The workaround is +that more conservative __sync_lock_test_and_set is used instead. + +Performance regression was observed at some conditions for Intel +architecture. Disable release barrier on Intel architecture for now. +@param[in,out] ptr Memory location to write to +@return the previous value */ +inline +lock_word_t +os_atomic_clear(volatile lock_word_t* ptr) +{ + return(__sync_lock_test_and_set(ptr, 0)); +} + +# else + +# error "Unsupported platform" + +# endif /* HAVE_IB_GCC_ATOMIC_TEST_AND_SET */ #if defined(__powerpc__) || defined(__aarch64__) /* @@ -548,11 +618,51 @@ amount to decrement. */ # define os_atomic_decrement_uint64(ptr, amount) \ os_atomic_increment_uint64(ptr, -(amount)) -/**********************************************************//** -Returns the old value of *ptr, atomically sets *ptr to new_val */ - -# define os_atomic_test_and_set_ulint(ptr, new_val) \ - atomic_swap_ulong(ptr, new_val) +# ifdef IB_LOCK_WORD_IS_BYTE + +/** Do an atomic xchg and set to non-zero. +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(atomic_swap_uchar(ptr, 1)); +} + +/** Do an atomic xchg and set to zero. +@param[in,out] ptr Memory location to set to zero +@return the previous value */ +inline +lock_word_t +os_atomic_clear(volatile lock_word_t* ptr) +{ + return(atomic_swap_uchar(ptr, 0)); +} + +# else + +/** Do an atomic xchg and set to non-zero. +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(atomic_swap_ulong(ptr, 1)); +} + +/** Do an atomic xchg and set to zero. +@param[in,out] ptr Memory location to set to zero +@return the previous value */ +inline +lock_word_t +os_atomic_clear(volatile lock_word_t* ptr) +{ + return(atomic_swap_ulong(ptr, 0)); +} + +# endif /* IB_LOCK_WORD_IS_BYTE */ # define os_atomic_test_and_set_byte_acquire(ptr, new_val) \ atomic_swap_uchar(ptr, new_val) @@ -673,18 +783,27 @@ amount to decrement. There is no atomic substract function on Windows */ (ib_int64_t*) ptr, \ -(ib_int64_t) amount) - amount)) -/**********************************************************//** -Returns the old value of *ptr, atomically sets *ptr to new_val. -InterlockedExchange() operates on LONG, and the LONG will be -clobbered */ - -# define os_atomic_test_and_set_byte_acquire(ptr, new_val) \ - ((byte) InterlockedExchange(ptr, new_val)) -# define os_atomic_test_and_set_byte_release(ptr, new_val) \ - ((byte) InterlockedExchange(ptr, new_val)) - -# define os_atomic_test_and_set_ulong(ptr, new_val) \ - InterlockedExchange(ptr, new_val) +/** Do an atomic test and set. +InterlockedExchange() operates on LONG, and the LONG will be clobbered +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(InterlockedExchange(ptr, 1)); +} + +/** Do an atomic release. +InterlockedExchange() operates on LONG, and the LONG will be clobbered +@param[in,out] ptr Memory location to set to zero +@return the previous value */ +inline +lock_word_t +os_atomic_clear(volatile lock_word_t* ptr) +{ + return(InterlockedExchange(ptr, 0)); +} # define os_atomic_lock_release_byte(ptr) \ (void) InterlockedExchange(ptr, 0) @@ -737,7 +856,7 @@ for synchronization */ } while (0); /** barrier definitions for memory ordering */ -#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined __WIN__ +#ifdef IB_STRONG_MEMORY_MODEL /* Performance regression was observed at some conditions for Intel architecture. Disable memory barrier for Intel architecture for now. */ # define os_rmb do { } while(0) diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h index 2d90f47eefe..d2b65721e1a 100644 --- a/storage/innobase/include/srv0mon.h +++ b/storage/innobase/include/srv0mon.h @@ -2,6 +2,7 @@ Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2014, 2015, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -255,6 +256,9 @@ enum monitor_id_t { MONITOR_OVLD_OS_LOG_FSYNC, MONITOR_OVLD_OS_LOG_PENDING_FSYNC, MONITOR_OVLD_OS_LOG_PENDING_WRITES, + MONITOR_MERGE_BLOCKS_WRITTEN, + MONITOR_MERGE_BLOCKS_READ, + MONITOR_MERGE_BLOCKS_MERGED, /* Transaction related counters */ MONITOR_MODULE_TRX, diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 13f87262f8b..b9a86e76947 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -1,9 +1,9 @@ /***************************************************************************** -Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, 2009, Google Inc. Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, 2015, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -138,6 +138,15 @@ struct srv_stats_t { /** Number of system rows inserted */ ulint_ctr_64_t n_system_rows_inserted; + + /** Number of merge buffers written */ + ulint_ctr_64_t merge_buffers_written; + + /** Number of merge buffers read */ + ulint_ctr_64_t merge_buffers_read; + + /** Number of merge buffers merged */ + ulint_ctr_64_t merge_buffers_merged; }; extern const char* srv_main_thread_op_info; @@ -200,6 +209,9 @@ extern char* srv_arch_dir; recovery and open all tables in RO mode instead of RW mode. We don't sync the max trx id to disk either. */ extern my_bool srv_read_only_mode; +/** Set if InnoDB operates in read-only mode or innodb-force-recovery +is greater than SRV_FORCE_NO_TRX_UNDO. */ +extern my_bool high_level_read_only; /** store to its own file each table created by an user; data dictionary tables are in the system tablespace 0 */ extern my_bool srv_file_per_table; @@ -230,6 +242,7 @@ OS (provided we compiled Innobase with it in), otherwise we will use simulated aio we build below with threads. Currently we support native aio on windows and linux */ extern my_bool srv_use_native_aio; +extern my_bool srv_numa_interleave; #ifdef __WIN__ extern ibool srv_use_native_conditions; #endif /* __WIN__ */ @@ -875,6 +888,9 @@ struct export_var_t{ ulint innodb_purge_view_trx_id_age; /*!< rw_max_trx_id - purged view's min trx_id */ #endif /* UNIV_DEBUG */ + ib_int64_t innodb_merge_buffers_written; + ib_int64_t innodb_merge_buffers_read; + ib_int64_t innodb_merge_buffers_merged; }; /** Thread slot in the thread table. */ @@ -905,6 +921,7 @@ struct srv_slot_t{ #else /* !UNIV_HOTBACKUP */ # define srv_use_adaptive_hash_indexes FALSE # define srv_use_native_aio FALSE +# define srv_numa_interleave FALSE # define srv_force_recovery 0UL # define srv_set_io_thread_op_info(t,info) ((void) 0) # define srv_reset_io_thread_op_info() ((void) 0) diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h index 7b00e16476b..d6f8d8f5e4c 100644 --- a/storage/innobase/include/sync0sync.h +++ b/storage/innobase/include/sync0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2012, Facebook Inc. @@ -46,13 +46,6 @@ Created 9/5/1995 Heikki Tuuri extern "C" my_bool timed_mutexes; #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ -#ifdef HAVE_WINDOWS_ATOMICS -typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates - on LONG variable */ -#else -typedef byte lock_word_t; -#endif - #if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK /* By default, buffer mutexes and rwlocks will be excluded from diff --git a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic index a5887b1fd6f..149c81a3788 100644 --- a/storage/innobase/include/sync0sync.ic +++ b/storage/innobase/include/sync0sync.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -74,13 +74,13 @@ Performs an atomic test-and-set instruction to the lock_word field of a mutex. @return the previous value of lock_word: 0 or 1 */ UNIV_INLINE -byte +lock_word_t ib_mutex_test_and_set( -/*===============*/ +/*==================*/ ib_mutex_t* mutex) /*!< in: mutex */ { #if defined(HAVE_ATOMIC_BUILTINS) - return(os_atomic_test_and_set_byte_acquire(&mutex->lock_word, 1)); + return(os_atomic_test_and_set(&mutex->lock_word)); #else ibool ret; @@ -95,7 +95,7 @@ ib_mutex_test_and_set( } return((byte) ret); -#endif +#endif /* HAVE_ATOMIC_BUILTINS */ } /******************************************************************//** @@ -108,15 +108,12 @@ mutex_reset_lock_word( ib_mutex_t* mutex) /*!< in: mutex */ { #if defined(HAVE_ATOMIC_BUILTINS) - /* In theory __sync_lock_release should be used to release the lock. - Unfortunately, it does not work properly alone. The workaround is - that more conservative __sync_lock_test_and_set is used instead. */ - os_atomic_test_and_set_byte_release(&mutex->lock_word, 0); + os_atomic_clear(&mutex->lock_word); #else mutex->lock_word = 0; - os_fast_mutex_unlock_full_barrier(&(mutex->os_fast_mutex)); -#endif + os_fast_mutex_unlock(&(mutex->os_fast_mutex)); +#endif /* HAVE_ATOMIC_BUILTINS */ } /******************************************************************//** diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index 8976052bf64..3f737089ac3 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -44,7 +44,7 @@ Created 1/20/1994 Heikki Tuuri #define INNODB_VERSION_MAJOR 5 #define INNODB_VERSION_MINOR 6 -#define INNODB_VERSION_BUGFIX 26 +#define INNODB_VERSION_BUGFIX 27 /* The following is the InnoDB version as shown in SELECT plugin_version FROM information_schema.plugins; diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 28021a2d31b..f72f7f03018 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -2993,8 +2993,8 @@ lock_rec_inherit_to_gap( /* If srv_locks_unsafe_for_binlog is TRUE or session is using READ COMMITTED isolation level, we do not want locks set by an UPDATE or a DELETE to be inherited as gap type locks. But we - DO want S-locks set by a consistency constraint to be inherited also - then. */ + DO want S-locks/X-locks(taken for replace) set by a consistency + constraint to be inherited also then */ for (lock = lock_rec_get_first(block, heap_no); lock != NULL; @@ -3004,7 +3004,8 @@ lock_rec_inherit_to_gap( && !((srv_locks_unsafe_for_binlog || lock->trx->isolation_level <= TRX_ISO_READ_COMMITTED) - && lock_get_mode(lock) == LOCK_X)) { + && lock_get_mode(lock) == + (lock->trx->duplicates ? LOCK_S : LOCK_X))) { lock_rec_add_to_queue( LOCK_REC | LOCK_GAP | lock_get_mode(lock), diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 1850e798ed3..19757c81d5e 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -3413,11 +3413,7 @@ loop: lsn = log_sys->lsn; - ut_ad(srv_force_recovery != SRV_FORCE_NO_LOG_REDO - || lsn == log_sys->last_checkpoint_lsn + LOG_BLOCK_HDR_SIZE); - - if ((srv_force_recovery != SRV_FORCE_NO_LOG_REDO - && lsn != log_sys->last_checkpoint_lsn) + if (lsn != log_sys->last_checkpoint_lsn #ifdef UNIV_LOG_ARCHIVE || (srv_log_archive_on && lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE) diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index 6a5dff50952..edfa39a8932 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -2738,6 +2738,8 @@ row_ins_sec_index_entry_low( goto func_exit; } + DEBUG_SYNC_C("row_ins_sec_index_entry_dup_locks_created"); + /* We did not find a duplicate and we have now locked with s-locks the necessary records to prevent any insertion of a duplicate by another diff --git a/storage/innobase/row/row0log.cc b/storage/innobase/row/row0log.cc index 74ebe159677..0bc6f33df97 100644 --- a/storage/innobase/row/row0log.cc +++ b/storage/innobase/row/row0log.cc @@ -2586,7 +2586,7 @@ all_done: and be ignored when the operation is unsupported. */ fallocate(index->online_log->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - ofs, srv_buf_size); + ofs, srv_sort_buf_size); #endif /* FALLOC_FL_PUNCH_HOLE */ next_mrec = index->online_log->head.block; @@ -3417,7 +3417,7 @@ all_done: and be ignored when the operation is unsupported. */ fallocate(index->online_log->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - ofs, srv_buf_size); + ofs, srv_sort_buf_size); #endif /* FALLOC_FL_PUNCH_HOLE */ next_mrec = index->online_log->head.block; diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index 284081d4b0c..75aa423b2ac 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2005, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2014, 2015, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -871,6 +872,8 @@ row_merge_read( success = os_file_read_no_error_handling(OS_FILE_FROM_FD(fd), buf, ofs, srv_sort_buf_size); + srv_stats.merge_buffers_read.inc(); + #ifdef POSIX_FADV_DONTNEED /* Each block is read exactly once. Free up the file cache. */ posix_fadvise(fd, ofs, srv_sort_buf_size, POSIX_FADV_DONTNEED); @@ -905,6 +908,7 @@ row_merge_write( DBUG_EXECUTE_IF("row_merge_write_failure", return(FALSE);); ret = os_file_write("(merge)", OS_FILE_FROM_FD(fd), buf, ofs, buf_len); + srv_stats.merge_buffers_written.inc(); #ifdef UNIV_DEBUG if (row_merge_print_block_write) { @@ -1736,7 +1740,7 @@ write_buffers: /* We have enough data tuples to form a block. Sort them and write to disk. */ - if (buf->n_tuples) { + if (UNIV_LIKELY(buf->n_tuples)) { if (dict_index_is_unique(buf->index)) { row_merge_dup_t dup = { buf->index, table, col_map, 0}; @@ -1777,13 +1781,17 @@ write_buffers: dict_index_get_lock(buf->index)); } - row_merge_buf_write(buf, file, block); + /* Do not write empty buffers to temporary file */ + if (buf->n_tuples) { + + row_merge_buf_write(buf, file, block); - if (!row_merge_write(file->fd, file->offset++, - block)) { - err = DB_TEMP_FILE_WRITE_FAILURE; - trx->error_key_num = i; - break; + if (!row_merge_write(file->fd, file->offset++, + block)) { + err = DB_TEMP_FILE_WRITE_FAILURE; + trx->error_key_num = i; + break; + } } UNIV_MEM_INVALID(&block[0], srv_sort_buf_size); @@ -2068,6 +2076,9 @@ done1: mem_heap_free(heap); b2 = row_merge_write_eof(&block[2 * srv_sort_buf_size], b2, of->fd, &of->offset); + + srv_stats.merge_buffers_merged.inc(); + return(b2 ? DB_SUCCESS : DB_CORRUPTION); } @@ -3747,17 +3758,21 @@ wait_again: DEBUG_FTS_SORT_PRINT("FTS_SORT: Complete Insert\n"); #endif } else { - row_merge_dup_t dup = { - sort_idx, table, col_map, 0}; - - error = row_merge_sort( - trx, &dup, &merge_files[i], - block, &tmpfd); - - if (error == DB_SUCCESS) { - error = row_merge_insert_index_tuples( - trx->id, sort_idx, old_table, - merge_files[i].fd, block); + /* Sorting and inserting is required only if + there really is records */ + if (UNIV_LIKELY(merge_files[i].n_rec)) { + row_merge_dup_t dup = { + sort_idx, table, col_map, 0}; + + error = row_merge_sort( + trx, &dup, &merge_files[i], + block, &tmpfd); + + if (error == DB_SUCCESS) { + error = row_merge_insert_index_tuples( + trx->id, sort_idx, old_table, + merge_files[i].fd, block); + } } } diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc index a0dd32c203f..960c889f871 100644 --- a/storage/innobase/srv/srv0mon.cc +++ b/storage/innobase/srv/srv0mon.cc @@ -2,6 +2,7 @@ Copyright (c) 2010, 2014, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2014, 2015, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -674,6 +675,24 @@ static monitor_info_t innodb_counter_info[] = MONITOR_EXISTING | MONITOR_DEFAULT_ON), MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_PENDING_WRITES}, + {"os_merge_blocks_written", "os", + "Number of merge blocks written (innodb_os_merge_blocks_written)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_MERGE_BLOCKS_WRITTEN}, + + {"os_merge_blocks_read", "os", + "Number of merge blocks read (innodb_os_merge_blocks_read)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_MERGE_BLOCKS_READ}, + + {"os_merge_blocks_merged", "os", + "Number of merge blocks merged (innodb_os_merge_blocks_merged)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_MERGE_BLOCKS_MERGED}, + /* ========== Counters for Transaction Module ========== */ {"module_trx", "transaction", "Transaction Manager", MONITOR_MODULE, @@ -1626,6 +1645,21 @@ srv_mon_process_existing_counter( update_min = TRUE; break; + /* innodb_os_merge_blocks_written */ + case MONITOR_MERGE_BLOCKS_WRITTEN: + value = srv_stats.merge_buffers_written; + break; + + /* innodb_os_merge_blocks_read */ + case MONITOR_MERGE_BLOCKS_READ: + value = srv_stats.merge_buffers_read; + break; + + /* innodb_os_merge_blocks_merged */ + case MONITOR_MERGE_BLOCKS_MERGED: + value = srv_stats.merge_buffers_merged; + break; + /* innodb_log_waits */ case MONITOR_OVLD_LOG_WAITS: value = srv_stats.log_waits; diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 2ec4f9d5714..582ad9ea039 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -1,9 +1,9 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, 2015, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -131,6 +131,9 @@ UNIV_INTERN ulint srv_file_format = 0; UNIV_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to set it to the highest format we support. */ UNIV_INTERN ulint srv_max_file_format_at_startup = UNIV_FORMAT_MAX; +/** Set if InnoDB operates in read-only mode or innodb-force-recovery +is greater than SRV_FORCE_NO_TRX_UNDO. */ +UNIV_INTERN my_bool high_level_read_only; #if UNIV_FORMAT_A # error "UNIV_FORMAT_A must be 0!" @@ -149,6 +152,7 @@ OS (provided we compiled Innobase with it in), otherwise we will use simulated aio we build below with threads. Currently we support native aio on windows and linux */ UNIV_INTERN my_bool srv_use_native_aio = TRUE; +UNIV_INTERN my_bool srv_numa_interleave = FALSE; #ifdef __WIN__ /* Windows native condition variables. We use runtime loading / function @@ -1563,6 +1567,10 @@ srv_export_innodb_status(void) } #endif /* UNIV_DEBUG */ + export_vars.innodb_merge_buffers_written = srv_stats.merge_buffers_written; + export_vars.innodb_merge_buffers_read = srv_stats.merge_buffers_read; + export_vars.innodb_merge_buffers_merged = srv_stats.merge_buffers_merged; + mutex_exit(&srv_innodb_monitor_mutex); } @@ -2676,13 +2684,8 @@ srv_do_purge( } n_pages_purged = trx_purge( - n_use_threads, srv_purge_batch_size, false); - - if (!(count++ % TRX_SYS_N_RSEGS)) { - /* Force a truncate of the history list. */ - n_pages_purged += trx_purge( - 1, srv_purge_batch_size, true); - } + n_use_threads, srv_purge_batch_size, + (++count % TRX_SYS_N_RSEGS) == 0); *n_total_purged += n_pages_purged; @@ -2875,8 +2878,17 @@ DECLARE_THREAD(srv_purge_coordinator_thread)( n_pages_purged = trx_purge(1, srv_purge_batch_size, false); } - /* Force a truncate of the history list. */ - n_pages_purged = trx_purge(1, srv_purge_batch_size, true); + /* This trx_purge is called to remove any undo records (added by + background threads) after completion of the above loop. When + srv_fast_shutdown != 0, a large batch size can cause significant + delay in shutdown ,so reducing the batch size to magic number 20 + (which was default in 5.5), which we hope will be sufficient to + remove all the undo records */ + const uint temp_batch_size = 20; + + n_pages_purged = trx_purge(1, srv_purge_batch_size <= temp_batch_size + ? srv_purge_batch_size : temp_batch_size, + true); ut_a(n_pages_purged == 0 || srv_fast_shutdown != 0); /* The task queue should always be empty, independent of fast diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 2c85dcbb1f1..e6a12da8354 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1559,9 +1559,8 @@ innobase_start_or_create_for_mysql(void) size_t dirnamelen; bool sys_datafiles_created = false; - if (srv_force_recovery > SRV_FORCE_NO_TRX_UNDO) { - srv_read_only_mode = true; - } + high_level_read_only = srv_read_only_mode + || srv_force_recovery > SRV_FORCE_NO_TRX_UNDO; if (srv_read_only_mode) { ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode"); diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc index e93d3cae0b6..8ff9180eaf2 100644 --- a/storage/innobase/trx/trx0sys.cc +++ b/storage/innobase/trx/trx0sys.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -29,7 +29,10 @@ Created 3/26/1996 Heikki Tuuri #include "trx0sys.ic" #endif -#ifndef UNIV_HOTBACKUP +#ifdef UNIV_HOTBACKUP +#include "fsp0types.h" + +#else /* !UNIV_HOTBACKUP */ #include "fsp0fsp.h" #include "mtr0log.h" #include "mtr0log.h" @@ -1249,18 +1252,15 @@ trx_sys_read_pertable_file_format_id( /* get the file format from the page */ ptr = page + 54; flags = mach_read_from_4(ptr); - if (flags == 0) { - /* file format is Antelope */ - *format_id = 0; - return(TRUE); - } else if (flags & 1) { - /* tablespace flags are ok */ - *format_id = (flags / 32) % 128; - return(TRUE); - } else { + + if (!fsp_flags_is_valid(flags) { /* bad tablespace flags */ return(FALSE); } + + *format_id = FSP_FLAGS_GET_POST_ANTELOPE(flags); + + return(TRUE); } |