diff options
author | Sergei Golubchik <serg@mariadb.org> | 2015-10-09 17:47:30 +0200 |
---|---|---|
committer | Sergei Golubchik <serg@mariadb.org> | 2015-10-09 17:47:30 +0200 |
commit | 04af573d65487225132679fefcb142d24711d01d (patch) | |
tree | 890212b2a9b3eb63a68a7140b4eaeb21b6cc5c60 /storage/innobase | |
parent | cfeedbfd3e292f61c7da8f0a7f86307cbeeddb64 (diff) | |
parent | 86ff4da14dc53659e88ee8cd66412045dcb26e31 (diff) | |
download | mariadb-git-04af573d65487225132679fefcb142d24711d01d.tar.gz |
Merge branch 'merge-innodb-5.6' into 10.0
Diffstat (limited to 'storage/innobase')
-rw-r--r-- | storage/innobase/CMakeLists.txt | 21 | ||||
-rw-r--r-- | storage/innobase/btr/btr0cur.cc | 34 | ||||
-rw-r--r-- | storage/innobase/buf/buf0buf.cc | 47 | ||||
-rw-r--r-- | storage/innobase/dict/dict0dict.cc | 58 | ||||
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 30 | ||||
-rw-r--r-- | storage/innobase/handler/handler0alter.cc | 2 | ||||
-rw-r--r-- | storage/innobase/include/dict0dict.h | 28 | ||||
-rw-r--r-- | storage/innobase/include/ibuf0ibuf.ic | 5 | ||||
-rw-r--r-- | storage/innobase/include/os0sync.h | 167 | ||||
-rw-r--r-- | storage/innobase/include/srv0srv.h | 7 | ||||
-rw-r--r-- | storage/innobase/include/sync0sync.h | 9 | ||||
-rw-r--r-- | storage/innobase/include/sync0sync.ic | 19 | ||||
-rw-r--r-- | storage/innobase/lock/lock0lock.cc | 7 | ||||
-rw-r--r-- | storage/innobase/log/log0log.cc | 8 | ||||
-rw-r--r-- | storage/innobase/row/row0ins.cc | 2 | ||||
-rw-r--r-- | storage/innobase/srv/srv0srv.cc | 28 | ||||
-rw-r--r-- | storage/innobase/srv/srv0start.cc | 5 | ||||
-rw-r--r-- | storage/innobase/trx/trx0sys.cc | 22 |
18 files changed, 394 insertions, 105 deletions
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index c24f1cda59e..711672f5363 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -29,6 +29,9 @@ IF(UNIX) ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1) LINK_LIBRARIES(aio) ENDIF() + IF(HAVE_LIBNUMA) + LINK_LIBRARIES(numa) + ENDIF() ELSEIF(CMAKE_SYSTEM_NAME MATCHES "HP*") ADD_DEFINITIONS("-DUNIV_HPUX") ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "AIX") @@ -151,6 +154,18 @@ IF(NOT CMAKE_CROSSCOMPILING) }" HAVE_IB_GCC_ATOMIC_THREAD_FENCE ) + CHECK_C_SOURCE_RUNS( + "#include<stdint.h> + int main() + { + unsigned char c; + + __atomic_test_and_set(&c, __ATOMIC_ACQUIRE); + __atomic_clear(&c, __ATOMIC_RELEASE); + return(0); + }" + HAVE_IB_GCC_ATOMIC_TEST_AND_SET + ) ENDIF() IF(HAVE_IB_GCC_ATOMIC_BUILTINS) @@ -173,6 +188,10 @@ IF(HAVE_IB_GCC_ATOMIC_THREAD_FENCE) ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_THREAD_FENCE=1) ENDIF() +IF(HAVE_IB_GCC_ATOMIC_TEST_AND_SET) + ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_TEST_AND_SET=1) +ENDIF() + # either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not IF(NOT CMAKE_CROSSCOMPILING) CHECK_C_SOURCE_RUNS( diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index c8dd4fae0a9..7dfcd79cfdb 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2012, Facebook Inc. @@ -2118,6 +2118,7 @@ btr_cur_optimistic_update( ulint max_size; ulint new_rec_size; ulint old_rec_size; + ulint max_ins_size = 0; dtuple_t* new_entry; roll_ptr_t roll_ptr; ulint i; @@ -2246,6 +2247,10 @@ any_extern: : (old_rec_size + page_get_max_insert_size_after_reorganize(page, 1)); + if (!page_zip) { + max_ins_size = page_get_max_insert_size_after_reorganize(page, 1); + } + if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT) && (max_size >= new_rec_size)) || (page_get_n_recs(page) <= 1))) { @@ -2305,12 +2310,15 @@ any_extern: ut_ad(err == DB_SUCCESS); func_exit: - if (page_zip - && !(flags & BTR_KEEP_IBUF_BITMAP) + if (!(flags & BTR_KEEP_IBUF_BITMAP) && !dict_index_is_clust(index) && page_is_leaf(page)) { - /* Update the free bits in the insert buffer. */ - ibuf_update_free_bits_zip(block, mtr); + + if (page_zip) { + ibuf_update_free_bits_zip(block, mtr); + } else { + ibuf_update_free_bits_low(block, max_ins_size, mtr); + } } return(err); @@ -2445,6 +2453,7 @@ btr_cur_pessimistic_update( ibool was_first; ulint n_reserved = 0; ulint n_ext; + ulint max_ins_size = 0; *offsets = NULL; *big_rec = NULL; @@ -2623,6 +2632,10 @@ make_external: } } + if (!page_zip) { + max_ins_size = page_get_max_insert_size_after_reorganize(page, 1); + } + /* Store state of explicit locks on rec on the page infimum record, before deleting rec. The page infimum acts as a dummy carrier of the locks, taking care also of lock releases, before we can move the locks @@ -2668,13 +2681,18 @@ make_external: rec_offs_make_valid( page_cursor->rec, index, *offsets); } - } else if (page_zip && - !dict_index_is_clust(index) + } else if (!dict_index_is_clust(index) && page_is_leaf(page)) { + /* Update the free bits in the insert buffer. This is the same block which was skipped by BTR_KEEP_IBUF_BITMAP. */ - ibuf_update_free_bits_zip(block, mtr); + if (page_zip) { + ibuf_update_free_bits_zip(block, mtr); + } else { + ibuf_update_free_bits_low(block, max_ins_size, + mtr); + } } err = DB_SUCCESS; diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index e5800ef30c0..93d8e70c819 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -53,6 +53,10 @@ Created 11/5/1995 Heikki Tuuri #include "page0zip.h" #include "srv0mon.h" #include "buf0checksum.h" +#ifdef HAVE_LIBNUMA +#include <numa.h> +#include <numaif.h> +#endif // HAVE_LIBNUMA /* IMPLEMENTATION OF THE BUFFER POOL @@ -1112,6 +1116,22 @@ buf_chunk_init( return(NULL); } +#ifdef HAVE_LIBNUMA + if (srv_numa_interleave) { + int st = mbind(chunk->mem, chunk->mem_size, + MPOL_INTERLEAVE, + numa_all_nodes_ptr->maskp, + numa_all_nodes_ptr->size, + MPOL_MF_MOVE); + if (st != 0) { + ib_logf(IB_LOG_LEVEL_WARN, + "Failed to set NUMA memory policy of buffer" + " pool page frames to MPOL_INTERLEAVE" + " (error: %s).", strerror(errno)); + } + } +#endif // HAVE_LIBNUMA + /* Allocate the block descriptors from the start of the memory block. */ chunk->blocks = (buf_block_t*) chunk->mem; @@ -1442,6 +1462,21 @@ buf_pool_init( ut_ad(n_instances <= MAX_BUFFER_POOLS); ut_ad(n_instances == srv_buf_pool_instances); +#ifdef HAVE_LIBNUMA + if (srv_numa_interleave) { + ib_logf(IB_LOG_LEVEL_INFO, + "Setting NUMA memory policy to MPOL_INTERLEAVE"); + if (set_mempolicy(MPOL_INTERLEAVE, + numa_all_nodes_ptr->maskp, + numa_all_nodes_ptr->size) != 0) { + ib_logf(IB_LOG_LEVEL_WARN, + "Failed to set NUMA memory policy to" + " MPOL_INTERLEAVE (error: %s).", + strerror(errno)); + } + } +#endif // HAVE_LIBNUMA + buf_pool_ptr = (buf_pool_t*) mem_zalloc( n_instances * sizeof *buf_pool_ptr); @@ -1462,6 +1497,18 @@ buf_pool_init( btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64); +#ifdef HAVE_LIBNUMA + if (srv_numa_interleave) { + ib_logf(IB_LOG_LEVEL_INFO, + "Setting NUMA memory policy to MPOL_DEFAULT"); + if (set_mempolicy(MPOL_DEFAULT, NULL, 0) != 0) { + ib_logf(IB_LOG_LEVEL_WARN, + "Failed to set NUMA memory policy to" + " MPOL_DEFAULT (error: %s).", strerror(errno)); + } + } +#endif // HAVE_LIBNUMA + return(DB_SUCCESS); } diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 1fad7300011..7982671a3ef 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -212,14 +212,6 @@ dict_index_remove_from_cache_low( dict_index_t* index, /*!< in, own: index */ ibool lru_evict); /*!< in: TRUE if page being evicted to make room in the table LRU list */ -/**********************************************************************//** -Removes a table object from the dictionary cache. */ -static -void -dict_table_remove_from_cache_low( -/*=============================*/ - dict_table_t* table, /*!< in, own: table */ - ibool lru_evict); /*!< in: TRUE if evicting from LRU */ #ifdef UNIV_DEBUG /**********************************************************************//** Validate the dictionary table LRU list. @@ -787,6 +779,45 @@ dict_table_get_all_fts_indexes( return(ib_vector_size(indexes)); } +/** Store autoinc value when the table is evicted. +@param[in] table table evicted */ +UNIV_INTERN +void +dict_table_autoinc_store( + const dict_table_t* table) +{ + ut_ad(mutex_own(&dict_sys->mutex)); + + if (table->autoinc != 0) { + ut_ad(dict_sys->autoinc_map->find(table->id) + == dict_sys->autoinc_map->end()); + + dict_sys->autoinc_map->insert( + std::pair<table_id_t, ib_uint64_t>( + table->id, table->autoinc)); + } +} + +/** Restore autoinc value when the table is loaded. +@param[in] table table loaded */ +UNIV_INTERN +void +dict_table_autoinc_restore( + dict_table_t* table) +{ + ut_ad(mutex_own(&dict_sys->mutex)); + + autoinc_map_t::iterator it; + it = dict_sys->autoinc_map->find(table->id); + + if (it != dict_sys->autoinc_map->end()) { + table->autoinc = it->second; + ut_ad(table->autoinc != 0); + + dict_sys->autoinc_map->erase(it); + } +} + /********************************************************************//** Reads the next autoinc value (== autoinc counter value), 0 if not yet initialized. @@ -1080,6 +1111,8 @@ dict_init(void) mutex_create(dict_foreign_err_mutex_key, &dict_foreign_err_mutex, SYNC_NO_ORDER_CHECK); } + + dict_sys->autoinc_map = new autoinc_map_t(); } /**********************************************************************//** @@ -1327,6 +1360,8 @@ dict_table_add_to_cache( UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_non_LRU, table); } + dict_table_autoinc_restore(table); + ut_ad(dict_lru_validate()); dict_sys->size += mem_heap_get_size(table->heap) @@ -2016,7 +2051,6 @@ dict_table_change_id_in_cache( /**********************************************************************//** Removes a table object from the dictionary cache. */ -static void dict_table_remove_from_cache_low( /*=============================*/ @@ -2078,6 +2112,10 @@ dict_table_remove_from_cache_low( ut_ad(dict_lru_validate()); + if (lru_evict) { + dict_table_autoinc_store(table); + } + if (lru_evict && table->drop_aborted) { /* Do as dict_table_try_drop_aborted() does. */ @@ -6820,6 +6858,8 @@ dict_close(void) mutex_free(&dict_foreign_err_mutex); } + delete dict_sys->autoinc_map; + mem_free(dict_sys); dict_sys = NULL; } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 83e65af0ae6..3e7b471a609 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -6884,7 +6884,7 @@ ha_innobase::write_row( DBUG_ENTER("ha_innobase::write_row"); - if (srv_read_only_mode) { + if (high_level_read_only) { ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } else if (prebuilt->trx != trx) { @@ -7433,7 +7433,7 @@ ha_innobase::update_row( ut_a(prebuilt->trx == trx); - if (srv_read_only_mode) { + if (high_level_read_only) { ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } else if (!trx_is_started(trx)) { @@ -7566,7 +7566,7 @@ ha_innobase::delete_row( ut_a(prebuilt->trx == trx); - if (srv_read_only_mode) { + if (high_level_read_only) { ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } else if (!trx_is_started(trx)) { @@ -9936,7 +9936,7 @@ ha_innobase::create( if (form->s->stored_fields > REC_MAX_N_USER_FIELDS) { DBUG_RETURN(HA_ERR_TOO_MANY_FIELDS); - } else if (srv_read_only_mode) { + } else if (high_level_read_only) { DBUG_RETURN(HA_ERR_TABLE_READONLY); } @@ -10266,7 +10266,7 @@ ha_innobase::discard_or_import_tablespace( ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N); ut_a(prebuilt->trx == thd_to_trx(ha_thd())); - if (srv_read_only_mode) { + if (high_level_read_only) { DBUG_RETURN(HA_ERR_TABLE_READONLY); } @@ -10360,7 +10360,7 @@ ha_innobase::truncate() DBUG_ENTER("ha_innobase::truncate"); - if (srv_read_only_mode) { + if (high_level_read_only) { DBUG_RETURN(HA_ERR_TABLE_READONLY); } @@ -10711,7 +10711,7 @@ ha_innobase::rename_table( DBUG_ENTER("ha_innobase::rename_table"); - if (srv_read_only_mode) { + if (high_level_read_only) { ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } @@ -14740,6 +14740,12 @@ innodb_internal_table_validate( } dict_table_close(user_table, FALSE, TRUE); + + DBUG_EXECUTE_IF("innodb_evict_autoinc_table", + mutex_enter(&dict_sys->mutex); + dict_table_remove_from_cache_low(user_table, TRUE); + mutex_exit(&dict_sys->mutex); + ); } return(ret); @@ -16783,6 +16789,13 @@ static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio, "Use native AIO if supported on this platform.", NULL, NULL, TRUE); +#ifdef HAVE_LIBNUMA +static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Use NUMA interleave memory policy to allocate InnoDB buffer pool.", + NULL, NULL, FALSE); +#endif // HAVE_LIBNUMA + static MYSQL_SYSVAR_BOOL(api_enable_binlog, ib_binlog_enabled, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, "Enable binlog for applications direct access InnoDB through InnoDB APIs", @@ -17070,6 +17083,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(version), MYSQL_SYSVAR(use_sys_malloc), MYSQL_SYSVAR(use_native_aio), +#ifdef HAVE_LIBNUMA + MYSQL_SYSVAR(numa_interleave), +#endif // HAVE_LIBNUMA MYSQL_SYSVAR(change_buffering), MYSQL_SYSVAR(change_buffer_max_size), #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index f2a74af53c4..703fc3695dc 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -240,7 +240,7 @@ ha_innobase::check_if_supported_inplace_alter( { DBUG_ENTER("check_if_supported_inplace_alter"); - if (srv_read_only_mode) { + if (high_level_read_only) { ha_alter_info->unsupported_reason = innobase_get_err_msg(ER_READ_ONLY_MODE); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index cb49d532972..5266cf7d245 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -312,6 +312,21 @@ dict_table_autoinc_initialize( dict_table_t* table, /*!< in/out: table */ ib_uint64_t value) /*!< in: next value to assign to a row */ __attribute__((nonnull)); + +/** Store autoinc value when the table is evicted. +@param[in] table table evicted */ +UNIV_INTERN +void +dict_table_autoinc_store( + const dict_table_t* table); + +/** Restore autoinc value when the table is loaded. +@param[in] table table loaded */ +UNIV_INTERN +void +dict_table_autoinc_restore( + dict_table_t* table); + /********************************************************************//** Reads the next autoinc value (== autoinc counter value), 0 if not yet initialized. @@ -371,6 +386,15 @@ dict_table_remove_from_cache( dict_table_t* table) /*!< in, own: table */ __attribute__((nonnull)); /**********************************************************************//** +Removes a table object from the dictionary cache. */ +UNIV_INTERN +void +dict_table_remove_from_cache_low( +/*=============================*/ + dict_table_t* table, /*!< in, own: table */ + ibool lru_evict); /*!< in: TRUE if table being evicted + to make room in the table LRU list */ +/**********************************************************************//** Renames a table object. @return TRUE if success */ UNIV_INTERN @@ -1572,6 +1596,8 @@ extern dict_sys_t* dict_sys; /** the data dictionary rw-latch protecting dict_sys */ extern rw_lock_t dict_operation_lock; +typedef std::map<table_id_t, ib_uint64_t> autoinc_map_t; + /* Dictionary system struct */ struct dict_sys_t{ ib_mutex_t mutex; /*!< mutex protecting the data @@ -1606,6 +1632,8 @@ struct dict_sys_t{ UT_LIST_BASE_NODE_T(dict_table_t) table_non_LRU; /*!< List of tables that can't be evicted from the cache */ + autoinc_map_t* autoinc_map; /*!< Map to store table id and autoinc + when table is evicted */ }; #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic index 21747fdceac..a5df9f7b6b4 100644 --- a/storage/innobase/include/ibuf0ibuf.ic +++ b/storage/innobase/include/ibuf0ibuf.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -128,7 +128,8 @@ ibuf_should_try( && ibuf->max_size != 0 && !dict_index_is_clust(index) && index->table->quiesce == QUIESCE_NONE - && (ignore_sec_unique || !dict_index_is_unique(index))); + && (ignore_sec_unique || !dict_index_is_unique(index)) + && srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE); } /******************************************************************//** diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h index feb64fb1e41..8e4b4f41aae 100644 --- a/storage/innobase/include/os0sync.h +++ b/storage/innobase/include/os0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -38,6 +38,26 @@ Created 9/6/1995 Heikki Tuuri #include "ut0lst.h" #include "sync0types.h" +#if defined __i386__ || defined __x86_64__ || defined _M_IX86 \ + || defined _M_X64 || defined __WIN__ + +#define IB_STRONG_MEMORY_MODEL + +#endif /* __i386__ || __x86_64__ || _M_IX86 || _M_X64 || __WIN__ */ + +#ifdef HAVE_WINDOWS_ATOMICS +typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates + on LONG variable */ +#elif defined(HAVE_ATOMIC_BUILTINS) && !defined(HAVE_ATOMIC_BUILTINS_BYTE) +typedef ulint lock_word_t; +#else + +#define IB_LOCK_WORD_IS_BYTE + +typedef byte lock_word_t; + +#endif /* HAVE_WINDOWS_ATOMICS */ + #ifdef __WIN__ /** Native event (slow)*/ typedef HANDLE os_native_event_t; @@ -446,11 +466,61 @@ amount to decrement. */ # define os_atomic_decrement_uint64(ptr, amount) \ os_atomic_decrement(ptr, amount) -/**********************************************************//** -Returns the old value of *ptr, atomically sets *ptr to new_val */ - -# define os_atomic_test_and_set_ulint(ptr, new_val) \ - __sync_lock_test_and_set(ptr, new_val) +# if defined(HAVE_IB_GCC_ATOMIC_TEST_AND_SET) + +/** Do an atomic test-and-set. +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(__atomic_test_and_set(ptr, __ATOMIC_ACQUIRE)); +} + +/** Do an atomic clear. +@param[in,out] ptr Memory location to set to zero */ +inline +void +os_atomic_clear(volatile lock_word_t* ptr) +{ + __atomic_clear(ptr, __ATOMIC_RELEASE); +} + +# elif defined(IB_STRONG_MEMORY_MODEL) + +/** Do an atomic test and set. +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(__sync_lock_test_and_set(ptr, 1)); +} + +/** Do an atomic release. + +In theory __sync_lock_release should be used to release the lock. +Unfortunately, it does not work properly alone. The workaround is +that more conservative __sync_lock_test_and_set is used instead. + +Performance regression was observed at some conditions for Intel +architecture. Disable release barrier on Intel architecture for now. +@param[in,out] ptr Memory location to write to +@return the previous value */ +inline +lock_word_t +os_atomic_clear(volatile lock_word_t* ptr) +{ + return(__sync_lock_test_and_set(ptr, 0)); +} + +# else + +# error "Unsupported platform" + +# endif /* HAVE_IB_GCC_ATOMIC_TEST_AND_SET */ #if defined(__powerpc__) || defined(__aarch64__) /* @@ -548,11 +618,51 @@ amount to decrement. */ # define os_atomic_decrement_uint64(ptr, amount) \ os_atomic_increment_uint64(ptr, -(amount)) -/**********************************************************//** -Returns the old value of *ptr, atomically sets *ptr to new_val */ - -# define os_atomic_test_and_set_ulint(ptr, new_val) \ - atomic_swap_ulong(ptr, new_val) +# ifdef IB_LOCK_WORD_IS_BYTE + +/** Do an atomic xchg and set to non-zero. +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(atomic_swap_uchar(ptr, 1)); +} + +/** Do an atomic xchg and set to zero. +@param[in,out] ptr Memory location to set to zero +@return the previous value */ +inline +lock_word_t +os_atomic_clear(volatile lock_word_t* ptr) +{ + return(atomic_swap_uchar(ptr, 0)); +} + +# else + +/** Do an atomic xchg and set to non-zero. +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(atomic_swap_ulong(ptr, 1)); +} + +/** Do an atomic xchg and set to zero. +@param[in,out] ptr Memory location to set to zero +@return the previous value */ +inline +lock_word_t +os_atomic_clear(volatile lock_word_t* ptr) +{ + return(atomic_swap_ulong(ptr, 0)); +} + +# endif /* IB_LOCK_WORD_IS_BYTE */ # define os_atomic_test_and_set_byte_acquire(ptr, new_val) \ atomic_swap_uchar(ptr, new_val) @@ -673,18 +783,27 @@ amount to decrement. There is no atomic substract function on Windows */ (ib_int64_t*) ptr, \ -(ib_int64_t) amount) - amount)) -/**********************************************************//** -Returns the old value of *ptr, atomically sets *ptr to new_val. -InterlockedExchange() operates on LONG, and the LONG will be -clobbered */ - -# define os_atomic_test_and_set_byte_acquire(ptr, new_val) \ - ((byte) InterlockedExchange(ptr, new_val)) -# define os_atomic_test_and_set_byte_release(ptr, new_val) \ - ((byte) InterlockedExchange(ptr, new_val)) - -# define os_atomic_test_and_set_ulong(ptr, new_val) \ - InterlockedExchange(ptr, new_val) +/** Do an atomic test and set. +InterlockedExchange() operates on LONG, and the LONG will be clobbered +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(InterlockedExchange(ptr, 1)); +} + +/** Do an atomic release. +InterlockedExchange() operates on LONG, and the LONG will be clobbered +@param[in,out] ptr Memory location to set to zero +@return the previous value */ +inline +lock_word_t +os_atomic_clear(volatile lock_word_t* ptr) +{ + return(InterlockedExchange(ptr, 0)); +} # define os_atomic_lock_release_byte(ptr) \ (void) InterlockedExchange(ptr, 0) @@ -737,7 +856,7 @@ for synchronization */ } while (0); /** barrier definitions for memory ordering */ -#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined __WIN__ +#ifdef IB_STRONG_MEMORY_MODEL /* Performance regression was observed at some conditions for Intel architecture. Disable memory barrier for Intel architecture for now. */ # define os_rmb do { } while(0) diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index d06a14a9153..63b73a07746 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, 2009, Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. @@ -200,6 +200,9 @@ extern char* srv_arch_dir; recovery and open all tables in RO mode instead of RW mode. We don't sync the max trx id to disk either. */ extern my_bool srv_read_only_mode; +/** Set if InnoDB operates in read-only mode or innodb-force-recovery +is greater than SRV_FORCE_NO_TRX_UNDO. */ +extern my_bool high_level_read_only; /** store to its own file each table created by an user; data dictionary tables are in the system tablespace 0 */ extern my_bool srv_file_per_table; @@ -230,6 +233,7 @@ OS (provided we compiled Innobase with it in), otherwise we will use simulated aio we build below with threads. Currently we support native aio on windows and linux */ extern my_bool srv_use_native_aio; +extern my_bool srv_numa_interleave; #ifdef __WIN__ extern ibool srv_use_native_conditions; #endif /* __WIN__ */ @@ -901,6 +905,7 @@ struct srv_slot_t{ #else /* !UNIV_HOTBACKUP */ # define srv_use_adaptive_hash_indexes FALSE # define srv_use_native_aio FALSE +# define srv_numa_interleave FALSE # define srv_force_recovery 0UL # define srv_set_io_thread_op_info(t,info) ((void) 0) # define srv_reset_io_thread_op_info() ((void) 0) diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h index 7b00e16476b..d6f8d8f5e4c 100644 --- a/storage/innobase/include/sync0sync.h +++ b/storage/innobase/include/sync0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2012, Facebook Inc. @@ -46,13 +46,6 @@ Created 9/5/1995 Heikki Tuuri extern "C" my_bool timed_mutexes; #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ -#ifdef HAVE_WINDOWS_ATOMICS -typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates - on LONG variable */ -#else -typedef byte lock_word_t; -#endif - #if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK /* By default, buffer mutexes and rwlocks will be excluded from diff --git a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic index 97ec63c0dd2..55f728fd744 100644 --- a/storage/innobase/include/sync0sync.ic +++ b/storage/innobase/include/sync0sync.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -74,13 +74,13 @@ Performs an atomic test-and-set instruction to the lock_word field of a mutex. @return the previous value of lock_word: 0 or 1 */ UNIV_INLINE -byte +lock_word_t ib_mutex_test_and_set( -/*===============*/ +/*==================*/ ib_mutex_t* mutex) /*!< in: mutex */ { #if defined(HAVE_ATOMIC_BUILTINS) - return(os_atomic_test_and_set_byte_acquire(&mutex->lock_word, 1)); + return(os_atomic_test_and_set(&mutex->lock_word)); #else ibool ret; @@ -95,7 +95,7 @@ ib_mutex_test_and_set( } return((byte) ret); -#endif +#endif /* HAVE_ATOMIC_BUILTINS */ } /******************************************************************//** @@ -108,15 +108,12 @@ mutex_reset_lock_word( ib_mutex_t* mutex) /*!< in: mutex */ { #if defined(HAVE_ATOMIC_BUILTINS) - /* In theory __sync_lock_release should be used to release the lock. - Unfortunately, it does not work properly alone. The workaround is - that more conservative __sync_lock_test_and_set is used instead. */ - os_atomic_test_and_set_byte_release(&mutex->lock_word, 0); + os_atomic_clear(&mutex->lock_word); #else mutex->lock_word = 0; - os_fast_mutex_unlock_full_barrier(&(mutex->os_fast_mutex)); -#endif + os_fast_mutex_unlock(&(mutex->os_fast_mutex)); +#endif /* HAVE_ATOMIC_BUILTINS */ } /******************************************************************//** diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 7755d6d5ef1..4099ca1932f 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -2691,8 +2691,8 @@ lock_rec_inherit_to_gap( /* If srv_locks_unsafe_for_binlog is TRUE or session is using READ COMMITTED isolation level, we do not want locks set by an UPDATE or a DELETE to be inherited as gap type locks. But we - DO want S-locks set by a consistency constraint to be inherited also - then. */ + DO want S-locks/X-locks(taken for replace) set by a consistency + constraint to be inherited also then */ for (lock = lock_rec_get_first(block, heap_no); lock != NULL; @@ -2702,7 +2702,8 @@ lock_rec_inherit_to_gap( && !((srv_locks_unsafe_for_binlog || lock->trx->isolation_level <= TRX_ISO_READ_COMMITTED) - && lock_get_mode(lock) == LOCK_X)) { + && lock_get_mode(lock) == + (lock->trx->duplicates ? LOCK_S : LOCK_X))) { lock_rec_add_to_queue( LOCK_REC | LOCK_GAP | lock_get_mode(lock), diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index 1850e798ed3..19757c81d5e 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -3413,11 +3413,7 @@ loop: lsn = log_sys->lsn; - ut_ad(srv_force_recovery != SRV_FORCE_NO_LOG_REDO - || lsn == log_sys->last_checkpoint_lsn + LOG_BLOCK_HDR_SIZE); - - if ((srv_force_recovery != SRV_FORCE_NO_LOG_REDO - && lsn != log_sys->last_checkpoint_lsn) + if (lsn != log_sys->last_checkpoint_lsn #ifdef UNIV_LOG_ARCHIVE || (srv_log_archive_on && lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE) diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index 8a39c965192..9f22889c5fb 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -2711,6 +2711,8 @@ row_ins_sec_index_entry_low( goto func_exit; } + DEBUG_SYNC_C("row_ins_sec_index_entry_dup_locks_created"); + /* We did not find a duplicate and we have now locked with s-locks the necessary records to prevent any insertion of a duplicate by another diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index cd3bed9e2fe..89dadf9f08c 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. @@ -127,6 +127,9 @@ UNIV_INTERN ulint srv_file_format = 0; UNIV_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to set it to the highest format we support. */ UNIV_INTERN ulint srv_max_file_format_at_startup = UNIV_FORMAT_MAX; +/** Set if InnoDB operates in read-only mode or innodb-force-recovery +is greater than SRV_FORCE_NO_TRX_UNDO. */ +UNIV_INTERN my_bool high_level_read_only; #if UNIV_FORMAT_A # error "UNIV_FORMAT_A must be 0!" @@ -145,6 +148,7 @@ OS (provided we compiled Innobase with it in), otherwise we will use simulated aio we build below with threads. Currently we support native aio on windows and linux */ UNIV_INTERN my_bool srv_use_native_aio = TRUE; +UNIV_INTERN my_bool srv_numa_interleave = FALSE; #ifdef __WIN__ /* Windows native condition variables. We use runtime loading / function @@ -2647,13 +2651,8 @@ srv_do_purge( } n_pages_purged = trx_purge( - n_use_threads, srv_purge_batch_size, false); - - if (!(count++ % TRX_SYS_N_RSEGS)) { - /* Force a truncate of the history list. */ - n_pages_purged += trx_purge( - 1, srv_purge_batch_size, true); - } + n_use_threads, srv_purge_batch_size, + (++count % TRX_SYS_N_RSEGS) == 0); *n_total_purged += n_pages_purged; @@ -2846,8 +2845,17 @@ DECLARE_THREAD(srv_purge_coordinator_thread)( n_pages_purged = trx_purge(1, srv_purge_batch_size, false); } - /* Force a truncate of the history list. */ - n_pages_purged = trx_purge(1, srv_purge_batch_size, true); + /* This trx_purge is called to remove any undo records (added by + background threads) after completion of the above loop. When + srv_fast_shutdown != 0, a large batch size can cause significant + delay in shutdown ,so reducing the batch size to magic number 20 + (which was default in 5.5), which we hope will be sufficient to + remove all the undo records */ + const uint temp_batch_size = 20; + + n_pages_purged = trx_purge(1, srv_purge_batch_size <= temp_batch_size + ? srv_purge_batch_size : temp_batch_size, + true); ut_a(n_pages_purged == 0 || srv_fast_shutdown != 0); /* The task queue should always be empty, independent of fast diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 7048a44ae97..ae9a91a8687 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1555,9 +1555,8 @@ innobase_start_or_create_for_mysql(void) size_t dirnamelen; bool sys_datafiles_created = false; - if (srv_force_recovery > SRV_FORCE_NO_TRX_UNDO) { - srv_read_only_mode = true; - } + high_level_read_only = srv_read_only_mode + || srv_force_recovery > SRV_FORCE_NO_TRX_UNDO; if (srv_read_only_mode) { ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode"); diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc index 5eb3cef46c1..e5f03f4b96a 100644 --- a/storage/innobase/trx/trx0sys.cc +++ b/storage/innobase/trx/trx0sys.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -29,7 +29,10 @@ Created 3/26/1996 Heikki Tuuri #include "trx0sys.ic" #endif -#ifndef UNIV_HOTBACKUP +#ifdef UNIV_HOTBACKUP +#include "fsp0types.h" + +#else /* !UNIV_HOTBACKUP */ #include "fsp0fsp.h" #include "mtr0log.h" #include "mtr0log.h" @@ -1115,18 +1118,15 @@ trx_sys_read_pertable_file_format_id( /* get the file format from the page */ ptr = page + 54; flags = mach_read_from_4(ptr); - if (flags == 0) { - /* file format is Antelope */ - *format_id = 0; - return(TRUE); - } else if (flags & 1) { - /* tablespace flags are ok */ - *format_id = (flags / 32) % 128; - return(TRUE); - } else { + + if (!fsp_flags_is_valid(flags) { /* bad tablespace flags */ return(FALSE); } + + *format_id = FSP_FLAGS_GET_POST_ANTELOPE(flags); + + return(TRUE); } |