diff options
Diffstat (limited to 'storage/innobase')
-rw-r--r-- | storage/innobase/CMakeLists.txt | 21 | ||||
-rw-r--r-- | storage/innobase/btr/btr0cur.cc | 34 | ||||
-rw-r--r-- | storage/innobase/buf/buf0buf.cc | 47 | ||||
-rw-r--r-- | storage/innobase/dict/dict0dict.cc | 58 | ||||
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 30 | ||||
-rw-r--r-- | storage/innobase/handler/handler0alter.cc | 2 | ||||
-rw-r--r-- | storage/innobase/include/dict0dict.h | 28 | ||||
-rw-r--r-- | storage/innobase/include/ibuf0ibuf.ic | 5 | ||||
-rw-r--r-- | storage/innobase/include/os0sync.h | 171 | ||||
-rw-r--r-- | storage/innobase/include/srv0srv.h | 7 | ||||
-rw-r--r-- | storage/innobase/include/sync0sync.h | 11 | ||||
-rw-r--r-- | storage/innobase/include/sync0sync.ic | 25 | ||||
-rw-r--r-- | storage/innobase/lock/lock0lock.cc | 7 | ||||
-rw-r--r-- | storage/innobase/log/log0log.cc | 8 | ||||
-rw-r--r-- | storage/innobase/row/row0ins.cc | 2 | ||||
-rw-r--r-- | storage/innobase/srv/srv0srv.cc | 28 | ||||
-rw-r--r-- | storage/innobase/srv/srv0start.cc | 5 | ||||
-rw-r--r-- | storage/innobase/trx/trx0sys.cc | 22 |
18 files changed, 393 insertions, 118 deletions
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index eeb53f96c9f..2e939899d24 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -29,6 +29,9 @@ IF(UNIX) ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1) LINK_LIBRARIES(aio) ENDIF() + IF(HAVE_LIBNUMA) + LINK_LIBRARIES(numa) + ENDIF() ELSEIF(CMAKE_SYSTEM_NAME MATCHES "HP*") ADD_DEFINITIONS("-DUNIV_HPUX") ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "AIX") @@ -145,6 +148,18 @@ IF(NOT CMAKE_CROSSCOMPILING) }" HAVE_IB_GCC_ATOMIC_THREAD_FENCE ) + CHECK_C_SOURCE_RUNS( + "#include<stdint.h> + int main() + { + unsigned char c; + + __atomic_test_and_set(&c, __ATOMIC_ACQUIRE); + __atomic_clear(&c, __ATOMIC_RELEASE); + return(0); + }" + HAVE_IB_GCC_ATOMIC_TEST_AND_SET + ) ENDIF() IF(HAVE_IB_GCC_ATOMIC_BUILTINS) @@ -167,6 +182,10 @@ IF(HAVE_IB_GCC_ATOMIC_THREAD_FENCE) ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_THREAD_FENCE=1) ENDIF() +IF(HAVE_IB_GCC_ATOMIC_TEST_AND_SET) + ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_TEST_AND_SET=1) +ENDIF() + # either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not IF(NOT CMAKE_CROSSCOMPILING) CHECK_C_SOURCE_RUNS( diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index 1611fb6394c..ad323531da6 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2012, Facebook Inc. 
@@ -2117,6 +2117,7 @@ btr_cur_optimistic_update( ulint max_size; ulint new_rec_size; ulint old_rec_size; + ulint max_ins_size = 0; dtuple_t* new_entry; roll_ptr_t roll_ptr; ulint i; @@ -2245,6 +2246,10 @@ any_extern: : (old_rec_size + page_get_max_insert_size_after_reorganize(page, 1)); + if (!page_zip) { + max_ins_size = page_get_max_insert_size_after_reorganize(page, 1); + } + if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT) && (max_size >= new_rec_size)) || (page_get_n_recs(page) <= 1))) { @@ -2304,12 +2309,15 @@ any_extern: ut_ad(err == DB_SUCCESS); func_exit: - if (page_zip - && !(flags & BTR_KEEP_IBUF_BITMAP) + if (!(flags & BTR_KEEP_IBUF_BITMAP) && !dict_index_is_clust(index) && page_is_leaf(page)) { - /* Update the free bits in the insert buffer. */ - ibuf_update_free_bits_zip(block, mtr); + + if (page_zip) { + ibuf_update_free_bits_zip(block, mtr); + } else { + ibuf_update_free_bits_low(block, max_ins_size, mtr); + } } return(err); @@ -2444,6 +2452,7 @@ btr_cur_pessimistic_update( ibool was_first; ulint n_reserved = 0; ulint n_ext; + ulint max_ins_size = 0; *offsets = NULL; *big_rec = NULL; @@ -2622,6 +2631,10 @@ make_external: } } + if (!page_zip) { + max_ins_size = page_get_max_insert_size_after_reorganize(page, 1); + } + /* Store state of explicit locks on rec on the page infimum record, before deleting rec. The page infimum acts as a dummy carrier of the locks, taking care also of lock releases, before we can move the locks @@ -2667,13 +2680,18 @@ make_external: rec_offs_make_valid( page_cursor->rec, index, *offsets); } - } else if (page_zip && - !dict_index_is_clust(index) + } else if (!dict_index_is_clust(index) && page_is_leaf(page)) { + /* Update the free bits in the insert buffer. This is the same block which was skipped by BTR_KEEP_IBUF_BITMAP. 
*/ - ibuf_update_free_bits_zip(block, mtr); + if (page_zip) { + ibuf_update_free_bits_zip(block, mtr); + } else { + ibuf_update_free_bits_low(block, max_ins_size, + mtr); + } } err = DB_SUCCESS; diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 311e3326f2b..85e44294e60 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -53,6 +53,10 @@ Created 11/5/1995 Heikki Tuuri #include "page0zip.h" #include "srv0mon.h" #include "buf0checksum.h" +#ifdef HAVE_LIBNUMA +#include <numa.h> +#include <numaif.h> +#endif // HAVE_LIBNUMA /* IMPLEMENTATION OF THE BUFFER POOL @@ -1112,6 +1116,22 @@ buf_chunk_init( return(NULL); } +#ifdef HAVE_LIBNUMA + if (srv_numa_interleave) { + int st = mbind(chunk->mem, chunk->mem_size, + MPOL_INTERLEAVE, + numa_all_nodes_ptr->maskp, + numa_all_nodes_ptr->size, + MPOL_MF_MOVE); + if (st != 0) { + ib_logf(IB_LOG_LEVEL_WARN, + "Failed to set NUMA memory policy of buffer" + " pool page frames to MPOL_INTERLEAVE" + " (error: %s).", strerror(errno)); + } + } +#endif // HAVE_LIBNUMA + /* Allocate the block descriptors from the start of the memory block. 
*/ chunk->blocks = (buf_block_t*) chunk->mem; @@ -1442,6 +1462,21 @@ buf_pool_init( ut_ad(n_instances <= MAX_BUFFER_POOLS); ut_ad(n_instances == srv_buf_pool_instances); +#ifdef HAVE_LIBNUMA + if (srv_numa_interleave) { + ib_logf(IB_LOG_LEVEL_INFO, + "Setting NUMA memory policy to MPOL_INTERLEAVE"); + if (set_mempolicy(MPOL_INTERLEAVE, + numa_all_nodes_ptr->maskp, + numa_all_nodes_ptr->size) != 0) { + ib_logf(IB_LOG_LEVEL_WARN, + "Failed to set NUMA memory policy to" + " MPOL_INTERLEAVE (error: %s).", + strerror(errno)); + } + } +#endif // HAVE_LIBNUMA + buf_pool_ptr = (buf_pool_t*) mem_zalloc( n_instances * sizeof *buf_pool_ptr); @@ -1462,6 +1497,18 @@ buf_pool_init( btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64); +#ifdef HAVE_LIBNUMA + if (srv_numa_interleave) { + ib_logf(IB_LOG_LEVEL_INFO, + "Setting NUMA memory policy to MPOL_DEFAULT"); + if (set_mempolicy(MPOL_DEFAULT, NULL, 0) != 0) { + ib_logf(IB_LOG_LEVEL_WARN, + "Failed to set NUMA memory policy to" + " MPOL_DEFAULT (error: %s).", strerror(errno)); + } + } +#endif // HAVE_LIBNUMA + return(DB_SUCCESS); } diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index 78b4cc77945..e530ec9e97a 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -207,14 +207,6 @@ dict_index_remove_from_cache_low( dict_index_t* index, /*!< in, own: index */ ibool lru_evict); /*!< in: TRUE if page being evicted to make room in the table LRU list */ -/**********************************************************************//** -Removes a table object from the dictionary cache. */ -static -void -dict_table_remove_from_cache_low( -/*=============================*/ - dict_table_t* table, /*!< in, own: table */ - ibool lru_evict); /*!< in: TRUE if evicting from LRU */ #ifdef UNIV_DEBUG /**********************************************************************//** Validate the dictionary table LRU list. 
@@ -748,6 +740,45 @@ dict_table_get_all_fts_indexes( return(ib_vector_size(indexes)); } +/** Store autoinc value when the table is evicted. +@param[in] table table evicted */ +UNIV_INTERN +void +dict_table_autoinc_store( + const dict_table_t* table) +{ + ut_ad(mutex_own(&dict_sys->mutex)); + + if (table->autoinc != 0) { + ut_ad(dict_sys->autoinc_map->find(table->id) + == dict_sys->autoinc_map->end()); + + dict_sys->autoinc_map->insert( + std::pair<table_id_t, ib_uint64_t>( + table->id, table->autoinc)); + } +} + +/** Restore autoinc value when the table is loaded. +@param[in] table table loaded */ +UNIV_INTERN +void +dict_table_autoinc_restore( + dict_table_t* table) +{ + ut_ad(mutex_own(&dict_sys->mutex)); + + autoinc_map_t::iterator it; + it = dict_sys->autoinc_map->find(table->id); + + if (it != dict_sys->autoinc_map->end()) { + table->autoinc = it->second; + ut_ad(table->autoinc != 0); + + dict_sys->autoinc_map->erase(it); + } +} + /********************************************************************//** Reads the next autoinc value (== autoinc counter value), 0 if not yet initialized. @@ -1041,6 +1072,8 @@ dict_init(void) mutex_create(dict_foreign_err_mutex_key, &dict_foreign_err_mutex, SYNC_NO_ORDER_CHECK); } + + dict_sys->autoinc_map = new autoinc_map_t(); } /**********************************************************************//** @@ -1288,6 +1321,8 @@ dict_table_add_to_cache( UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_non_LRU, table); } + dict_table_autoinc_restore(table); + ut_ad(dict_lru_validate()); dict_sys->size += mem_heap_get_size(table->heap) @@ -1978,7 +2013,6 @@ dict_table_change_id_in_cache( /**********************************************************************//** Removes a table object from the dictionary cache. 
*/ -static void dict_table_remove_from_cache_low( /*=============================*/ @@ -2040,6 +2074,10 @@ dict_table_remove_from_cache_low( ut_ad(dict_lru_validate()); + if (lru_evict) { + dict_table_autoinc_store(table); + } + if (lru_evict && table->drop_aborted) { /* Do as dict_table_try_drop_aborted() does. */ @@ -6330,6 +6368,8 @@ dict_close(void) mutex_free(&dict_foreign_err_mutex); } + delete dict_sys->autoinc_map; + mem_free(dict_sys); dict_sys = NULL; } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index d3a81e36bca..22d51a439e1 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -6495,7 +6495,7 @@ ha_innobase::write_row( DBUG_ENTER("ha_innobase::write_row"); - if (srv_read_only_mode) { + if (high_level_read_only) { ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } else if (prebuilt->trx != trx) { @@ -7039,7 +7039,7 @@ ha_innobase::update_row( ut_a(prebuilt->trx == trx); - if (srv_read_only_mode) { + if (high_level_read_only) { ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } else if (!trx_is_started(trx)) { @@ -7171,7 +7171,7 @@ ha_innobase::delete_row( ut_a(prebuilt->trx == trx); - if (srv_read_only_mode) { + if (high_level_read_only) { ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } else if (!trx_is_started(trx)) { @@ -9499,7 +9499,7 @@ ha_innobase::create( if (form->s->fields > REC_MAX_N_USER_FIELDS) { DBUG_RETURN(HA_ERR_TOO_MANY_FIELDS); - } else if (srv_read_only_mode) { + } else if (high_level_read_only) { DBUG_RETURN(HA_ERR_INNODB_READ_ONLY); } @@ -9829,7 +9829,7 @@ ha_innobase::discard_or_import_tablespace( ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N); ut_a(prebuilt->trx == thd_to_trx(ha_thd())); - if (srv_read_only_mode) { + if (high_level_read_only) { DBUG_RETURN(HA_ERR_TABLE_READONLY); } @@ -9923,7 +9923,7 
@@ ha_innobase::truncate() DBUG_ENTER("ha_innobase::truncate"); - if (srv_read_only_mode) { + if (high_level_read_only) { DBUG_RETURN(HA_ERR_TABLE_READONLY); } @@ -10274,7 +10274,7 @@ ha_innobase::rename_table( DBUG_ENTER("ha_innobase::rename_table"); - if (srv_read_only_mode) { + if (high_level_read_only) { ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE); DBUG_RETURN(HA_ERR_TABLE_READONLY); } @@ -14288,6 +14288,12 @@ innodb_internal_table_validate( } dict_table_close(user_table, FALSE, TRUE); + + DBUG_EXECUTE_IF("innodb_evict_autoinc_table", + mutex_enter(&dict_sys->mutex); + dict_table_remove_from_cache_low(user_table, TRUE); + mutex_exit(&dict_sys->mutex); + ); } return(ret); @@ -16301,6 +16307,13 @@ static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio, "Use native AIO if supported on this platform.", NULL, NULL, TRUE); +#ifdef HAVE_LIBNUMA +static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Use NUMA interleave memory policy to allocate InnoDB buffer pool.", + NULL, NULL, FALSE); +#endif // HAVE_LIBNUMA + static MYSQL_SYSVAR_BOOL(api_enable_binlog, ib_binlog_enabled, PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, "Enable binlog for applications direct access InnoDB through InnoDB APIs", @@ -16579,6 +16592,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(version), MYSQL_SYSVAR(use_sys_malloc), MYSQL_SYSVAR(use_native_aio), +#ifdef HAVE_LIBNUMA + MYSQL_SYSVAR(numa_interleave), +#endif // HAVE_LIBNUMA MYSQL_SYSVAR(change_buffering), MYSQL_SYSVAR(change_buffer_max_size), #if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 96cabae3f0d..2354f5537cb 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -234,7 +234,7 @@ ha_innobase::check_if_supported_inplace_alter( { DBUG_ENTER("check_if_supported_inplace_alter"); - 
if (srv_read_only_mode) { + if (high_level_read_only) { ha_alter_info->unsupported_reason = innobase_get_err_msg(ER_READ_ONLY_MODE); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index d770449e851..dea4a9a2a50 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -309,6 +309,21 @@ dict_table_autoinc_initialize( dict_table_t* table, /*!< in/out: table */ ib_uint64_t value) /*!< in: next value to assign to a row */ __attribute__((nonnull)); + +/** Store autoinc value when the table is evicted. +@param[in] table table evicted */ +UNIV_INTERN +void +dict_table_autoinc_store( + const dict_table_t* table); + +/** Restore autoinc value when the table is loaded. +@param[in] table table loaded */ +UNIV_INTERN +void +dict_table_autoinc_restore( + dict_table_t* table); + /********************************************************************//** Reads the next autoinc value (== autoinc counter value), 0 if not yet initialized. @@ -368,6 +383,15 @@ dict_table_remove_from_cache( dict_table_t* table) /*!< in, own: table */ __attribute__((nonnull)); /**********************************************************************//** +Removes a table object from the dictionary cache. */ +UNIV_INTERN +void +dict_table_remove_from_cache_low( +/*=============================*/ + dict_table_t* table, /*!< in, own: table */ + ibool lru_evict); /*!< in: TRUE if table being evicted + to make room in the table LRU list */ +/**********************************************************************//** Renames a table object. 
@return TRUE if success */ UNIV_INTERN @@ -1543,6 +1567,8 @@ extern dict_sys_t* dict_sys; /** the data dictionary rw-latch protecting dict_sys */ extern rw_lock_t dict_operation_lock; +typedef std::map<table_id_t, ib_uint64_t> autoinc_map_t; + /* Dictionary system struct */ struct dict_sys_t{ ib_mutex_t mutex; /*!< mutex protecting the data @@ -1577,6 +1603,8 @@ struct dict_sys_t{ UT_LIST_BASE_NODE_T(dict_table_t) table_non_LRU; /*!< List of tables that can't be evicted from the cache */ + autoinc_map_t* autoinc_map; /*!< Map to store table id and autoinc + when table is evicted */ }; #endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic index 21747fdceac..a5df9f7b6b4 100644 --- a/storage/innobase/include/ibuf0ibuf.ic +++ b/storage/innobase/include/ibuf0ibuf.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -128,7 +128,8 @@ ibuf_should_try( && ibuf->max_size != 0 && !dict_index_is_clust(index) && index->table->quiesce == QUIESCE_NONE - && (ignore_sec_unique || !dict_index_is_unique(index))); + && (ignore_sec_unique || !dict_index_is_unique(index)) + && srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE); } /******************************************************************//** diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h index 57b29fff663..9329a0effb4 100644 --- a/storage/innobase/include/os0sync.h +++ b/storage/innobase/include/os0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. 
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -38,6 +38,26 @@ Created 9/6/1995 Heikki Tuuri #include "ut0lst.h" #include "sync0types.h" +#if defined __i386__ || defined __x86_64__ || defined _M_IX86 \ + || defined _M_X64 || defined __WIN__ + +#define IB_STRONG_MEMORY_MODEL + +#endif /* __i386__ || __x86_64__ || _M_IX86 || _M_X64 || __WIN__ */ + +#ifdef HAVE_WINDOWS_ATOMICS +typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates + on LONG variable */ +#elif defined(HAVE_ATOMIC_BUILTINS) && !defined(HAVE_ATOMIC_BUILTINS_BYTE) +typedef ulint lock_word_t; +#else + +#define IB_LOCK_WORD_IS_BYTE + +typedef byte lock_word_t; + +#endif /* HAVE_WINDOWS_ATOMICS */ + #ifdef __WIN__ /** Native event (slow)*/ typedef HANDLE os_native_event_t; @@ -429,14 +449,61 @@ amount to decrement. */ # define os_atomic_decrement_uint64(ptr, amount) \ os_atomic_decrement(ptr, amount) -/**********************************************************//** -Returns the old value of *ptr, atomically sets *ptr to new_val */ - -# define os_atomic_test_and_set_byte(ptr, new_val) \ - __sync_lock_test_and_set(ptr, (byte) new_val) - -# define os_atomic_test_and_set_ulint(ptr, new_val) \ - __sync_lock_test_and_set(ptr, new_val) +# if defined(HAVE_IB_GCC_ATOMIC_TEST_AND_SET) + +/** Do an atomic test-and-set. +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(__atomic_test_and_set(ptr, __ATOMIC_ACQUIRE)); +} + +/** Do an atomic clear. +@param[in,out] ptr Memory location to set to zero */ +inline +void +os_atomic_clear(volatile lock_word_t* ptr) +{ + __atomic_clear(ptr, __ATOMIC_RELEASE); +} + +# elif defined(IB_STRONG_MEMORY_MODEL) + +/** Do an atomic test and set. 
+@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(__sync_lock_test_and_set(ptr, 1)); +} + +/** Do an atomic release. + +In theory __sync_lock_release should be used to release the lock. +Unfortunately, it does not work properly alone. The workaround is +that more conservative __sync_lock_test_and_set is used instead. + +Performance regression was observed at some conditions for Intel +architecture. Disable release barrier on Intel architecture for now. +@param[in,out] ptr Memory location to write to +@return the previous value */ +inline +lock_word_t +os_atomic_clear(volatile lock_word_t* ptr) +{ + return(__sync_lock_test_and_set(ptr, 0)); +} + +# else + +# error "Unsupported platform" + +# endif /* HAVE_IB_GCC_ATOMIC_TEST_AND_SET */ #elif defined(HAVE_IB_SOLARIS_ATOMICS) @@ -511,14 +578,51 @@ amount to decrement. */ # define os_atomic_decrement_uint64(ptr, amount) \ os_atomic_increment_uint64(ptr, -(amount)) -/**********************************************************//** -Returns the old value of *ptr, atomically sets *ptr to new_val */ - -# define os_atomic_test_and_set_byte(ptr, new_val) \ - atomic_swap_uchar(ptr, new_val) - -# define os_atomic_test_and_set_ulint(ptr, new_val) \ - atomic_swap_ulong(ptr, new_val) +# ifdef IB_LOCK_WORD_IS_BYTE + +/** Do an atomic xchg and set to non-zero. +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(atomic_swap_uchar(ptr, 1)); +} + +/** Do an atomic xchg and set to zero. +@param[in,out] ptr Memory location to set to zero +@return the previous value */ +inline +lock_word_t +os_atomic_clear(volatile lock_word_t* ptr) +{ + return(atomic_swap_uchar(ptr, 0)); +} + +# else + +/** Do an atomic xchg and set to non-zero. 
+@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(atomic_swap_ulong(ptr, 1)); +} + +/** Do an atomic xchg and set to zero. +@param[in,out] ptr Memory location to set to zero +@return the previous value */ +inline +lock_word_t +os_atomic_clear(volatile lock_word_t* ptr) +{ + return(atomic_swap_ulong(ptr, 0)); +} + +# endif /* IB_LOCK_WORD_IS_BYTE */ #elif defined(HAVE_WINDOWS_ATOMICS) @@ -633,16 +737,27 @@ amount to decrement. There is no atomic substract function on Windows */ (ib_int64_t*) ptr, \ -(ib_int64_t) amount) - amount)) -/**********************************************************//** -Returns the old value of *ptr, atomically sets *ptr to new_val. -InterlockedExchange() operates on LONG, and the LONG will be -clobbered */ - -# define os_atomic_test_and_set_byte(ptr, new_val) \ - ((byte) InterlockedExchange(ptr, new_val)) - -# define os_atomic_test_and_set_ulong(ptr, new_val) \ - InterlockedExchange(ptr, new_val) +/** Do an atomic test and set. +InterlockedExchange() operates on LONG, and the LONG will be clobbered +@param[in,out] ptr Memory location to set to non-zero +@return the previous value */ +inline +lock_word_t +os_atomic_test_and_set(volatile lock_word_t* ptr) +{ + return(InterlockedExchange(ptr, 1)); +} + +/** Do an atomic release. 
+InterlockedExchange() operates on LONG, and the LONG will be clobbered +@param[in,out] ptr Memory location to set to zero +@return the previous value */ +inline +lock_word_t +os_atomic_clear(volatile lock_word_t* ptr) +{ + return(InterlockedExchange(ptr, 0)); +} #else # define IB_ATOMICS_STARTUP_MSG \ @@ -692,7 +807,7 @@ for synchronization */ } while (0); /** barrier definitions for memory ordering */ -#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined __WIN__ +#ifdef IB_STRONG_MEMORY_MODEL /* Performance regression was observed at some conditions for Intel architecture. Disable memory barrier for Intel architecture for now. */ # define os_rmb diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 7a6c9f93e3d..6e2f76af30d 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, 2009, Google Inc. Copyright (c) 2009, Percona Inc. @@ -187,6 +187,9 @@ extern char* srv_arch_dir; recovery and open all tables in RO mode instead of RW mode. We don't sync the max trx id to disk either. */ extern my_bool srv_read_only_mode; +/** Set if InnoDB operates in read-only mode or innodb-force-recovery +is greater than SRV_FORCE_NO_TRX_UNDO. */ +extern my_bool high_level_read_only; /** store to its own file each table created by an user; data dictionary tables are in the system tablespace 0 */ extern my_bool srv_file_per_table; @@ -217,6 +220,7 @@ OS (provided we compiled Innobase with it in), otherwise we will use simulated aio we build below with threads. 
Currently we support native aio on windows and linux */ extern my_bool srv_use_native_aio; +extern my_bool srv_numa_interleave; #ifdef __WIN__ extern ibool srv_use_native_conditions; #endif /* __WIN__ */ @@ -875,6 +879,7 @@ struct srv_slot_t{ #else /* !UNIV_HOTBACKUP */ # define srv_use_adaptive_hash_indexes FALSE # define srv_use_native_aio FALSE +# define srv_numa_interleave FALSE # define srv_force_recovery 0UL # define srv_set_io_thread_op_info(t,info) ((void) 0) # define srv_reset_io_thread_op_info() ((void) 0) diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h index 82fb353a41b..d6f8d8f5e4c 100644 --- a/storage/innobase/include/sync0sync.h +++ b/storage/innobase/include/sync0sync.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2012, Facebook Inc. @@ -46,15 +46,6 @@ Created 9/5/1995 Heikki Tuuri extern "C" my_bool timed_mutexes; #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ -#ifdef HAVE_WINDOWS_ATOMICS -typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates - on LONG variable */ -#elif defined(HAVE_ATOMIC_BUILTINS) && !defined(HAVE_ATOMIC_BUILTINS_BYTE) -typedef ulint lock_word_t; -#else -typedef byte lock_word_t; -#endif - #if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK /* By default, buffer mutexes and rwlocks will be excluded from diff --git a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic index 616e53d4aac..9a062db71a8 100644 --- a/storage/innobase/include/sync0sync.ic +++ b/storage/innobase/include/sync0sync.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved. 
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. Portions of this file contain modifications contributed and copyrighted by @@ -74,17 +74,13 @@ Performs an atomic test-and-set instruction to the lock_word field of a mutex. @return the previous value of lock_word: 0 or 1 */ UNIV_INLINE -byte +lock_word_t ib_mutex_test_and_set( -/*===============*/ +/*==================*/ ib_mutex_t* mutex) /*!< in: mutex */ { #if defined(HAVE_ATOMIC_BUILTINS) -# if defined(HAVE_ATOMIC_BUILTINS_BYTE) - return(os_atomic_test_and_set_byte(&mutex->lock_word, 1)); -# else - return(os_atomic_test_and_set_ulint(&mutex->lock_word, 1)); -# endif + return(os_atomic_test_and_set(&mutex->lock_word)); #else ibool ret; @@ -100,7 +96,7 @@ ib_mutex_test_and_set( } return((byte) ret); -#endif +#endif /* HAVE_ATOMIC_BUILTINS */ } /******************************************************************//** @@ -113,19 +109,12 @@ mutex_reset_lock_word( ib_mutex_t* mutex) /*!< in: mutex */ { #if defined(HAVE_ATOMIC_BUILTINS) - /* In theory __sync_lock_release should be used to release the lock. - Unfortunately, it does not work properly alone. The workaround is - that more conservative __sync_lock_test_and_set is used instead. 
*/ -# if defined(HAVE_ATOMIC_BUILTINS_BYTE) - os_atomic_test_and_set_byte(&mutex->lock_word, 0); -# else - os_atomic_test_and_set_ulint(&mutex->lock_word, 0); -# endif + os_atomic_clear(&mutex->lock_word); #else mutex->lock_word = 0; os_fast_mutex_unlock(&(mutex->os_fast_mutex)); -#endif +#endif /* HAVE_ATOMIC_BUILTINS */ } /******************************************************************//** diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 9d047c4cc3b..b5e4df316ad 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -2634,8 +2634,8 @@ lock_rec_inherit_to_gap( /* If srv_locks_unsafe_for_binlog is TRUE or session is using READ COMMITTED isolation level, we do not want locks set by an UPDATE or a DELETE to be inherited as gap type locks. But we - DO want S-locks set by a consistency constraint to be inherited also - then. */ + DO want S-locks/X-locks(taken for replace) set by a consistency + constraint to be inherited also then */ for (lock = lock_rec_get_first(block, heap_no); lock != NULL; @@ -2645,7 +2645,8 @@ lock_rec_inherit_to_gap( && !((srv_locks_unsafe_for_binlog || lock->trx->isolation_level <= TRX_ISO_READ_COMMITTED) - && lock_get_mode(lock) == LOCK_X)) { + && lock_get_mode(lock) == + (lock->trx->duplicates ? LOCK_S : LOCK_X))) { lock_rec_add_to_queue( LOCK_REC | LOCK_GAP | lock_get_mode(lock), diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc index d0e0453849e..3ff4a9d7d1e 100644 --- a/storage/innobase/log/log0log.cc +++ b/storage/innobase/log/log0log.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Google Inc. 
Portions of this file contain modifications contributed and copyrighted by @@ -3406,11 +3406,7 @@ loop: lsn = log_sys->lsn; - ut_ad(srv_force_recovery != SRV_FORCE_NO_LOG_REDO - || lsn == log_sys->last_checkpoint_lsn + LOG_BLOCK_HDR_SIZE); - - if ((srv_force_recovery != SRV_FORCE_NO_LOG_REDO - && lsn != log_sys->last_checkpoint_lsn) + if (lsn != log_sys->last_checkpoint_lsn #ifdef UNIV_LOG_ARCHIVE || (srv_log_archive_on && lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE) diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc index 9370f8cef34..f0f7e5fcdf0 100644 --- a/storage/innobase/row/row0ins.cc +++ b/storage/innobase/row/row0ins.cc @@ -2709,6 +2709,8 @@ row_ins_sec_index_entry_low( goto func_exit; } + DEBUG_SYNC_C("row_ins_sec_index_entry_dup_locks_created"); + /* We did not find a duplicate and we have now locked with s-locks the necessary records to prevent any insertion of a duplicate by another diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index f4ea8895d2f..ad35307c8b2 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. @@ -126,6 +126,9 @@ UNIV_INTERN ulint srv_file_format = 0; UNIV_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to set it to the highest format we support. */ UNIV_INTERN ulint srv_max_file_format_at_startup = UNIV_FORMAT_MAX; +/** Set if InnoDB operates in read-only mode or innodb-force-recovery +is greater than SRV_FORCE_NO_TRX_UNDO. */ +UNIV_INTERN my_bool high_level_read_only; #if UNIV_FORMAT_A # error "UNIV_FORMAT_A must be 0!" 
@@ -144,6 +147,7 @@ OS (provided we compiled Innobase with it in), otherwise we will use simulated aio we build below with threads. Currently we support native aio on windows and linux */ UNIV_INTERN my_bool srv_use_native_aio = TRUE; +UNIV_INTERN my_bool srv_numa_interleave = FALSE; #ifdef __WIN__ /* Windows native condition variables. We use runtime loading / function @@ -2581,13 +2585,8 @@ srv_do_purge( } n_pages_purged = trx_purge( - n_use_threads, srv_purge_batch_size, false); - - if (!(count++ % TRX_SYS_N_RSEGS)) { - /* Force a truncate of the history list. */ - n_pages_purged += trx_purge( - 1, srv_purge_batch_size, true); - } + n_use_threads, srv_purge_batch_size, + (++count % TRX_SYS_N_RSEGS) == 0); *n_total_purged += n_pages_purged; @@ -2780,8 +2779,17 @@ DECLARE_THREAD(srv_purge_coordinator_thread)( n_pages_purged = trx_purge(1, srv_purge_batch_size, false); } - /* Force a truncate of the history list. */ - n_pages_purged = trx_purge(1, srv_purge_batch_size, true); + /* This trx_purge is called to remove any undo records (added by + background threads) after completion of the above loop. When + srv_fast_shutdown != 0, a large batch size can cause significant + delay in shutdown, so reducing the batch size to magic number 20 + (which was default in 5.5), which we hope will be sufficient to + remove all the undo records */ + const uint temp_batch_size = 20; + + n_pages_purged = trx_purge(1, srv_purge_batch_size <= temp_batch_size + ? 
srv_purge_batch_size : temp_batch_size, + true); ut_a(n_pages_purged == 0 || srv_fast_shutdown != 0); /* The task queue should always be empty, independent of fast diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index d888d13d863..8a9afd561a9 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1545,9 +1545,8 @@ innobase_start_or_create_for_mysql(void) char* logfile0 = NULL; size_t dirnamelen; - if (srv_force_recovery > SRV_FORCE_NO_TRX_UNDO) { - srv_read_only_mode = true; - } + high_level_read_only = srv_read_only_mode + || srv_force_recovery > SRV_FORCE_NO_TRX_UNDO; if (srv_read_only_mode) { ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode"); diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc index 5eb3cef46c1..e5f03f4b96a 100644 --- a/storage/innobase/trx/trx0sys.cc +++ b/storage/innobase/trx/trx0sys.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -29,7 +29,10 @@ Created 3/26/1996 Heikki Tuuri #include "trx0sys.ic" #endif -#ifndef UNIV_HOTBACKUP +#ifdef UNIV_HOTBACKUP +#include "fsp0types.h" + +#else /* !UNIV_HOTBACKUP */ #include "fsp0fsp.h" #include "mtr0log.h" #include "mtr0log.h" @@ -1115,18 +1118,15 @@ trx_sys_read_pertable_file_format_id( /* get the file format from the page */ ptr = page + 54; flags = mach_read_from_4(ptr); - if (flags == 0) { - /* file format is Antelope */ - *format_id = 0; - return(TRUE); - } else if (flags & 1) { - /* tablespace flags are ok */ - *format_id = (flags / 32) % 128; - return(TRUE); - } else { + + if (!fsp_flags_is_valid(flags)) { /* bad tablespace flags */ return(FALSE); } + + *format_id = FSP_FLAGS_GET_POST_ANTELOPE(flags); + + return(TRUE); } |