summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sergei Golubchik <serg@mariadb.org> 2015-10-09 17:21:46 +0200
committer: Sergei Golubchik <serg@mariadb.org> 2015-10-09 17:21:46 +0200
commit: 86ff4da14dc53659e88ee8cd66412045dcb26e31 (patch)
tree: 20c7b23d709799f0ba891c5aa0373fe608019970
parent: 56544122c75578a23a728e6acafc72dfc449484e (diff)
download: mariadb-git-86ff4da14dc53659e88ee8cd66412045dcb26e31.tar.gz
5.6.27
-rw-r--r--storage/innobase/CMakeLists.txt21
-rw-r--r--storage/innobase/btr/btr0cur.cc34
-rw-r--r--storage/innobase/buf/buf0buf.cc47
-rw-r--r--storage/innobase/dict/dict0dict.cc58
-rw-r--r--storage/innobase/handler/ha_innodb.cc30
-rw-r--r--storage/innobase/handler/handler0alter.cc2
-rw-r--r--storage/innobase/include/dict0dict.h28
-rw-r--r--storage/innobase/include/ibuf0ibuf.ic5
-rw-r--r--storage/innobase/include/os0sync.h171
-rw-r--r--storage/innobase/include/srv0srv.h7
-rw-r--r--storage/innobase/include/sync0sync.h11
-rw-r--r--storage/innobase/include/sync0sync.ic25
-rw-r--r--storage/innobase/lock/lock0lock.cc7
-rw-r--r--storage/innobase/log/log0log.cc8
-rw-r--r--storage/innobase/row/row0ins.cc2
-rw-r--r--storage/innobase/srv/srv0srv.cc28
-rw-r--r--storage/innobase/srv/srv0start.cc5
-rw-r--r--storage/innobase/trx/trx0sys.cc22
18 files changed, 393 insertions, 118 deletions
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
index eeb53f96c9f..2e939899d24 100644
--- a/storage/innobase/CMakeLists.txt
+++ b/storage/innobase/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -29,6 +29,9 @@ IF(UNIX)
ADD_DEFINITIONS(-DLINUX_NATIVE_AIO=1)
LINK_LIBRARIES(aio)
ENDIF()
+ IF(HAVE_LIBNUMA)
+ LINK_LIBRARIES(numa)
+ ENDIF()
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "HP*")
ADD_DEFINITIONS("-DUNIV_HPUX")
ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "AIX")
@@ -145,6 +148,18 @@ IF(NOT CMAKE_CROSSCOMPILING)
}"
HAVE_IB_GCC_ATOMIC_THREAD_FENCE
)
+ CHECK_C_SOURCE_RUNS(
+ "#include<stdint.h>
+ int main()
+ {
+ unsigned char c;
+
+ __atomic_test_and_set(&c, __ATOMIC_ACQUIRE);
+ __atomic_clear(&c, __ATOMIC_RELEASE);
+ return(0);
+ }"
+ HAVE_IB_GCC_ATOMIC_TEST_AND_SET
+ )
ENDIF()
IF(HAVE_IB_GCC_ATOMIC_BUILTINS)
@@ -167,6 +182,10 @@ IF(HAVE_IB_GCC_ATOMIC_THREAD_FENCE)
ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_THREAD_FENCE=1)
ENDIF()
+IF(HAVE_IB_GCC_ATOMIC_TEST_AND_SET)
+ ADD_DEFINITIONS(-DHAVE_IB_GCC_ATOMIC_TEST_AND_SET=1)
+ENDIF()
+
# either define HAVE_IB_ATOMIC_PTHREAD_T_GCC or not
IF(NOT CMAKE_CROSSCOMPILING)
CHECK_C_SOURCE_RUNS(
diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc
index 1611fb6394c..ad323531da6 100644
--- a/storage/innobase/btr/btr0cur.cc
+++ b/storage/innobase/btr/btr0cur.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2012, Facebook Inc.
@@ -2117,6 +2117,7 @@ btr_cur_optimistic_update(
ulint max_size;
ulint new_rec_size;
ulint old_rec_size;
+ ulint max_ins_size = 0;
dtuple_t* new_entry;
roll_ptr_t roll_ptr;
ulint i;
@@ -2245,6 +2246,10 @@ any_extern:
: (old_rec_size
+ page_get_max_insert_size_after_reorganize(page, 1));
+ if (!page_zip) {
+ max_ins_size = page_get_max_insert_size_after_reorganize(page, 1);
+ }
+
if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)
&& (max_size >= new_rec_size))
|| (page_get_n_recs(page) <= 1))) {
@@ -2304,12 +2309,15 @@ any_extern:
ut_ad(err == DB_SUCCESS);
func_exit:
- if (page_zip
- && !(flags & BTR_KEEP_IBUF_BITMAP)
+ if (!(flags & BTR_KEEP_IBUF_BITMAP)
&& !dict_index_is_clust(index)
&& page_is_leaf(page)) {
- /* Update the free bits in the insert buffer. */
- ibuf_update_free_bits_zip(block, mtr);
+
+ if (page_zip) {
+ ibuf_update_free_bits_zip(block, mtr);
+ } else {
+ ibuf_update_free_bits_low(block, max_ins_size, mtr);
+ }
}
return(err);
@@ -2444,6 +2452,7 @@ btr_cur_pessimistic_update(
ibool was_first;
ulint n_reserved = 0;
ulint n_ext;
+ ulint max_ins_size = 0;
*offsets = NULL;
*big_rec = NULL;
@@ -2622,6 +2631,10 @@ make_external:
}
}
+ if (!page_zip) {
+ max_ins_size = page_get_max_insert_size_after_reorganize(page, 1);
+ }
+
/* Store state of explicit locks on rec on the page infimum record,
before deleting rec. The page infimum acts as a dummy carrier of the
locks, taking care also of lock releases, before we can move the locks
@@ -2667,13 +2680,18 @@ make_external:
rec_offs_make_valid(
page_cursor->rec, index, *offsets);
}
- } else if (page_zip &&
- !dict_index_is_clust(index)
+ } else if (!dict_index_is_clust(index)
&& page_is_leaf(page)) {
+
/* Update the free bits in the insert buffer.
This is the same block which was skipped by
BTR_KEEP_IBUF_BITMAP. */
- ibuf_update_free_bits_zip(block, mtr);
+ if (page_zip) {
+ ibuf_update_free_bits_zip(block, mtr);
+ } else {
+ ibuf_update_free_bits_low(block, max_ins_size,
+ mtr);
+ }
}
err = DB_SUCCESS;
diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc
index 311e3326f2b..85e44294e60 100644
--- a/storage/innobase/buf/buf0buf.cc
+++ b/storage/innobase/buf/buf0buf.cc
@@ -53,6 +53,10 @@ Created 11/5/1995 Heikki Tuuri
#include "page0zip.h"
#include "srv0mon.h"
#include "buf0checksum.h"
+#ifdef HAVE_LIBNUMA
+#include <numa.h>
+#include <numaif.h>
+#endif // HAVE_LIBNUMA
/*
IMPLEMENTATION OF THE BUFFER POOL
@@ -1112,6 +1116,22 @@ buf_chunk_init(
return(NULL);
}
+#ifdef HAVE_LIBNUMA
+ if (srv_numa_interleave) {
+ int st = mbind(chunk->mem, chunk->mem_size,
+ MPOL_INTERLEAVE,
+ numa_all_nodes_ptr->maskp,
+ numa_all_nodes_ptr->size,
+ MPOL_MF_MOVE);
+ if (st != 0) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Failed to set NUMA memory policy of buffer"
+ " pool page frames to MPOL_INTERLEAVE"
+ " (error: %s).", strerror(errno));
+ }
+ }
+#endif // HAVE_LIBNUMA
+
/* Allocate the block descriptors from
the start of the memory block. */
chunk->blocks = (buf_block_t*) chunk->mem;
@@ -1442,6 +1462,21 @@ buf_pool_init(
ut_ad(n_instances <= MAX_BUFFER_POOLS);
ut_ad(n_instances == srv_buf_pool_instances);
+#ifdef HAVE_LIBNUMA
+ if (srv_numa_interleave) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Setting NUMA memory policy to MPOL_INTERLEAVE");
+ if (set_mempolicy(MPOL_INTERLEAVE,
+ numa_all_nodes_ptr->maskp,
+ numa_all_nodes_ptr->size) != 0) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Failed to set NUMA memory policy to"
+ " MPOL_INTERLEAVE (error: %s).",
+ strerror(errno));
+ }
+ }
+#endif // HAVE_LIBNUMA
+
buf_pool_ptr = (buf_pool_t*) mem_zalloc(
n_instances * sizeof *buf_pool_ptr);
@@ -1462,6 +1497,18 @@ buf_pool_init(
btr_search_sys_create(buf_pool_get_curr_size() / sizeof(void*) / 64);
+#ifdef HAVE_LIBNUMA
+ if (srv_numa_interleave) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Setting NUMA memory policy to MPOL_DEFAULT");
+ if (set_mempolicy(MPOL_DEFAULT, NULL, 0) != 0) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Failed to set NUMA memory policy to"
+ " MPOL_DEFAULT (error: %s).", strerror(errno));
+ }
+ }
+#endif // HAVE_LIBNUMA
+
return(DB_SUCCESS);
}
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index 78b4cc77945..e530ec9e97a 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -207,14 +207,6 @@ dict_index_remove_from_cache_low(
dict_index_t* index, /*!< in, own: index */
ibool lru_evict); /*!< in: TRUE if page being evicted
to make room in the table LRU list */
-/**********************************************************************//**
-Removes a table object from the dictionary cache. */
-static
-void
-dict_table_remove_from_cache_low(
-/*=============================*/
- dict_table_t* table, /*!< in, own: table */
- ibool lru_evict); /*!< in: TRUE if evicting from LRU */
#ifdef UNIV_DEBUG
/**********************************************************************//**
Validate the dictionary table LRU list.
@@ -748,6 +740,45 @@ dict_table_get_all_fts_indexes(
return(ib_vector_size(indexes));
}
+/** Store autoinc value when the table is evicted.
+Inserts the pair (table->id, table->autoinc) into dict_sys->autoinc_map so
+that dict_table_autoinc_restore() can copy the value back later. Nothing is
+inserted when table->autoinc is 0. The ut_ad() checks expect the caller to
+own dict_sys->mutex and the map to hold no entry for this table id yet.
+@param[in] table table evicted */
+UNIV_INTERN
+void
+dict_table_autoinc_store(
+ const dict_table_t* table)
+{
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ if (table->autoinc != 0) {
+ ut_ad(dict_sys->autoinc_map->find(table->id)
+ == dict_sys->autoinc_map->end());
+
+ dict_sys->autoinc_map->insert(
+ std::pair<table_id_t, ib_uint64_t>(
+ table->id, table->autoinc));
+ }
+}
+
+/** Restore autoinc value when the table is loaded.
+Looks up table->id in dict_sys->autoinc_map. When an entry exists, its value
+is copied into table->autoinc and the entry is erased from the map; a table
+without an entry is left untouched. The ut_ad() check expects the caller to
+own dict_sys->mutex.
+@param[in,out] table table loaded; its autoinc field may be overwritten */
+UNIV_INTERN
+void
+dict_table_autoinc_restore(
+ dict_table_t* table)
+{
+ ut_ad(mutex_own(&dict_sys->mutex));
+
+ autoinc_map_t::iterator it;
+ it = dict_sys->autoinc_map->find(table->id);
+
+ if (it != dict_sys->autoinc_map->end()) {
+ table->autoinc = it->second;
+ ut_ad(table->autoinc != 0);
+
+ dict_sys->autoinc_map->erase(it);
+ }
+}
+
/********************************************************************//**
Reads the next autoinc value (== autoinc counter value), 0 if not yet
initialized.
@@ -1041,6 +1072,8 @@ dict_init(void)
mutex_create(dict_foreign_err_mutex_key,
&dict_foreign_err_mutex, SYNC_NO_ORDER_CHECK);
}
+
+ dict_sys->autoinc_map = new autoinc_map_t();
}
/**********************************************************************//**
@@ -1288,6 +1321,8 @@ dict_table_add_to_cache(
UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_non_LRU, table);
}
+ dict_table_autoinc_restore(table);
+
ut_ad(dict_lru_validate());
dict_sys->size += mem_heap_get_size(table->heap)
@@ -1978,7 +2013,6 @@ dict_table_change_id_in_cache(
/**********************************************************************//**
Removes a table object from the dictionary cache. */
-static
void
dict_table_remove_from_cache_low(
/*=============================*/
@@ -2040,6 +2074,10 @@ dict_table_remove_from_cache_low(
ut_ad(dict_lru_validate());
+ if (lru_evict) {
+ dict_table_autoinc_store(table);
+ }
+
if (lru_evict && table->drop_aborted) {
/* Do as dict_table_try_drop_aborted() does. */
@@ -6330,6 +6368,8 @@ dict_close(void)
mutex_free(&dict_foreign_err_mutex);
}
+ delete dict_sys->autoinc_map;
+
mem_free(dict_sys);
dict_sys = NULL;
}
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index d3a81e36bca..22d51a439e1 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -6495,7 +6495,7 @@ ha_innobase::write_row(
DBUG_ENTER("ha_innobase::write_row");
- if (srv_read_only_mode) {
+ if (high_level_read_only) {
ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
DBUG_RETURN(HA_ERR_TABLE_READONLY);
} else if (prebuilt->trx != trx) {
@@ -7039,7 +7039,7 @@ ha_innobase::update_row(
ut_a(prebuilt->trx == trx);
- if (srv_read_only_mode) {
+ if (high_level_read_only) {
ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
DBUG_RETURN(HA_ERR_TABLE_READONLY);
} else if (!trx_is_started(trx)) {
@@ -7171,7 +7171,7 @@ ha_innobase::delete_row(
ut_a(prebuilt->trx == trx);
- if (srv_read_only_mode) {
+ if (high_level_read_only) {
ib_senderrf(ha_thd(), IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
DBUG_RETURN(HA_ERR_TABLE_READONLY);
} else if (!trx_is_started(trx)) {
@@ -9499,7 +9499,7 @@ ha_innobase::create(
if (form->s->fields > REC_MAX_N_USER_FIELDS) {
DBUG_RETURN(HA_ERR_TOO_MANY_FIELDS);
- } else if (srv_read_only_mode) {
+ } else if (high_level_read_only) {
DBUG_RETURN(HA_ERR_INNODB_READ_ONLY);
}
@@ -9829,7 +9829,7 @@ ha_innobase::discard_or_import_tablespace(
ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
- if (srv_read_only_mode) {
+ if (high_level_read_only) {
DBUG_RETURN(HA_ERR_TABLE_READONLY);
}
@@ -9923,7 +9923,7 @@ ha_innobase::truncate()
DBUG_ENTER("ha_innobase::truncate");
- if (srv_read_only_mode) {
+ if (high_level_read_only) {
DBUG_RETURN(HA_ERR_TABLE_READONLY);
}
@@ -10274,7 +10274,7 @@ ha_innobase::rename_table(
DBUG_ENTER("ha_innobase::rename_table");
- if (srv_read_only_mode) {
+ if (high_level_read_only) {
ib_senderrf(thd, IB_LOG_LEVEL_WARN, ER_READ_ONLY_MODE);
DBUG_RETURN(HA_ERR_TABLE_READONLY);
}
@@ -14288,6 +14288,12 @@ innodb_internal_table_validate(
}
dict_table_close(user_table, FALSE, TRUE);
+
+ DBUG_EXECUTE_IF("innodb_evict_autoinc_table",
+ mutex_enter(&dict_sys->mutex);
+ dict_table_remove_from_cache_low(user_table, TRUE);
+ mutex_exit(&dict_sys->mutex);
+ );
}
return(ret);
@@ -16301,6 +16307,13 @@ static MYSQL_SYSVAR_BOOL(use_native_aio, srv_use_native_aio,
"Use native AIO if supported on this platform.",
NULL, NULL, TRUE);
+#ifdef HAVE_LIBNUMA
+static MYSQL_SYSVAR_BOOL(numa_interleave, srv_numa_interleave,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Use NUMA interleave memory policy to allocate InnoDB buffer pool.",
+ NULL, NULL, FALSE);
+#endif // HAVE_LIBNUMA
+
static MYSQL_SYSVAR_BOOL(api_enable_binlog, ib_binlog_enabled,
PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
"Enable binlog for applications direct access InnoDB through InnoDB APIs",
@@ -16579,6 +16592,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(version),
MYSQL_SYSVAR(use_sys_malloc),
MYSQL_SYSVAR(use_native_aio),
+#ifdef HAVE_LIBNUMA
+ MYSQL_SYSVAR(numa_interleave),
+#endif // HAVE_LIBNUMA
MYSQL_SYSVAR(change_buffering),
MYSQL_SYSVAR(change_buffer_max_size),
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc
index 96cabae3f0d..2354f5537cb 100644
--- a/storage/innobase/handler/handler0alter.cc
+++ b/storage/innobase/handler/handler0alter.cc
@@ -234,7 +234,7 @@ ha_innobase::check_if_supported_inplace_alter(
{
DBUG_ENTER("check_if_supported_inplace_alter");
- if (srv_read_only_mode) {
+ if (high_level_read_only) {
ha_alter_info->unsupported_reason =
innobase_get_err_msg(ER_READ_ONLY_MODE);
DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED);
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index d770449e851..dea4a9a2a50 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -309,6 +309,21 @@ dict_table_autoinc_initialize(
dict_table_t* table, /*!< in/out: table */
ib_uint64_t value) /*!< in: next value to assign to a row */
__attribute__((nonnull));
+
+/** Store autoinc value when the table is evicted.
+@param[in] table table evicted */
+UNIV_INTERN
+void
+dict_table_autoinc_store(
+ const dict_table_t* table);
+
+/** Restore autoinc value when the table is loaded.
+@param[in] table table loaded */
+UNIV_INTERN
+void
+dict_table_autoinc_restore(
+ dict_table_t* table);
+
/********************************************************************//**
Reads the next autoinc value (== autoinc counter value), 0 if not yet
initialized.
@@ -368,6 +383,15 @@ dict_table_remove_from_cache(
dict_table_t* table) /*!< in, own: table */
__attribute__((nonnull));
/**********************************************************************//**
+Removes a table object from the dictionary cache. */
+UNIV_INTERN
+void
+dict_table_remove_from_cache_low(
+/*=============================*/
+ dict_table_t* table, /*!< in, own: table */
+ ibool lru_evict); /*!< in: TRUE if table being evicted
+ to make room in the table LRU list */
+/**********************************************************************//**
Renames a table object.
@return TRUE if success */
UNIV_INTERN
@@ -1543,6 +1567,8 @@ extern dict_sys_t* dict_sys;
/** the data dictionary rw-latch protecting dict_sys */
extern rw_lock_t dict_operation_lock;
+typedef std::map<table_id_t, ib_uint64_t> autoinc_map_t;
+
/* Dictionary system struct */
struct dict_sys_t{
ib_mutex_t mutex; /*!< mutex protecting the data
@@ -1577,6 +1603,8 @@ struct dict_sys_t{
UT_LIST_BASE_NODE_T(dict_table_t)
table_non_LRU; /*!< List of tables that can't be
evicted from the cache */
+ autoinc_map_t* autoinc_map; /*!< Map to store table id and autoinc
+ when table is evicted */
};
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic
index 21747fdceac..a5df9f7b6b4 100644
--- a/storage/innobase/include/ibuf0ibuf.ic
+++ b/storage/innobase/include/ibuf0ibuf.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1997, 2015, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -128,7 +128,8 @@ ibuf_should_try(
&& ibuf->max_size != 0
&& !dict_index_is_clust(index)
&& index->table->quiesce == QUIESCE_NONE
- && (ignore_sec_unique || !dict_index_is_unique(index)));
+ && (ignore_sec_unique || !dict_index_is_unique(index))
+ && srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE);
}
/******************************************************************//**
diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h
index 57b29fff663..9329a0effb4 100644
--- a/storage/innobase/include/os0sync.h
+++ b/storage/innobase/include/os0sync.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -38,6 +38,26 @@ Created 9/6/1995 Heikki Tuuri
#include "ut0lst.h"
#include "sync0types.h"
+#if defined __i386__ || defined __x86_64__ || defined _M_IX86 \
+ || defined _M_X64 || defined __WIN__
+
+#define IB_STRONG_MEMORY_MODEL
+
+#endif /* __i386__ || __x86_64__ || _M_IX86 || _M_X64 || __WIN__ */
+
+#ifdef HAVE_WINDOWS_ATOMICS
+typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
+ on LONG variable */
+#elif defined(HAVE_ATOMIC_BUILTINS) && !defined(HAVE_ATOMIC_BUILTINS_BYTE)
+typedef ulint lock_word_t;
+#else
+
+#define IB_LOCK_WORD_IS_BYTE
+
+typedef byte lock_word_t;
+
+#endif /* HAVE_WINDOWS_ATOMICS */
+
#ifdef __WIN__
/** Native event (slow)*/
typedef HANDLE os_native_event_t;
@@ -429,14 +449,61 @@ amount to decrement. */
# define os_atomic_decrement_uint64(ptr, amount) \
os_atomic_decrement(ptr, amount)
-/**********************************************************//**
-Returns the old value of *ptr, atomically sets *ptr to new_val */
-
-# define os_atomic_test_and_set_byte(ptr, new_val) \
- __sync_lock_test_and_set(ptr, (byte) new_val)
-
-# define os_atomic_test_and_set_ulint(ptr, new_val) \
- __sync_lock_test_and_set(ptr, new_val)
+# if defined(HAVE_IB_GCC_ATOMIC_TEST_AND_SET)
+
+/** Do an atomic test-and-set.
+@param[in,out] ptr Memory location to set to non-zero
+@return the previous value */
+inline
+lock_word_t
+os_atomic_test_and_set(volatile lock_word_t* ptr)
+{
+ return(__atomic_test_and_set(ptr, __ATOMIC_ACQUIRE));
+}
+
+/** Do an atomic clear.
+@param[in,out] ptr Memory location to set to zero */
+inline
+void
+os_atomic_clear(volatile lock_word_t* ptr)
+{
+ __atomic_clear(ptr, __ATOMIC_RELEASE);
+}
+
+# elif defined(IB_STRONG_MEMORY_MODEL)
+
+/** Do an atomic test and set.
+@param[in,out] ptr Memory location to set to non-zero
+@return the previous value */
+inline
+lock_word_t
+os_atomic_test_and_set(volatile lock_word_t* ptr)
+{
+ return(__sync_lock_test_and_set(ptr, 1));
+}
+
+/** Do an atomic release.
+
+In theory __sync_lock_release should be used to release the lock.
+Unfortunately, it does not work properly alone. The workaround is
+that more conservative __sync_lock_test_and_set is used instead.
+
+Performance regression was observed at some conditions for Intel
+architecture. Disable release barrier on Intel architecture for now.
+@param[in,out] ptr Memory location to write to
+@return the previous value */
+inline
+lock_word_t
+os_atomic_clear(volatile lock_word_t* ptr)
+{
+ return(__sync_lock_test_and_set(ptr, 0));
+}
+
+# else
+
+# error "Unsupported platform"
+
+# endif /* HAVE_IB_GCC_ATOMIC_TEST_AND_SET */
#elif defined(HAVE_IB_SOLARIS_ATOMICS)
@@ -511,14 +578,51 @@ amount to decrement. */
# define os_atomic_decrement_uint64(ptr, amount) \
os_atomic_increment_uint64(ptr, -(amount))
-/**********************************************************//**
-Returns the old value of *ptr, atomically sets *ptr to new_val */
-
-# define os_atomic_test_and_set_byte(ptr, new_val) \
- atomic_swap_uchar(ptr, new_val)
-
-# define os_atomic_test_and_set_ulint(ptr, new_val) \
- atomic_swap_ulong(ptr, new_val)
+# ifdef IB_LOCK_WORD_IS_BYTE
+
+/** Do an atomic xchg and set to non-zero.
+@param[in,out] ptr Memory location to set to non-zero
+@return the previous value */
+inline
+lock_word_t
+os_atomic_test_and_set(volatile lock_word_t* ptr)
+{
+ return(atomic_swap_uchar(ptr, 1));
+}
+
+/** Do an atomic xchg and set to zero.
+@param[in,out] ptr Memory location to set to zero
+@return the previous value */
+inline
+lock_word_t
+os_atomic_clear(volatile lock_word_t* ptr)
+{
+ return(atomic_swap_uchar(ptr, 0));
+}
+
+# else
+
+/** Do an atomic xchg and set to non-zero.
+@param[in,out] ptr Memory location to set to non-zero
+@return the previous value */
+inline
+lock_word_t
+os_atomic_test_and_set(volatile lock_word_t* ptr)
+{
+ return(atomic_swap_ulong(ptr, 1));
+}
+
+/** Do an atomic xchg and set to zero.
+@param[in,out] ptr Memory location to set to zero
+@return the previous value */
+inline
+lock_word_t
+os_atomic_clear(volatile lock_word_t* ptr)
+{
+ return(atomic_swap_ulong(ptr, 0));
+}
+
+# endif /* IB_LOCK_WORD_IS_BYTE */
#elif defined(HAVE_WINDOWS_ATOMICS)
@@ -633,16 +737,27 @@ amount to decrement. There is no atomic substract function on Windows */
(ib_int64_t*) ptr, \
-(ib_int64_t) amount) - amount))
-/**********************************************************//**
-Returns the old value of *ptr, atomically sets *ptr to new_val.
-InterlockedExchange() operates on LONG, and the LONG will be
-clobbered */
-
-# define os_atomic_test_and_set_byte(ptr, new_val) \
- ((byte) InterlockedExchange(ptr, new_val))
-
-# define os_atomic_test_and_set_ulong(ptr, new_val) \
- InterlockedExchange(ptr, new_val)
+/** Do an atomic test and set.
+InterlockedExchange() operates on LONG, and the LONG will be clobbered
+@param[in,out] ptr Memory location to set to non-zero
+@return the previous value */
+inline
+lock_word_t
+os_atomic_test_and_set(volatile lock_word_t* ptr)
+{
+ return(InterlockedExchange(ptr, 1));
+}
+
+/** Do an atomic release.
+InterlockedExchange() operates on LONG, and the LONG will be clobbered
+@param[in,out] ptr Memory location to set to zero
+@return the previous value */
+inline
+lock_word_t
+os_atomic_clear(volatile lock_word_t* ptr)
+{
+ return(InterlockedExchange(ptr, 0));
+}
#else
# define IB_ATOMICS_STARTUP_MSG \
@@ -692,7 +807,7 @@ for synchronization */
} while (0);
/** barrier definitions for memory ordering */
-#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined __WIN__
+#ifdef IB_STRONG_MEMORY_MODEL
/* Performance regression was observed at some conditions for Intel
architecture. Disable memory barrier for Intel architecture for now. */
# define os_rmb
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 7a6c9f93e3d..6e2f76af30d 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All rights reserved.
Copyright (c) 2008, 2009, Google Inc.
Copyright (c) 2009, Percona Inc.
@@ -187,6 +187,9 @@ extern char* srv_arch_dir;
recovery and open all tables in RO mode instead of RW mode. We don't
sync the max trx id to disk either. */
extern my_bool srv_read_only_mode;
+/** Set if InnoDB operates in read-only mode or innodb-force-recovery
+is greater than SRV_FORCE_NO_TRX_UNDO. */
+extern my_bool high_level_read_only;
/** store to its own file each table created by an user; data
dictionary tables are in the system tablespace 0 */
extern my_bool srv_file_per_table;
@@ -217,6 +220,7 @@ OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads.
Currently we support native aio on windows and linux */
extern my_bool srv_use_native_aio;
+extern my_bool srv_numa_interleave;
#ifdef __WIN__
extern ibool srv_use_native_conditions;
#endif /* __WIN__ */
@@ -875,6 +879,7 @@ struct srv_slot_t{
#else /* !UNIV_HOTBACKUP */
# define srv_use_adaptive_hash_indexes FALSE
# define srv_use_native_aio FALSE
+# define srv_numa_interleave FALSE
# define srv_force_recovery 0UL
# define srv_set_io_thread_op_info(t,info) ((void) 0)
# define srv_reset_io_thread_op_info() ((void) 0)
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index 82fb353a41b..d6f8d8f5e4c 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Copyright (c) 2012, Facebook Inc.
@@ -46,15 +46,6 @@ Created 9/5/1995 Heikki Tuuri
extern "C" my_bool timed_mutexes;
#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
-#ifdef HAVE_WINDOWS_ATOMICS
-typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
- on LONG variable */
-#elif defined(HAVE_ATOMIC_BUILTINS) && !defined(HAVE_ATOMIC_BUILTINS_BYTE)
-typedef ulint lock_word_t;
-#else
-typedef byte lock_word_t;
-#endif
-
#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
/* By default, buffer mutexes and rwlocks will be excluded from
diff --git a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic
index 616e53d4aac..9a062db71a8 100644
--- a/storage/innobase/include/sync0sync.ic
+++ b/storage/innobase/include/sync0sync.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -74,17 +74,13 @@ Performs an atomic test-and-set instruction to the lock_word field of a
mutex.
@return the previous value of lock_word: 0 or 1 */
UNIV_INLINE
-byte
+lock_word_t
ib_mutex_test_and_set(
-/*===============*/
+/*==================*/
ib_mutex_t* mutex) /*!< in: mutex */
{
#if defined(HAVE_ATOMIC_BUILTINS)
-# if defined(HAVE_ATOMIC_BUILTINS_BYTE)
- return(os_atomic_test_and_set_byte(&mutex->lock_word, 1));
-# else
- return(os_atomic_test_and_set_ulint(&mutex->lock_word, 1));
-# endif
+ return(os_atomic_test_and_set(&mutex->lock_word));
#else
ibool ret;
@@ -100,7 +96,7 @@ ib_mutex_test_and_set(
}
return((byte) ret);
-#endif
+#endif /* HAVE_ATOMIC_BUILTINS */
}
/******************************************************************//**
@@ -113,19 +109,12 @@ mutex_reset_lock_word(
ib_mutex_t* mutex) /*!< in: mutex */
{
#if defined(HAVE_ATOMIC_BUILTINS)
- /* In theory __sync_lock_release should be used to release the lock.
- Unfortunately, it does not work properly alone. The workaround is
- that more conservative __sync_lock_test_and_set is used instead. */
-# if defined(HAVE_ATOMIC_BUILTINS_BYTE)
- os_atomic_test_and_set_byte(&mutex->lock_word, 0);
-# else
- os_atomic_test_and_set_ulint(&mutex->lock_word, 0);
-# endif
+ os_atomic_clear(&mutex->lock_word);
#else
mutex->lock_word = 0;
os_fast_mutex_unlock(&(mutex->os_fast_mutex));
-#endif
+#endif /* HAVE_ATOMIC_BUILTINS */
}
/******************************************************************//**
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index 9d047c4cc3b..b5e4df316ad 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -2634,8 +2634,8 @@ lock_rec_inherit_to_gap(
/* If srv_locks_unsafe_for_binlog is TRUE or session is using
READ COMMITTED isolation level, we do not want locks set
by an UPDATE or a DELETE to be inherited as gap type locks. But we
- DO want S-locks set by a consistency constraint to be inherited also
- then. */
+ DO want S-locks/X-locks(taken for replace) set by a consistency
+ constraint to be inherited also then */
for (lock = lock_rec_get_first(block, heap_no);
lock != NULL;
@@ -2645,7 +2645,8 @@ lock_rec_inherit_to_gap(
&& !((srv_locks_unsafe_for_binlog
|| lock->trx->isolation_level
<= TRX_ISO_READ_COMMITTED)
- && lock_get_mode(lock) == LOCK_X)) {
+ && lock_get_mode(lock) ==
+ (lock->trx->duplicates ? LOCK_S : LOCK_X))) {
lock_rec_add_to_queue(
LOCK_REC | LOCK_GAP | lock_get_mode(lock),
diff --git a/storage/innobase/log/log0log.cc b/storage/innobase/log/log0log.cc
index d0e0453849e..3ff4a9d7d1e 100644
--- a/storage/innobase/log/log0log.cc
+++ b/storage/innobase/log/log0log.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -3406,11 +3406,7 @@ loop:
lsn = log_sys->lsn;
- ut_ad(srv_force_recovery != SRV_FORCE_NO_LOG_REDO
- || lsn == log_sys->last_checkpoint_lsn + LOG_BLOCK_HDR_SIZE);
-
- if ((srv_force_recovery != SRV_FORCE_NO_LOG_REDO
- && lsn != log_sys->last_checkpoint_lsn)
+ if (lsn != log_sys->last_checkpoint_lsn
#ifdef UNIV_LOG_ARCHIVE
|| (srv_log_archive_on
&& lsn != log_sys->archived_lsn + LOG_BLOCK_HDR_SIZE)
diff --git a/storage/innobase/row/row0ins.cc b/storage/innobase/row/row0ins.cc
index 9370f8cef34..f0f7e5fcdf0 100644
--- a/storage/innobase/row/row0ins.cc
+++ b/storage/innobase/row/row0ins.cc
@@ -2709,6 +2709,8 @@ row_ins_sec_index_entry_low(
goto func_exit;
}
+ DEBUG_SYNC_C("row_ins_sec_index_entry_dup_locks_created");
+
/* We did not find a duplicate and we have now
locked with s-locks the necessary records to
prevent any insertion of a duplicate by another
diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc
index f4ea8895d2f..ad35307c8b2 100644
--- a/storage/innobase/srv/srv0srv.cc
+++ b/storage/innobase/srv/srv0srv.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, 2009 Google Inc.
Copyright (c) 2009, Percona Inc.
@@ -126,6 +126,9 @@ UNIV_INTERN ulint srv_file_format = 0;
UNIV_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
set it to the highest format we support. */
UNIV_INTERN ulint srv_max_file_format_at_startup = UNIV_FORMAT_MAX;
+/** Set if InnoDB operates in read-only mode or innodb-force-recovery
+is greater than SRV_FORCE_NO_TRX_UNDO. */
+UNIV_INTERN my_bool high_level_read_only;
#if UNIV_FORMAT_A
# error "UNIV_FORMAT_A must be 0!"
@@ -144,6 +147,7 @@ OS (provided we compiled Innobase with it in), otherwise we will
use simulated aio we build below with threads.
Currently we support native aio on windows and linux */
UNIV_INTERN my_bool srv_use_native_aio = TRUE;
+UNIV_INTERN my_bool srv_numa_interleave = FALSE;
#ifdef __WIN__
/* Windows native condition variables. We use runtime loading / function
@@ -2581,13 +2585,8 @@ srv_do_purge(
}
n_pages_purged = trx_purge(
- n_use_threads, srv_purge_batch_size, false);
-
- if (!(count++ % TRX_SYS_N_RSEGS)) {
- /* Force a truncate of the history list. */
- n_pages_purged += trx_purge(
- 1, srv_purge_batch_size, true);
- }
+ n_use_threads, srv_purge_batch_size,
+ (++count % TRX_SYS_N_RSEGS) == 0);
*n_total_purged += n_pages_purged;
@@ -2780,8 +2779,17 @@ DECLARE_THREAD(srv_purge_coordinator_thread)(
n_pages_purged = trx_purge(1, srv_purge_batch_size, false);
}
- /* Force a truncate of the history list. */
- n_pages_purged = trx_purge(1, srv_purge_batch_size, true);
+ /* This trx_purge is called to remove any undo records (added by
+ background threads) after completion of the above loop. When
+ srv_fast_shutdown != 0, a large batch size can cause a significant
+ delay in shutdown, so the batch size is capped at the magic number 20
+ (which was the default in 5.5), which we hope will be sufficient to
+ remove all the undo records. */
+ const uint temp_batch_size = 20;
+
+ n_pages_purged = trx_purge(1, srv_purge_batch_size <= temp_batch_size
+ ? srv_purge_batch_size : temp_batch_size,
+ true);
ut_a(n_pages_purged == 0 || srv_fast_shutdown != 0);
/* The task queue should always be empty, independent of fast
diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc
index d888d13d863..8a9afd561a9 100644
--- a/storage/innobase/srv/srv0start.cc
+++ b/storage/innobase/srv/srv0start.cc
@@ -1545,9 +1545,8 @@ innobase_start_or_create_for_mysql(void)
char* logfile0 = NULL;
size_t dirnamelen;
- if (srv_force_recovery > SRV_FORCE_NO_TRX_UNDO) {
- srv_read_only_mode = true;
- }
+ high_level_read_only = srv_read_only_mode
+ || srv_force_recovery > SRV_FORCE_NO_TRX_UNDO;
if (srv_read_only_mode) {
ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode");
diff --git a/storage/innobase/trx/trx0sys.cc b/storage/innobase/trx/trx0sys.cc
index 5eb3cef46c1..e5f03f4b96a 100644
--- a/storage/innobase/trx/trx0sys.cc
+++ b/storage/innobase/trx/trx0sys.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -29,7 +29,10 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0sys.ic"
#endif
-#ifndef UNIV_HOTBACKUP
+#ifdef UNIV_HOTBACKUP
+#include "fsp0types.h"
+
+#else /* !UNIV_HOTBACKUP */
#include "fsp0fsp.h"
#include "mtr0log.h"
#include "mtr0log.h"
@@ -1115,18 +1118,15 @@ trx_sys_read_pertable_file_format_id(
/* get the file format from the page */
ptr = page + 54;
flags = mach_read_from_4(ptr);
- if (flags == 0) {
- /* file format is Antelope */
- *format_id = 0;
- return(TRUE);
- } else if (flags & 1) {
- /* tablespace flags are ok */
- *format_id = (flags / 32) % 128;
- return(TRUE);
- } else {
+
+ /* Reject the page when the tablespace flags read from it do not
+ pass validation; *format_id is left unset in that case. */
+ if (!fsp_flags_is_valid(flags)) {
+ /* bad tablespace flags */
+ return(FALSE);
+ }
+
+ /* NOTE(review): the code removed by this change computed the id as
+ (flags / 32) % 128; confirm that FSP_FLAGS_GET_POST_ANTELOPE()
+ yields the intended file format id and not just a single flag bit. */
+ *format_id = FSP_FLAGS_GET_POST_ANTELOPE(flags);
+
+ return(TRUE);
}