From ff166093741df0bd91ba24e02714ef882073c51b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 11 May 2017 21:12:37 +0300 Subject: MDEV-12674 Innodb_row_lock_current_waits has overflow There is a race condition related to the variable srv_stats.n_lock_wait_current_count, which is only incremented and decremented by the function lock_wait_suspend_thread(), The incrementing is protected by lock_sys->wait_mutex, but the decrementing does not appear to be protected by anything. This mismatch could allow the counter to be corrupted when a transactional InnoDB table or record lock wait is terminating roughly at the same time with the start of a wait on a (possibly different) lock. ib_counter_t: Remove some unused methods. Prevent instantiation for N=1. Add an inc() method that takes a slot index as a parameter. single_indexer_t: Remove. simple_counter: A new counter wrapper. Optionally use atomic memory operations for modifying the counter. Aligned to the cache line size. lsn_ctr_1_t, ulint_ctr_1_t, int64_ctr_1_t: Define as simple_counter. These counters are either only incremented (and we do not care about losing some increment operations), or the increment/decrement operations are protected by some mutex. srv_stats_t::os_log_pending_writes: Document that the number is protected by log_sys->mutex. srv_stats_t::n_lock_wait_current_count: Use simple_counter, that is, atomic inc() and dec() operations. lock_wait_suspend_thread(): Release the mutexes before incrementing the counters. Avoid acquiring the lock mutex if the lock wait has already been resolved. Atomically increment and decrement srv_stats.n_lock_wait_current_count. row_insert_for_mysql(), row_update_for_mysql(), row_update_cascade_for_mysql(): Use the inc() method with the trx->id as the slot index. This is a non-functional change, just using inc() instead of add(1). buf_LRU_get_free_block(): Replace the method add(index, n) with inc(). There is no slot index in the simple_counter. --- storage/xtradb/buf/buf0lru.cc | 5 +- storage/xtradb/include/os0sync.h | 58 +++++++++++++++++++--- storage/xtradb/include/srv0srv.h | 16 +++--- storage/xtradb/include/ut0counter.h | 98 ++++++++----------------------------- storage/xtradb/lock/lock0wait.cc | 22 ++++++--- storage/xtradb/row/row0mysql.cc | 25 ++++------ 6 files changed, 107 insertions(+), 117 deletions(-) (limited to 'storage/xtradb') diff --git a/storage/xtradb/buf/buf0lru.cc b/storage/xtradb/buf/buf0lru.cc index 1aa2e7a9eb7..d31773c96cc 100644 --- a/storage/xtradb/buf/buf0lru.cc +++ b/storage/xtradb/buf/buf0lru.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1501,7 +1502,7 @@ loop: n_iterations++; - srv_stats.buf_pool_wait_free.add(n_iterations, 1); + srv_stats.buf_pool_wait_free.inc(); /* In case of backoff, do not ever attempt single page flushes and wait for the cleaner to free some pages instead. */ @@ -1595,7 +1596,7 @@ loop: ++flush_failures; } - srv_stats.buf_pool_wait_free.add(n_iterations, 1); + srv_stats.buf_pool_wait_free.inc(); n_iterations++; diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h index 48c56a73369..b152ab53e68 100644 --- a/storage/xtradb/include/os0sync.h +++ b/storage/xtradb/include/os0sync.h @@ -2,6 +2,7 @@ Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. +Copyright (c) 2017, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -38,12 +39,12 @@ Created 9/6/1995 Heikki Tuuri #include "ut0lst.h" #include "sync0types.h" -#if defined __i386__ || defined __x86_64__ || defined _M_IX86 \ - || defined _M_X64 || defined __WIN__ - -#define IB_STRONG_MEMORY_MODEL - -#endif /* __i386__ || __x86_64__ || _M_IX86 || _M_X64 || __WIN__ */ +/** CPU cache line size */ +#ifdef __powerpc__ +#define CACHE_LINE_SIZE 128 +#else +#define CACHE_LINE_SIZE 64 +#endif #ifdef HAVE_WINDOWS_ATOMICS typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates @@ -940,6 +941,51 @@ for synchronization */ "Memory barrier is not used" #endif + +/** Simple counter aligned to CACHE_LINE_SIZE +@tparam Type the integer type of the counter +@tparam atomic whether to use atomic memory access */ +template +struct MY_ALIGNED(CACHE_LINE_SIZE) simple_counter +{ + /** Increment the counter */ + Type inc() { return add(1); } + /** Decrement the counter */ + Type dec() { return sub(1); } + + /** Add to the counter + @param[in] i amount to be added + @return the value of the counter after adding */ + Type add(Type i) + { + compile_time_assert(!atomic || sizeof(Type) == sizeof(ulint)); + if (atomic) { + return os_atomic_increment_ulint(&m_counter, i); + } else { + return m_counter += i; + } + } + /** Subtract from the counter + @param[in] i amount to be subtracted + @return the value of the counter after adding */ + Type sub(Type i) + { + compile_time_assert(!atomic || sizeof(Type) == sizeof(ulint)); + if (atomic) { + return os_atomic_decrement_ulint(&m_counter, i); + } else { + return m_counter -= i; + } + } + + /** @return the value of the counter (non-atomic access)! */ + operator Type() const { return m_counter; } + +private: + /** The counter */ + Type m_counter; +}; + #ifndef UNIV_NONINL #include "os0sync.ic" #endif diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index 0923c9a88d7..1ca8e6de591 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -3,7 +3,7 @@ Copyright (c) 1995, 2016, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, 2009, Google Inc. Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, 2017, MariaDB Corporation Ab. All Rights Reserved. +Copyright (c) 2013, 2017, MariaDB Corporation. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -55,11 +55,10 @@ Created 10/10/1995 Heikki Tuuri /* Global counters used inside InnoDB. */ struct srv_stats_t { - typedef ib_counter_t lsn_ctr_1_t; - typedef ib_counter_t ulint_ctr_1_t; - typedef ib_counter_t lint_ctr_1_t; typedef ib_counter_t ulint_ctr_64_t; - typedef ib_counter_t ib_int64_ctr_1_t; + typedef simple_counter lsn_ctr_1_t; + typedef simple_counter ulint_ctr_1_t; + typedef simple_counter ib_int64_ctr_1_t; /** Count the amount of data written in total (in bytes) */ ulint_ctr_1_t data_written; @@ -73,8 +72,9 @@ struct srv_stats_t { /** Amount of data written to the log files in bytes */ lsn_ctr_1_t os_log_written; - /** Number of writes being done to the log files */ - lint_ctr_1_t os_log_pending_writes; + /** Number of writes being done to the log files. + Protected by log_sys->write_mutex. */ + ulint_ctr_1_t os_log_pending_writes; /** We increase this counter, when we don't have enough space in the log buffer and have to flush it */ @@ -113,7 +113,7 @@ struct srv_stats_t { ulint_ctr_1_t n_lock_wait_count; /** Number of threads currently waiting on database locks */ - lint_ctr_1_t n_lock_wait_current_count; + simple_counter n_lock_wait_current_count; /** Number of rows read. */ ulint_ctr_64_t n_rows_read; diff --git a/storage/xtradb/include/ut0counter.h b/storage/xtradb/include/ut0counter.h index 63a133a175d..edc0db3b03d 100644 --- a/storage/xtradb/include/ut0counter.h +++ b/storage/xtradb/include/ut0counter.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -30,13 +31,7 @@ Created 2012/04/12 by Sunny Bains #include "univ.i" #include #include "os0thread.h" - -/** CPU cache line size */ -#ifdef __powerpc__ -#define CACHE_LINE_SIZE 128 -#else -#define CACHE_LINE_SIZE 64 -#endif +#include "os0sync.h" /** Default number of slots to use in ib_counter_t */ #define IB_N_SLOTS 64 @@ -44,8 +39,6 @@ Created 2012/04/12 by Sunny Bains /** Get the offset into the counter array. */ template struct generic_indexer_t { - /** Default constructor/destructor should be OK. */ - /** @return offset within m_counter */ size_t offset(size_t index) const UNIV_NOTHROW { return(((index % N) + 1) * (CACHE_LINE_SIZE / sizeof(Type))); @@ -58,8 +51,6 @@ struct generic_indexer_t { use the thread id. */ template struct get_sched_indexer_t : public generic_indexer_t { - /** Default constructor/destructor should be OK. */ - /* @return result from sched_getcpu(), the thread id if it fails. */ size_t get_rnd_index() const UNIV_NOTHROW { @@ -76,31 +67,17 @@ struct get_sched_indexer_t : public generic_indexer_t { /** Use the thread id to index into the counter array. */ template struct thread_id_indexer_t : public generic_indexer_t { - /** Default constructor/destructor should are OK. */ - /* @return a random number, currently we use the thread id. Where thread id is represented as a pointer, it may not work as effectively. */ size_t get_rnd_index() const UNIV_NOTHROW { return((lint) os_thread_get_curr_id()); } -}; - -/** For counters wher N=1 */ -template -struct single_indexer_t { - /** Default constructor/destructor should are OK. */ - - /** @return offset within m_counter */ - size_t offset(size_t index) const UNIV_NOTHROW { - ut_ad(N == 1); - return((CACHE_LINE_SIZE / sizeof(Type))); - } - /* @return 1 */ - size_t get_rnd_index() const UNIV_NOTHROW { - ut_ad(N == 1); - return(1); + /** @return a random offset to the array */ + size_t get_rnd_offset() const UNIV_NOTHROW + { + return(generic_indexer_t::offset(get_rnd_index())); } }; @@ -112,17 +89,11 @@ template < typename Type, int N = IB_N_SLOTS, template class Indexer = thread_id_indexer_t> -class ib_counter_t { -public: - ib_counter_t() { memset(m_counter, 0x0, sizeof(m_counter)); } - +struct MY_ALIGNED(CACHE_LINE_SIZE) ib_counter_t +{ +#ifdef UNIV_DEBUG ~ib_counter_t() { - ut_ad(validate()); - } - - bool validate() UNIV_NOTHROW { -#ifdef UNIV_DEBUG size_t n = (CACHE_LINE_SIZE / sizeof(Type)); /* Check that we aren't writing outside our defined bounds. */ @@ -131,27 +102,23 @@ public: ut_ad(m_counter[i + j] == 0); } } -#endif /* UNIV_DEBUG */ - return(true); } +#endif /* UNIV_DEBUG */ - /** If you can't use a good index id. Increment by 1. */ + /** Increment the counter by 1. */ void inc() UNIV_NOTHROW { add(1); } - /** If you can't use a good index id. - * @param n - is the amount to increment */ - void add(Type n) UNIV_NOTHROW { - size_t i = m_policy.offset(m_policy.get_rnd_index()); - - ut_ad(i < UT_ARR_SIZE(m_counter)); + /** Increment the counter by 1. + @param[in] index a reasonably thread-unique identifier */ + void inc(size_t index) UNIV_NOTHROW { add(index, 1); } - m_counter[i] += n; - } + /** Add to the counter. + @param[in] n amount to be added */ + void add(Type n) UNIV_NOTHROW { add(m_policy.get_rnd_offset(), n); } - /** Use this if you can use a unique indentifier, saves a - call to get_rnd_index(). - @param i - index into a slot - @param n - amount to increment */ + /** Add to the counter. + @param[in] index a reasonably thread-unique identifier + @param[in] n amount to be added */ void add(size_t index, Type n) UNIV_NOTHROW { size_t i = m_policy.offset(index); @@ -160,31 +127,6 @@ public: m_counter[i] += n; } - /** If you can't use a good index id. Decrement by 1. */ - void dec() UNIV_NOTHROW { sub(1); } - - /** If you can't use a good index id. - * @param - n is the amount to decrement */ - void sub(Type n) UNIV_NOTHROW { - size_t i = m_policy.offset(m_policy.get_rnd_index()); - - ut_ad(i < UT_ARR_SIZE(m_counter)); - - m_counter[i] -= n; - } - - /** Use this if you can use a unique indentifier, saves a - call to get_rnd_index(). - @param i - index into a slot - @param n - amount to decrement */ - void sub(size_t index, Type n) UNIV_NOTHROW { - size_t i = m_policy.offset(index); - - ut_ad(i < UT_ARR_SIZE(m_counter)); - - m_counter[i] -= n; - } - /* @return total value - not 100% accurate, since it is not atomic. */ operator Type() const UNIV_NOTHROW { Type total = 0; diff --git a/storage/xtradb/lock/lock0wait.cc b/storage/xtradb/lock/lock0wait.cc index 4ca048a50d5..5b67fbe72b7 100644 --- a/storage/xtradb/lock/lock0wait.cc +++ b/storage/xtradb/lock/lock0wait.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2014, 2017, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -248,6 +249,9 @@ lock_wait_suspend_thread( slot = lock_wait_table_reserve_slot(thr, lock_wait_timeout); + lock_wait_mutex_exit(); + trx_mutex_exit(trx); + if (thr->lock_state == QUE_THR_LOCK_ROW) { srv_stats.n_lock_wait_count.inc(); srv_stats.n_lock_wait_current_count.inc(); @@ -259,19 +263,21 @@ lock_wait_suspend_thread( } } - lock_wait_mutex_exit(); - trx_mutex_exit(trx); - ulint lock_type = ULINT_UNDEFINED; - lock_mutex_enter(); - + /* The wait_lock can be cleared by another thread when the + lock is released. But the wait can only be initiated by the + current thread which owns the transaction. Only acquire the + mutex if the wait_lock is still active. */ if (const lock_t* wait_lock = trx->lock.wait_lock) { - lock_type = lock_get_type_low(wait_lock); + lock_mutex_enter(); + wait_lock = trx->lock.wait_lock; + if (wait_lock) { + lock_type = lock_get_type_low(wait_lock); + } + lock_mutex_exit(); } - lock_mutex_exit(); - had_dict_lock = trx->dict_operation_lock_mode; switch (had_dict_lock) { diff --git a/storage/xtradb/row/row0mysql.cc b/storage/xtradb/row/row0mysql.cc index 68b4e1ecea5..c0c2b6fbabe 100644 --- a/storage/xtradb/row/row0mysql.cc +++ b/storage/xtradb/row/row0mysql.cc @@ -1452,11 +1452,10 @@ error_exit: que_thr_stop_for_mysql_no_error(thr, trx); if (UNIV_LIKELY(!(trx->fake_changes))) { - if (table->is_system_db) { - srv_stats.n_system_rows_inserted.add((size_t)trx->id, 1); + srv_stats.n_system_rows_inserted.inc(size_t(trx->id)); } else { - srv_stats.n_rows_inserted.add((size_t)trx->id, 1); + srv_stats.n_rows_inserted.inc(size_t(trx->id)); } if (prebuilt->clust_index_was_generated) { @@ -1858,17 +1857,15 @@ run_again: dict_table_n_rows_dec(prebuilt->table); if (table->is_system_db) { - srv_stats.n_system_rows_deleted.add( - (size_t)trx->id, 1); + srv_stats.n_system_rows_deleted.inc(size_t(trx->id)); } else { - srv_stats.n_rows_deleted.add((size_t)trx->id, 1); + srv_stats.n_rows_deleted.inc(size_t(trx->id)); } } else { if (table->is_system_db) { - srv_stats.n_system_rows_updated.add( - (size_t)trx->id, 1); + srv_stats.n_system_rows_updated.inc(size_t(trx->id)); } else { - srv_stats.n_rows_updated.add((size_t)trx->id, 1); + srv_stats.n_rows_updated.inc(size_t(trx->id)); } } @@ -2106,17 +2103,15 @@ run_again: dict_table_n_rows_dec(table); if (table->is_system_db) { - srv_stats.n_system_rows_deleted.add( - (size_t)trx->id, 1); + srv_stats.n_system_rows_deleted.inc(size_t(trx->id)); } else { - srv_stats.n_rows_deleted.add((size_t)trx->id, 1); + srv_stats.n_rows_deleted.inc(size_t(trx->id)); } } else { if (table->is_system_db) { - srv_stats.n_system_rows_updated.add( - (size_t)trx->id, 1); + srv_stats.n_system_rows_updated.inc(size_t(trx->id)); } else { - srv_stats.n_rows_updated.add((size_t)trx->id, 1); + srv_stats.n_rows_updated.inc(size_t(trx->id)); } } -- cgit v1.2.1