diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2022-04-27 10:43:00 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2022-04-27 10:43:00 +0300 |
commit | 133c2129cdbb77d8fd55fb303d6f73e1cd3c025c (patch) | |
tree | 3a4fe134a4db2511d6081cdfc6fdff550c25c620 /storage/innobase/include | |
parent | 6948abb94c6739101320d12ddec1d2daae929cc2 (diff) | |
parent | 638afc4acf86b32b74b3b37314f2dbd048062814 (diff) | |
download | mariadb-git-133c2129cdbb77d8fd55fb303d6f73e1cd3c025c.tar.gz |
Merge 10.7 into 10.8
Diffstat (limited to 'storage/innobase/include')
28 files changed, 356 insertions, 424 deletions
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index f5f1c972957..6bcc71702d3 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1994, 2019, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -459,27 +459,18 @@ that the mtr has an x-latch on the page where the cursor is positioned, but no latch on the whole tree. @return TRUE if success, i.e., the page did not become too empty */ ibool -btr_cur_optimistic_delete_func( +btr_cur_optimistic_delete( /*===========================*/ btr_cur_t* cursor, /*!< in: cursor on the record to delete; cursor stays valid: if deletion succeeds, on function exit it points to the successor of the deleted record */ -# ifdef UNIV_DEBUG ulint flags, /*!< in: BTR_CREATE_FLAG or 0 */ -# endif /* UNIV_DEBUG */ mtr_t* mtr) /*!< in: mtr; if this function returns TRUE on a leaf page of a secondary index, the mtr must be committed before latching any further pages */ MY_ATTRIBUTE((nonnull, warn_unused_result)); -# ifdef UNIV_DEBUG -# define btr_cur_optimistic_delete(cursor, flags, mtr) \ - btr_cur_optimistic_delete_func(cursor, flags, mtr) -# else /* UNIV_DEBUG */ -# define btr_cur_optimistic_delete(cursor, flags, mtr) \ - btr_cur_optimistic_delete_func(cursor, mtr) -# endif /* UNIV_DEBUG */ /*************************************************************//** Removes the record on which the tree cursor is positioned. Tries to compress the page if its fillfactor drops below a threshold diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index d8e01fa37fa..278a8f56524 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -1576,7 +1576,7 @@ public: static constexpr uint32_t READ_AHEAD_PAGES= 64; /** Buffer pool mutex */ - MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t mutex; + alignas(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t mutex; /** Number of pending LRU flush; protected by mutex. */ ulint n_flush_LRU_; /** broadcast when n_flush_LRU reaches 0; protected by mutex */ @@ -1758,7 +1758,7 @@ public: /** mutex protecting flush_list, buf_page_t::set_oldest_modification() and buf_page_t::list pointers when !oldest_modification() */ - MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t flush_list_mutex; + alignas(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t flush_list_mutex; /** "hazard pointer" for flush_list scans; protected by flush_list_mutex */ FlushHp flush_hp; /** modified blocks (a subset of LRU) */ diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h index 6bc46dbbc04..38df7cbe462 100644 --- a/storage/innobase/include/buf0types.h +++ b/storage/innobase/include/buf0types.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2015, Oracle and/or its affiliates. All rights reserved. -Copyright (c) 2019, 2021, MariaDB Corporation. +Copyright (c) 2019, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -131,7 +131,7 @@ public: /** Retrieve the fold value. @return fold value */ - ulint fold() const { return (space() << 20) + space() + page_no(); } + ulint fold() const { return (ulint{space()} << 20) + space() + page_no(); } /** Reset the page number only. @param[in] page_no page number */ diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h index 5c79458e5a9..ae688bf85fe 100644 --- a/storage/innobase/include/data0type.h +++ b/storage/innobase/include/data0type.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2020, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -24,9 +24,7 @@ Data types Created 1/16/1996 Heikki Tuuri *******************************************************/ -#ifndef data0type_h -#define data0type_h - +#pragma once #include "univ.i" /** Special length indicating a missing instantly added column */ @@ -196,9 +194,6 @@ constexpr uint8_t DATA_MBR_LEN= uint8_t(SPDIMS * 2 * sizeof(double)); /** system-versioned user data column */ #define DATA_VERSIONED (DATA_VERS_START|DATA_VERS_END) -/** Check whether locking is disabled (never). */ -#define dict_table_is_locking_disabled(table) false - /*-------------------------------------------*/ /* This many bytes we need to store the type information affecting the @@ -325,7 +320,6 @@ dtype_get_prtype( /*********************************************************************//** Compute the mbminlen and mbmaxlen members of a data type structure. */ -UNIV_INLINE void dtype_get_mblen( /*============*/ @@ -589,5 +583,3 @@ static const byte REC_INFO_METADATA_ALTER = REC_INFO_METADATA_ADD | REC_INFO_DELETED_FLAG; #include "data0type.inl" - -#endif diff --git a/storage/innobase/include/data0type.inl b/storage/innobase/include/data0type.inl index 06d90959855..329cee5d190 100644 --- a/storage/innobase/include/data0type.inl +++ b/storage/innobase/include/data0type.inl @@ -68,30 +68,6 @@ dtype_get_mysql_type( Compute the mbminlen and mbmaxlen members of a data type structure. */ UNIV_INLINE void -dtype_get_mblen( -/*============*/ - ulint mtype, /*!< in: main type */ - ulint prtype, /*!< in: precise type (and collation) */ - unsigned*mbminlen, /*!< out: minimum length of a - multi-byte character */ - unsigned*mbmaxlen) /*!< out: maximum length of a - multi-byte character */ -{ - if (dtype_is_string_type(mtype)) { - innobase_get_cset_width(dtype_get_charset_coll(prtype), - mbminlen, mbmaxlen); - ut_ad(*mbminlen <= *mbmaxlen); - ut_ad(*mbminlen < DATA_MBMAX); - ut_ad(*mbmaxlen < DATA_MBMAX); - } else { - *mbminlen = *mbmaxlen = 0; - } -} - -/*********************************************************************//** -Compute the mbminlen and mbmaxlen members of a data type structure. */ -UNIV_INLINE -void dtype_set_mblen( /*============*/ dtype_t* type) /*!< in/out: type */ @@ -374,16 +350,6 @@ dtype_get_fixed_size_low( } else if (!comp) { return static_cast<unsigned>(len); } else { -#ifdef UNIV_DEBUG - unsigned i_mbminlen, i_mbmaxlen; - - innobase_get_cset_width( - dtype_get_charset_coll(prtype), - &i_mbminlen, &i_mbmaxlen); - - ut_ad(i_mbminlen == mbminlen); - ut_ad(i_mbmaxlen == mbmaxlen); -#endif /* UNIV_DEBUG */ if (mbminlen == mbmaxlen) { return static_cast<unsigned>(len); } diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index 244d178da3a..fcd911e1b2b 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -1351,7 +1351,7 @@ class dict_sys_t std::atomic<ulonglong> latch_ex_wait_start; /** the rw-latch protecting the data dictionary cache */ - MY_ALIGNED(CACHE_LINE_SIZE) srw_lock latch; + alignas(CPU_LEVEL1_DCACHE_LINESIZE) srw_lock latch; #ifdef UNIV_DEBUG /** whether latch is being held in exclusive mode (by any thread) */ bool latch_ex; diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index bfa3742f683..c023447a4a2 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -1175,6 +1175,9 @@ public: /** @return whether this is the change buffer */ bool is_ibuf() const { return UNIV_UNLIKELY(type & DICT_IBUF); } + /** @return whether this index requires locking */ + bool has_locking() const { return !is_ibuf(); } + /** @return whether this is a normal B-tree index (not the change buffer, not SPATIAL or FULLTEXT) */ bool is_btree() const { @@ -1397,6 +1400,20 @@ public: rollback of TRX_UNDO_EMPTY. The BTR_SEG_LEAF is freed and reinitialized. @param thr query thread */ void clear(que_thr_t *thr); + + /** Check whether the online log is dummy value to indicate + whether table undergoes active DDL. + @retval true if online log is dummy value */ + bool online_log_is_dummy() const + { + return online_log == reinterpret_cast<const row_log_t*>(this); + } + + /** Assign clustered index online log to dummy value */ + void online_log_make_dummy() + { + online_log= reinterpret_cast<row_log_t*>(this); + } }; /** Detach a virtual column from an index. @@ -1965,10 +1982,10 @@ struct dict_table_t { #ifdef UNIV_DEBUG /** @return whether the current thread holds the lock_mutex */ bool lock_mutex_is_owner() const - { return lock_mutex_owner == os_thread_get_curr_id(); } + { return lock_mutex_owner == pthread_self(); } /** @return whether the current thread holds the stats_mutex (lock_mutex) */ bool stats_mutex_is_owner() const - { return lock_mutex_owner == os_thread_get_curr_id(); } + { return lock_mutex_owner == pthread_self(); } #endif /* UNIV_DEBUG */ void lock_mutex_init() { lock_mutex.init(); } void lock_mutex_destroy() { lock_mutex.destroy(); } @@ -1977,20 +1994,20 @@ struct dict_table_t { { ut_ad(!lock_mutex_is_owner()); lock_mutex.wr_lock(); - ut_ad(!lock_mutex_owner.exchange(os_thread_get_curr_id())); + ut_ad(!lock_mutex_owner.exchange(pthread_self())); } /** Try to acquire lock_mutex */ bool lock_mutex_trylock() { ut_ad(!lock_mutex_is_owner()); bool acquired= lock_mutex.wr_lock_try(); - ut_ad(!acquired || !lock_mutex_owner.exchange(os_thread_get_curr_id())); + ut_ad(!acquired || !lock_mutex_owner.exchange(pthread_self())); return acquired; } /** Release lock_mutex */ void lock_mutex_unlock() { - ut_ad(lock_mutex_owner.exchange(0) == os_thread_get_curr_id()); + ut_ad(lock_mutex_owner.exchange(0) == pthread_self()); lock_mutex.wr_unlock(); } #ifndef SUX_LOCK_GENERIC @@ -2292,7 +2309,7 @@ private: srw_spin_mutex lock_mutex; #ifdef UNIV_DEBUG /** The owner of lock_mutex (0 if none) */ - Atomic_relaxed<os_thread_id_t> lock_mutex_owner{0}; + Atomic_relaxed<pthread_t> lock_mutex_owner{0}; #endif public: /** Autoinc counter value to give to the next inserted row. */ @@ -2373,6 +2390,12 @@ public: return false; } + /** @return whether a DDL operation is in progress on this table */ + bool is_active_ddl() const + { + return UT_LIST_GET_FIRST(indexes)->online_log; + } + /** @return whether the name is mysql.innodb_index_stats or mysql.innodb_table_stats */ bool is_stats_table() const; diff --git a/storage/innobase/include/dict0stats_bg.h b/storage/innobase/include/dict0stats_bg.h index 5604420134b..d9a2f6282a1 100644 --- a/storage/innobase/include/dict0stats_bg.h +++ b/storage/innobase/include/dict0stats_bg.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2012, 2017, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -28,7 +28,6 @@ Created Apr 26, 2012 Vasil Dimov #define dict0stats_bg_h #include "dict0types.h" -#include "os0thread.h" #ifdef HAVE_PSI_INTERFACE extern mysql_pfs_key_t recalc_pool_mutex_key; diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 8a959402c0b..789d926d8b3 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -409,7 +409,7 @@ private: static constexpr uint32_t PENDING= ~(STOPPING | CLOSING | NEEDS_FSYNC); /** latch protecting all page allocation bitmap pages */ srw_lock latch; - os_thread_id_t latch_owner; + pthread_t latch_owner; ut_d(Atomic_relaxed<uint32_t> latch_count;) public: /** MariaDB encryption data */ @@ -987,20 +987,20 @@ public: #ifdef UNIV_DEBUG bool is_latched() const { return latch_count != 0; } #endif - bool is_owner() const { return latch_owner == os_thread_get_curr_id(); } + bool is_owner() const { return latch_owner == pthread_self(); } /** Acquire the allocation latch in exclusive mode */ void x_lock() { latch.wr_lock(SRW_LOCK_CALL); ut_ad(!latch_owner); - latch_owner= os_thread_get_curr_id(); + latch_owner= pthread_self(); ut_ad(!latch_count.fetch_add(1)); } /** Release the allocation latch from exclusive mode */ void x_unlock() { ut_ad(latch_count.fetch_sub(1) == 1); - ut_ad(latch_owner == os_thread_get_curr_id()); + ut_ad(latch_owner == pthread_self()); latch_owner= 0; latch.wr_unlock(); } diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h index 2dd7c571386..3d217dc3243 100644 --- a/storage/innobase/include/ha_prototypes.h +++ b/storage/innobase/include/ha_prototypes.h @@ -140,15 +140,6 @@ uint8_t get_innobase_type_from_mysql_type(unsigned *unsigned_flag, const Field *field); /******************************************************************//** -Get the variable length bounds of the given character set. */ -void -innobase_get_cset_width( -/*====================*/ - ulint cset, /*!< in: MySQL charset-collation code */ - unsigned*mbminlen, /*!< out: minimum length of a char (in bytes) */ - unsigned*mbmaxlen); /*!< out: maximum length of a char (in bytes) */ - -/******************************************************************//** Compares NUL-terminated UTF-8 strings case insensitively. @return 0 if a=b, <0 if a<b, >1 if a>b */ int diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index e4ceff6dec2..a11bc60e7a0 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -374,18 +374,18 @@ lock_clust_rec_read_check_and_lock_alt( LOCK_REC_NOT_GAP */ que_thr_t* thr) /*!< in: query thread */ MY_ATTRIBUTE((warn_unused_result)); -/*********************************************************************//** -Locks the specified database table in the mode given. If the lock cannot -be granted immediately, the query thread is put to wait. -@return DB_SUCCESS, DB_LOCK_WAIT, or DB_DEADLOCK */ -dberr_t -lock_table( -/*=======*/ - dict_table_t* table, /*!< in/out: database table - in dictionary cache */ - lock_mode mode, /*!< in: lock mode */ - que_thr_t* thr) /*!< in: query thread */ - MY_ATTRIBUTE((warn_unused_result)); + +/** Acquire a table lock. +@param table table to be locked +@param fktable pointer to table, in case of a FOREIGN key check +@param mode lock mode +@param thr SQL execution thread +@retval DB_SUCCESS if the lock was acquired +@retval DB_DEADLOCK if a deadlock occurred, or fktable && *fktable != table +@retval DB_LOCK_WAIT if lock_wait() must be invoked */ +dberr_t lock_table(dict_table_t *table, dict_table_t *const*fktable, + lock_mode mode, que_thr_t *thr) + MY_ATTRIBUTE((warn_unused_result)); /** Create a table lock object for a resurrected transaction. @param table table to be X-locked @@ -426,6 +426,11 @@ lock_rec_unlock( and release possible other transactions waiting because of these locks. */ void lock_release(trx_t* trx); +/** Release the explicit locks of a committing transaction while +dict_sys.latch is exclusively locked, +and release possible other transactions waiting because of these locks. */ +void lock_release_on_drop(trx_t *trx); + /** Release non-exclusive locks on XA PREPARE, and release possible other transactions waiting because of these locks. */ void lock_release_on_prepare(trx_t *trx); @@ -684,10 +689,10 @@ private: bool m_initialised; /** mutex proteting the locks */ - MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) srw_spin_lock latch; + alignas(CPU_LEVEL1_DCACHE_LINESIZE) srw_spin_lock latch; #ifdef UNIV_DEBUG /** The owner of exclusive latch (0 if none); protected by latch */ - std::atomic<os_thread_id_t> writer{0}; + std::atomic<pthread_t> writer{0}; /** Number of shared latches */ std::atomic<ulint> readers{0}; #endif @@ -707,7 +712,7 @@ public: hash_table prdt_page_hash; /** mutex covering lock waits; @see trx_lock_t::wait_lock */ - MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t wait_mutex; + alignas(CPU_LEVEL1_DCACHE_LINESIZE) mysql_mutex_t wait_mutex; private: /** The increment of wait_count for a wait. Anything smaller is a pending wait count. */ @@ -751,14 +756,14 @@ public: mysql_mutex_assert_not_owner(&wait_mutex); ut_ad(!is_writer()); latch.wr_lock(); - ut_ad(!writer.exchange(os_thread_get_curr_id(), + ut_ad(!writer.exchange(pthread_self(), std::memory_order_relaxed)); } /** Release exclusive lock_sys.latch */ void wr_unlock() { ut_ad(writer.exchange(0, std::memory_order_relaxed) == - os_thread_get_curr_id()); + pthread_self()); latch.wr_unlock(); } /** Acquire shared lock_sys.latch */ @@ -784,7 +789,7 @@ public: { ut_ad(!is_writer()); if (!latch.wr_lock_try()) return false; - ut_ad(!writer.exchange(os_thread_get_curr_id(), + ut_ad(!writer.exchange(pthread_self(), std::memory_order_relaxed)); return true; } @@ -808,9 +813,9 @@ public: bool is_writer() const { # ifdef SUX_LOCK_GENERIC - return writer.load(std::memory_order_relaxed) == os_thread_get_curr_id(); + return writer.load(std::memory_order_relaxed) == pthread_self(); # else - return writer.load(std::memory_order_relaxed) == os_thread_get_curr_id() || + return writer.load(std::memory_order_relaxed) == pthread_self() || (xtest() && !latch.is_locked_or_waiting()); # endif } diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index 4d9ad3ddfd8..39c2fb8b01e 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -180,7 +180,7 @@ struct log_t private: /** The log sequence number of the last change of durable InnoDB files */ - MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) + alignas(CPU_LEVEL1_DCACHE_LINESIZE) std::atomic<lsn_t> lsn; /** the first guaranteed-durable log sequence number */ std::atomic<lsn_t> flushed_to_disk_lsn; @@ -201,7 +201,7 @@ typedef srw_lock log_rwlock_t; public: /** rw-lock protecting buf */ - MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) log_rwlock_t latch; + alignas(CPU_LEVEL1_DCACHE_LINESIZE) log_rwlock_t latch; private: /** Last written LSN */ lsn_t write_lsn; @@ -219,7 +219,7 @@ public: private: /** spin lock protecting lsn, buf_free in append_prepare() */ - MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) pthread_mutex_t lsn_lock; + alignas(CPU_LEVEL1_DCACHE_LINESIZE) pthread_mutex_t lsn_lock; void init_lsn_lock() { pthread_mutex_init(&lsn_lock, LSN_LOCK_ATTR); } void lock_lsn() { pthread_mutex_lock(&lsn_lock); } void unlock_lsn() { pthread_mutex_unlock(&lsn_lock); } diff --git a/storage/innobase/include/os0thread.h b/storage/innobase/include/os0thread.h deleted file mode 100644 index b2971462a70..00000000000 --- a/storage/innobase/include/os0thread.h +++ /dev/null @@ -1,42 +0,0 @@ -/***************************************************************************** - -Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/os0thread.h -The interface to the operating system -process and thread control primitives - -Created 9/8/1995 Heikki Tuuri -*******************************************************/ - -#pragma once -#include "univ.i" - -#ifdef _WIN32 -typedef DWORD os_thread_id_t; /*!< In Windows the thread id - is an unsigned long int */ -#else - -typedef pthread_t os_thread_id_t; /*!< In Unix we use the thread - handle itself as the id of - the thread */ -#endif /* _WIN32 */ - -#define os_thread_eq(a,b) IF_WIN(a == b, pthread_equal(a, b)) -#define os_thread_get_curr_id() IF_WIN(GetCurrentThreadId(), pthread_self()) diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h index 37d97ebcaac..bc02fc065f5 100644 --- a/storage/innobase/include/read0types.h +++ b/storage/innobase/include/read0types.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2021, MariaDB Corporation. +Copyright (c) 2018, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -28,12 +28,9 @@ Created 2/16/1997 Heikki Tuuri #include "dict0mem.h" #include "trx0types.h" +#include "srw_lock.h" #include <algorithm> -#ifdef UNIV_PFS_MUTEX -extern mysql_pfs_key_t read_view_mutex_key; -#endif - /** Read view lists the trx ids of those transactions for which a consistent read should not see the modifications to the database. @@ -44,7 +41,7 @@ class ReadViewBase The read should not see any transaction with trx id >= this value. In other words, this is the "high water mark". */ - trx_id_t m_low_limit_id; + trx_id_t m_low_limit_id= 0; /** The read should see all trx ids which are strictly @@ -70,9 +67,6 @@ protected: trx_id_t up_limit_id() const { return m_up_limit_id; } public: - ReadViewBase(): m_low_limit_id(0) {} - - /** Append state from another view. @@ -206,7 +200,7 @@ class ReadView: public ReadViewBase std::atomic<bool> m_open; /** For synchronisation with purge coordinator. */ - mutable mysql_mutex_t m_mutex; + mutable srw_mutex m_mutex; /** trx id of creating transaction. @@ -215,9 +209,12 @@ class ReadView: public ReadViewBase trx_id_t m_creator_trx_id; public: - ReadView(): m_open(false) - { mysql_mutex_init(read_view_mutex_key, &m_mutex, nullptr); } - ~ReadView() { mysql_mutex_destroy(&m_mutex); } + ReadView() + { + memset(reinterpret_cast<void*>(this), 0, sizeof *this); + m_mutex.init(); + } + ~ReadView() { m_mutex.destroy(); } /** @@ -265,12 +262,12 @@ public: */ void print_limits(FILE *file) const { - mysql_mutex_lock(&m_mutex); + m_mutex.wr_lock(); if (is_open()) fprintf(file, "Trx read view will not see trx with" " id >= " TRX_ID_FMT ", sees < " TRX_ID_FMT "\n", low_limit_id(), up_limit_id()); - mysql_mutex_unlock(&m_mutex); + m_mutex.wr_unlock(); } @@ -289,10 +286,10 @@ public: */ void append_to(ReadViewBase *to) const { - mysql_mutex_lock(&m_mutex); + m_mutex.wr_lock(); if (is_open()) to->append(*this); - mysql_mutex_unlock(&m_mutex); + m_mutex.wr_unlock(); } /** diff --git a/storage/innobase/include/row0log.h b/storage/innobase/include/row0log.h index 732ef494326..469f1f8a356 100644 --- a/storage/innobase/include/row0log.h +++ b/storage/innobase/include/row0log.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -24,15 +24,15 @@ Modification log for online index creation and online table rebuild Created 2011-05-26 Marko Makela *******************************************************/ -#ifndef row0log_h -#define row0log_h +#pragma once #include "que0types.h" #include "mtr0types.h" #include "row0types.h" #include "rem0types.h" -#include "data0types.h" +#include "dict0dict.h" #include "trx0types.h" +#include "trx0undo.h" class ut_stage_alter_t; @@ -74,37 +74,23 @@ row_log_free( /******************************************************//** Free the row log for an index on which online creation was aborted. */ -UNIV_INLINE -void -row_log_abort_sec( -/*==============*/ - dict_index_t* index) /*!< in/out: index (x-latched) */ - MY_ATTRIBUTE((nonnull)); - -/******************************************************//** -Try to log an operation to a secondary index that is -(or was) being created. -@retval true if the operation was logged or can be ignored -@retval false if online index creation is not taking place */ -UNIV_INLINE -bool -row_log_online_op_try( -/*==================*/ - dict_index_t* index, /*!< in/out: index, S or X latched */ - const dtuple_t* tuple, /*!< in: index tuple */ - trx_id_t trx_id) /*!< in: transaction ID for insert, - or 0 for delete */ - MY_ATTRIBUTE((nonnull, warn_unused_result)); -/******************************************************//** -Logs an operation to a secondary index that is (or was) being created. */ -void -row_log_online_op( -/*==============*/ - dict_index_t* index, /*!< in/out: index, S or X latched */ - const dtuple_t* tuple, /*!< in: index tuple (NULL=empty the index) */ - trx_id_t trx_id) /*!< in: transaction ID for insert, - or 0 for delete */ - ATTRIBUTE_COLD; +inline void row_log_abort_sec(dict_index_t *index) +{ + ut_ad(index->lock.have_u_or_x()); + ut_ad(!index->is_clust()); + dict_index_set_online_status(index, ONLINE_INDEX_ABORTED); + row_log_free(index->online_log); + index->online_log= nullptr; +} + +/** Logs an operation to a secondary index that is (or was) being created. +@param index index, S or X latched +@param tuple index tuple +@param trx_id transaction ID for insert, or 0 for delete +@retval false if row_log_apply() failure happens +or true otherwise */ +bool row_log_online_op(dict_index_t *index, const dtuple_t *tuple, + trx_id_t trx_id) ATTRIBUTE_COLD; /******************************************************//** Gets the error status of the online index rebuild log. @@ -185,22 +171,6 @@ row_log_table_insert( dict_index_t* index, /*!< in/out: clustered index, S-latched or X-latched */ const rec_offs* offsets);/*!< in: rec_get_offsets(rec,index) */ -/******************************************************//** -Notes that a BLOB is being freed during online ALTER TABLE. */ -void -row_log_table_blob_free( -/*====================*/ - dict_index_t* index, /*!< in/out: clustered index, X-latched */ - ulint page_no)/*!< in: starting page number of the BLOB */ - ATTRIBUTE_COLD __attribute__((nonnull)); -/******************************************************//** -Notes that a BLOB is being allocated during online ALTER TABLE. */ -void -row_log_table_blob_alloc( -/*=====================*/ - dict_index_t* index, /*!< in/out: clustered index, X-latched */ - ulint page_no)/*!< in: starting page number of the BLOB */ - ATTRIBUTE_COLD __attribute__((nonnull)); /** Apply the row_log_table log to a table upon completing rebuild. @param[in] thr query graph @@ -252,6 +222,11 @@ row_log_apply( @return number of n_core_fields */ unsigned row_log_get_n_core_fields(const dict_index_t *index); +/** Get the error code of online log for the index +@param index online index +@return error code present in online log */ +dberr_t row_log_get_error(const dict_index_t *index); + #ifdef HAVE_PSI_STAGE_INTERFACE /** Estimate how much work is to be done by the log apply phase of an ALTER TABLE for this index. @@ -262,7 +237,3 @@ ulint row_log_estimate_work( const dict_index_t* index); #endif /* HAVE_PSI_STAGE_INTERFACE */ - -#include "row0log.inl" - -#endif /* row0log.h */ diff --git a/storage/innobase/include/row0log.inl b/storage/innobase/include/row0log.inl deleted file mode 100644 index f9f3dd006bf..00000000000 --- a/storage/innobase/include/row0log.inl +++ /dev/null @@ -1,80 +0,0 @@ -/***************************************************************************** - -Copyright (c) 2011, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2020, MariaDB Corporation. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., -51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA - -*****************************************************************************/ - -/**************************************************//** -@file include/row0log.ic -Modification log for online index creation and online table rebuild - -Created 2012-10-18 Marko Makela -*******************************************************/ - -#include "dict0dict.h" - -/******************************************************//** -Free the row log for an index on which online creation was aborted. */ -UNIV_INLINE -void -row_log_abort_sec( -/*===============*/ - dict_index_t* index) /*!< in/out: index (x-latched) */ -{ - ut_ad(index->lock.have_u_or_x()); - ut_ad(!dict_index_is_clust(index)); - dict_index_set_online_status(index, ONLINE_INDEX_ABORTED); - row_log_free(index->online_log); - index->online_log = NULL; -} - -/******************************************************//** -Try to log an operation to a secondary index that is -(or was) being created. -@retval true if the operation was logged or can be ignored -@retval false if online index creation is not taking place */ -UNIV_INLINE -bool -row_log_online_op_try( -/*==================*/ - dict_index_t* index, /*!< in/out: index, S or X latched */ - const dtuple_t* tuple, /*!< in: index tuple */ - trx_id_t trx_id) /*!< in: transaction ID for insert, - or 0 for delete */ -{ - ut_ad(index->lock.have_any()); - - switch (dict_index_get_online_status(index)) { - case ONLINE_INDEX_COMPLETE: - /* This is a normal index. Do not log anything. - The caller must perform the operation on the - index tree directly. */ - return(false); - case ONLINE_INDEX_CREATION: - /* The index is being created online. Log the - operation. */ - row_log_online_op(index, tuple, trx_id); - break; - case ONLINE_INDEX_ABORTED: - case ONLINE_INDEX_ABORTED_DROPPED: - /* The index was created online, but the operation was - aborted. Do not log the operation and tell the caller - to skip the operation. */ - break; - } - - return(true); -} diff --git a/storage/innobase/include/row0merge.h b/storage/innobase/include/row0merge.h index 580dbc65c00..ec435df17d8 100644 --- a/storage/innobase/include/row0merge.h +++ b/storage/innobase/include/row0merge.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2021, MariaDB Corporation. +Copyright (c) 2015, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -24,8 +24,7 @@ Index build routines using a merge sort Created 13/06/2005 Jan Lindstrom *******************************************************/ -#ifndef row0merge_h -#define row0merge_h +#pragma once #include "que0types.h" #include "trx0types.h" @@ -36,7 +35,8 @@ Created 13/06/2005 Jan Lindstrom #include "row0mysql.h" #include "lock0types.h" #include "srv0srv.h" -#include "ut0stage.h" + +class ut_stage_alter_t; /* Reserve free space from every block for key_version */ #define ROW_MERGE_RESERVE_SIZE 4 @@ -483,5 +483,3 @@ public: /** Init temporary files for each index */ void init_tmp_file(); }; - -#endif /* row0merge.h */ diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 0ed08009f7d..ffcbd15ace2 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -55,7 +55,7 @@ Created 10/10/1995 Heikki Tuuri /** Simple non-atomic counter @tparam Type the integer type of the counter */ template <typename Type> -struct MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) simple_counter +struct alignas(CPU_LEVEL1_DCACHE_LINESIZE) simple_counter { /** Increment the counter */ Type inc() { return add(1); } diff --git a/storage/innobase/include/sux_lock.h b/storage/innobase/include/sux_lock.h index 17a484c732e..2c0167ac651 100644 --- a/storage/innobase/include/sux_lock.h +++ b/storage/innobase/include/sux_lock.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 2020, 2021, MariaDB Corporation. +Copyright (c) 2020, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -19,7 +19,6 @@ this program; if not, write to the Free Software Foundation, Inc., #pragma once #include "srw_lock.h" #include "my_atomic_wrapper.h" -#include "os0thread.h" #ifdef UNIV_DEBUG # include <unordered_set> #endif @@ -36,19 +35,19 @@ class sux_lock final /** Numbers of U and X locks. Protected by lock. */ uint32_t recursive; /** The owner of the U or X lock (0 if none); protected by lock */ - std::atomic<os_thread_id_t> writer; + std::atomic<pthread_t> writer; /** Special writer!=0 value to indicate that the lock is non-recursive and will be released by an I/O thread */ #if defined __linux__ || defined _WIN32 - static constexpr os_thread_id_t FOR_IO= os_thread_id_t(~0UL); + static constexpr pthread_t FOR_IO= pthread_t(~0UL); #else -# define FOR_IO ((os_thread_id_t) ~0UL) /* it could be a pointer */ +# define FOR_IO ((pthread_t) ~0UL) /* it could be a pointer */ #endif #ifdef UNIV_DEBUG /** Protects readers */ mutable srw_mutex readers_lock; /** Threads that hold the lock in shared mode */ - std::atomic<std::unordered_multiset<os_thread_id_t>*> readers; + std::atomic<std::unordered_multiset<pthread_t>*> readers; #endif /** The multiplier in recursive for X locks */ @@ -109,7 +108,7 @@ public: /** Acquire a recursive lock */ template<bool allow_readers> void writer_recurse() { - ut_ad(writer == os_thread_get_curr_id()); + ut_ad(writer == pthread_self()); ut_d(auto rec= (recursive / (allow_readers ? RECURSIVE_U : RECURSIVE_X)) & RECURSIVE_MAX); ut_ad(allow_readers ? recursive : rec); @@ -120,14 +119,14 @@ public: private: /** Transfer the ownership of a write lock to another thread @param id the new owner of the U or X lock */ - void set_new_owner(os_thread_id_t id) + void set_new_owner(pthread_t id) { IF_DBUG(DBUG_ASSERT(writer.exchange(id, std::memory_order_relaxed)), writer.store(id, std::memory_order_relaxed)); } /** Assign the ownership of a write lock to a thread @param id the owner of the U or X lock */ - void set_first_owner(os_thread_id_t id) + void set_first_owner(pthread_t id) { IF_DBUG(DBUG_ASSERT(!writer.exchange(id, std::memory_order_relaxed)), writer.store(id, std::memory_order_relaxed)); @@ -136,12 +135,12 @@ private: /** Register the current thread as a holder of a shared lock */ void s_lock_register() { - const os_thread_id_t id= os_thread_get_curr_id(); + const pthread_t id= pthread_self(); readers_lock.wr_lock(); auto r= readers.load(std::memory_order_relaxed); if (!r) { - r= new std::unordered_multiset<os_thread_id_t>(); + r= new std::unordered_multiset<pthread_t>(); readers.store(r, std::memory_order_relaxed); } r->emplace(id); @@ -152,12 +151,12 @@ private: public: /** In crash recovery or the change buffer, claim the ownership of the exclusive block lock to the current thread */ - void claim_ownership() { set_new_owner(os_thread_get_curr_id()); } + void claim_ownership() { set_new_owner(pthread_self()); } /** @return whether the current thread is holding X or U latch */ bool have_u_or_x() const { - if (os_thread_get_curr_id() != writer.load(std::memory_order_relaxed)) + if (pthread_self() != writer.load(std::memory_order_relaxed)) return false; ut_ad(recursive); return true; @@ -175,7 +174,7 @@ public: if (auto r= readers.load(std::memory_order_relaxed)) { readers_lock.wr_lock(); - bool found= r->find(os_thread_get_curr_id()) != r->end(); + bool found= r->find(pthread_self()) != r->end(); readers_lock.wr_unlock(); return found; } @@ -233,7 +232,7 @@ public: void s_unlock() { #ifdef UNIV_DEBUG - const os_thread_id_t id= os_thread_get_curr_id(); + const pthread_t id= pthread_self(); auto r= readers.load(std::memory_order_relaxed); ut_ad(r); readers_lock.wr_lock(); @@ -250,7 +249,7 @@ public: void u_or_x_unlock(bool allow_readers, bool claim_ownership= false) { ut_d(auto owner= writer.load(std::memory_order_relaxed)); - ut_ad(owner == os_thread_get_curr_id() || + ut_ad(owner == pthread_self() || (owner == FOR_IO && claim_ownership && recursive == (allow_readers ? RECURSIVE_U : RECURSIVE_X))); ut_d(auto rec= (recursive / (allow_readers ? RECURSIVE_U : RECURSIVE_X)) & @@ -314,7 +313,7 @@ inline void sux_lock<ssux_lock>::s_lock(const char *file, unsigned line) template<> inline void sux_lock<ssux_lock>::u_lock(const char *file, unsigned line) { - os_thread_id_t id= os_thread_get_curr_id(); + pthread_t id= pthread_self(); if (writer.load(std::memory_order_relaxed) == id) writer_recurse<true>(); else @@ -329,7 +328,7 @@ inline void sux_lock<ssux_lock>::u_lock(const char *file, unsigned line) template<> inline void sux_lock<ssux_lock>::x_lock(const char *file, unsigned line) { - os_thread_id_t id= os_thread_get_curr_id(); + pthread_t id= pthread_self(); if (writer.load(std::memory_order_relaxed) == id) writer_recurse<false>(); else @@ -371,7 +370,7 @@ inline void sux_lock<ssux>::unlock_shared() { s_unlock(); } template<typename ssux> inline void sux_lock<ssux>::u_lock() { - os_thread_id_t id= os_thread_get_curr_id(); + pthread_t id= pthread_self(); if (writer.load(std::memory_order_relaxed) == id) writer_recurse<true>(); else @@ -385,7 +384,7 @@ template<typename ssux> inline void sux_lock<ssux>::u_lock() template<typename ssux> inline void sux_lock<ssux>::x_lock(bool for_io) { - os_thread_id_t id= os_thread_get_curr_id(); + pthread_t id= pthread_self(); if (writer.load(std::memory_order_relaxed) == id) { ut_ad(!for_io); @@ -409,7 +408,7 @@ template<typename ssux> inline void sux_lock<ssux>::u_x_upgrade() template<typename ssux> inline bool sux_lock<ssux>::x_lock_upgraded() { - os_thread_id_t id= os_thread_get_curr_id(); + pthread_t id= pthread_self(); if (writer.load(std::memory_order_relaxed) == id) { ut_ad(recursive); @@ -436,7 +435,7 @@ template<typename ssux> inline bool sux_lock<ssux>::x_lock_upgraded() template<typename ssux> inline bool sux_lock<ssux>::u_lock_try(bool for_io) { - os_thread_id_t id= os_thread_get_curr_id(); + pthread_t id= pthread_self(); if (writer.load(std::memory_order_relaxed) == id) { if (for_io) @@ -456,7 +455,7 @@ template<typename ssux> inline bool sux_lock<ssux>::u_lock_try(bool for_io) template<typename ssux> inline bool sux_lock<ssux>::x_lock_try() { - os_thread_id_t id= os_thread_get_curr_id(); + pthread_t id= pthread_self(); if (writer.load(std::memory_order_relaxed) == id) { writer_recurse<false>(); diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h index b3f2fbeedf3..ef9111845a6 100644 --- a/storage/innobase/include/trx0purge.h +++ b/storage/innobase/include/trx0purge.h @@ -125,7 +125,7 @@ class purge_sys_t { public: /** latch protecting view, m_enabled */ - MY_ALIGNED(CACHE_LINE_SIZE) mutable srw_spin_lock latch; + alignas(CPU_LEVEL1_DCACHE_LINESIZE) mutable srw_spin_lock latch; private: /** The purge will not remove undo logs which are >= this view */ ReadViewBase view; diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h index 86f305fe04b..a56752fc91d 100644 --- a/storage/innobase/include/trx0rec.h +++ b/storage/innobase/include/trx0rec.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -82,6 +82,7 @@ trx_undo_rec_get_pars( undo_no_t* undo_no, /*!< out: undo log record number */ table_id_t* table_id) /*!< out: table id */ MY_ATTRIBUTE((nonnull)); + /*******************************************************************//** Builds a row reference from an undo log record. @return pointer to remaining part of undo record */ @@ -208,37 +209,48 @@ fetching the purge record */ the undo log (which is the after image for an update) */ #define TRX_UNDO_GET_OLD_V_VALUE 0x2 -/*******************************************************************//** -Build a previous version of a clustered index record. The caller must -hold a latch on the index page of the clustered index record. +/** Build a previous version of a clustered index record. The caller +must hold a latch on the index page of the clustered index record. +@param index_rec clustered index record in the index tree +@param index_mtr mtr which contains the latch to index_rec page + and purge_view +@param rec version of a clustered index record +@param index clustered index +@param offsets rec_get_offsets(rec, index) +@param heap memory heap from which the memory needed is + allocated +@param old_vers previous version or NULL if rec is the + first inserted version, or if history data + has been deleted (an error), or if the purge + could have removed the version + though it has not yet done so +@param v_heap memory heap used to create vrow + dtuple if it is not yet created. This heap + diffs from "heap" above in that it could be + prebuilt->old_vers_heap for selection +@param vrow virtual column info, if any +@param v_status status determine if it is going into this + function by purge thread or not. + And if we read "after image" of undo log +@param undo_block undo log block which was cached during + online dml apply or nullptr @retval true if previous version was built, or if it was an insert or the table has been rebuilt @retval false if the previous version is earlier than purge_view, -which means that it may have been removed */ +or being purged, which means that it may have been removed */ bool trx_undo_prev_version_build( -/*========================*/ - const rec_t* index_rec,/*!< in: clustered index record in the - index tree */ - mtr_t* index_mtr,/*!< in: mtr which contains the latch to - index_rec page and purge_view */ - const rec_t* rec, /*!< in: version of a clustered index record */ - dict_index_t* index, /*!< in: clustered index */ - rec_offs* offsets,/*!< in/out: rec_get_offsets(rec, index) */ - mem_heap_t* heap, /*!< in: memory heap from which the memory - needed is allocated */ - rec_t** old_vers,/*!< out, own: previous version, or NULL if - rec is the first inserted version, or if - history data has been deleted */ - mem_heap_t* v_heap, /* !< in: memory heap used to create vrow - dtuple if it is not yet created. This heap - diffs from "heap" above in that it could be - prebuilt->old_vers_heap for selection */ - dtuple_t** vrow, /*!< out: virtual column info, if any */ - ulint v_status); - /*!< in: status determine if it is going - into this function by purge thread or not. - And if we read "after image" of undo log */ + const rec_t *index_rec, + mtr_t *index_mtr, + const rec_t *rec, + dict_index_t *index, + rec_offs *offsets, + mem_heap_t *heap, + rec_t **old_vers, + mem_heap_t *v_heap, + dtuple_t **vrow, + ulint v_status, + const buf_block_t *undo_block= nullptr); /** Read from an undo log record a non-virtual column value. @param[in,out] ptr pointer to remaining part of the undo record diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h index 3b1ea54afda..60f59321b29 100644 --- a/storage/innobase/include/trx0rseg.h +++ b/storage/innobase/include/trx0rseg.h @@ -69,7 +69,7 @@ void trx_temp_rseg_create(); #define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2) /** The rollback segment memory object */ -struct MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) trx_rseg_t +struct alignas(CPU_LEVEL1_DCACHE_LINESIZE) trx_rseg_t { /** tablespace containing the rollback segment; constant after init() */ fil_space_t *space; diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index cbac3fd3a94..1f33a9db091 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -40,7 +40,6 @@ Created 3/26/1996 Heikki Tuuri #ifdef UNIV_PFS_MUTEX extern mysql_pfs_key_t trx_sys_mutex_key; -extern mysql_pfs_key_t rw_trx_hash_element_mutex_key; #endif /** Checks if a page address is the trx sys header page. @@ -335,16 +334,14 @@ trx_t* current_trx(); struct rw_trx_hash_element_t { - rw_trx_hash_element_t(): trx(0) + rw_trx_hash_element_t() { - mysql_mutex_init(rw_trx_hash_element_mutex_key, &mutex, nullptr); + memset(reinterpret_cast<void*>(this), 0, sizeof *this); + mutex.init(); } - ~rw_trx_hash_element_t() - { - mysql_mutex_destroy(&mutex); - } + ~rw_trx_hash_element_t() { mutex.destroy(); } trx_id_t id; /* lf_hash_init() relies on this to be first in the struct */ @@ -357,7 +354,7 @@ struct rw_trx_hash_element_t */ Atomic_counter<trx_id_t> no; trx_t *trx; - mysql_mutex_t mutex; + srw_mutex mutex; }; @@ -526,10 +523,10 @@ class rw_trx_hash_t static my_bool debug_iterator(rw_trx_hash_element_t *element, debug_iterator_arg<T> *arg) { - mysql_mutex_lock(&element->mutex); + element->mutex.wr_lock(); if (element->trx) validate_element(element->trx); - mysql_mutex_unlock(&element->mutex); + element->mutex.wr_unlock(); return arg->action(element, arg->argument); } #endif @@ -631,7 +628,7 @@ public: sizeof(trx_id_t))); if (element) { - mysql_mutex_lock(&element->mutex); + element->mutex.wr_lock(); lf_hash_search_unpin(pins); if ((trx= element->trx)) { DBUG_ASSERT(trx_id == trx->id); @@ -652,7 +649,7 @@ public: trx->reference(); } } - mysql_mutex_unlock(&element->mutex); + element->mutex.wr_unlock(); } if (!caller_trx) lf_hash_put_pins(pins); @@ -686,9 +683,9 @@ public: void erase(trx_t *trx) { ut_d(validate_element(trx)); - mysql_mutex_lock(&trx->rw_trx_hash_element->mutex); - trx->rw_trx_hash_element->trx= 0; - mysql_mutex_unlock(&trx->rw_trx_hash_element->mutex); + trx->rw_trx_hash_element->mutex.wr_lock(); + trx->rw_trx_hash_element->trx= nullptr; + trx->rw_trx_hash_element->mutex.wr_unlock(); int res= lf_hash_delete(&hash, get_pins(trx), reinterpret_cast<const void*>(&trx->id), sizeof(trx_id_t)); @@ -722,12 +719,12 @@ public: May return element with committed transaction. If caller doesn't like to see committed transactions, it has to skip those under element mutex: - mysql_mutex_lock(&element->mutex); + element->mutex.wr_lock(); if (trx_t trx= element->trx) { // trx is protected against commit in this branch } - mysql_mutex_unlock(&element->mutex); + element->mutex.wr_unlock(); May miss concurrently inserted transactions. @@ -833,8 +830,8 @@ public: void unfreeze() const { mysql_mutex_unlock(&mutex); } private: - alignas(CACHE_LINE_SIZE) mutable mysql_mutex_t mutex; - alignas(CACHE_LINE_SIZE) ilist<trx_t> trx_list; + alignas(CPU_LEVEL1_DCACHE_LINESIZE) mutable mysql_mutex_t mutex; + alignas(CPU_LEVEL1_DCACHE_LINESIZE) ilist<trx_t> trx_list; }; /** The transaction system central memory data structure. */ @@ -844,7 +841,7 @@ class trx_sys_t The smallest number not yet assigned as a transaction id or transaction number. Accessed and updated with atomic operations. */ - MY_ALIGNED(CACHE_LINE_SIZE) Atomic_counter<trx_id_t> m_max_trx_id; + alignas(CPU_LEVEL1_DCACHE_LINESIZE) Atomic_counter<trx_id_t> m_max_trx_id; /** @@ -855,7 +852,8 @@ class trx_sys_t @sa assign_new_trx_no() @sa snapshot_ids() */ - MY_ALIGNED(CACHE_LINE_SIZE) std::atomic<trx_id_t> m_rw_trx_hash_version; + alignas(CPU_LEVEL1_DCACHE_LINESIZE) + std::atomic<trx_id_t> m_rw_trx_hash_version; bool m_initialised; @@ -875,7 +873,7 @@ public: Works faster when it is on it's own cache line (tested). */ - MY_ALIGNED(CACHE_LINE_SIZE) rw_trx_hash_t rw_trx_hash; + alignas(CPU_LEVEL1_DCACHE_LINESIZE) rw_trx_hash_t rw_trx_hash; #ifdef WITH_WSREP @@ -1180,11 +1178,11 @@ private: { if (element->id < *id) { - mysql_mutex_lock(&element->mutex); + element->mutex.wr_lock(); /* We don't care about read-only transactions here. */ if (element->trx && element->trx->rsegs.m_redo.rseg) *id= element->id; - mysql_mutex_unlock(&element->mutex); + element->mutex.wr_unlock(); } return 0; } diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 64b0923229b..bf9a2acd622 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -388,13 +388,13 @@ struct trx_lock_t only be modified by the thread that is serving the running transaction. */ - /** Pre-allocated record locks */ - struct { - ib_lock_t lock; byte pad[256]; - } rec_pool[8]; + /** Pre-allocated record locks */ + struct { + alignas(CPU_LEVEL1_DCACHE_LINESIZE) ib_lock_t lock; + } rec_pool[8]; - /** Pre-allocated table locks */ - ib_lock_t table_pool[8]; + /** Pre-allocated table locks */ + ib_lock_t table_pool[8]; /** Memory heap for trx_locks. Protected by lock_sys.assert_locked() and lock_sys.is_writer() || trx->mutex_is_owner(). */ @@ -562,7 +562,7 @@ no longer be associated with a session when the server is restarted. A session may be served by at most one thread at a time. The serving thread of a session might change in some MySQL implementations. -Therefore we do not have os_thread_get_curr_id() assertions in the code. +Therefore we do not have pthread_self() assertions in the code. Normally, only the thread that is currently associated with a running transaction may access (read and modify) the trx object, and it may do @@ -623,6 +623,7 @@ private: that it is no longer "active". */ + alignas(CPU_LEVEL1_DCACHE_LINESIZE) Atomic_counter<int32_t> n_ref; @@ -640,7 +641,7 @@ private: srw_spin_mutex mutex; #ifdef UNIV_DEBUG /** The owner of mutex (0 if none); protected by mutex */ - std::atomic<os_thread_id_t> mutex_owner{0}; + std::atomic<pthread_t> mutex_owner{0}; #endif /* UNIV_DEBUG */ public: void mutex_init() { mutex.init(); } @@ -651,14 +652,14 @@ public: { ut_ad(!mutex_is_owner()); mutex.wr_lock(); - ut_ad(!mutex_owner.exchange(os_thread_get_curr_id(), + ut_ad(!mutex_owner.exchange(pthread_self(), std::memory_order_relaxed)); } /** Release the mutex */ void mutex_unlock() { ut_ad(mutex_owner.exchange(0, std::memory_order_relaxed) - == os_thread_get_curr_id()); + == pthread_self()); mutex.wr_unlock(); } #ifndef SUX_LOCK_GENERIC @@ -669,7 +670,7 @@ public: bool mutex_is_owner() const { return mutex_owner.load(std::memory_order_relaxed) == - os_thread_get_curr_id(); + pthread_self(); } #endif /* UNIV_DEBUG */ @@ -738,7 +739,7 @@ public: /** The locks of the transaction. Protected by lock_sys.latch (insertions also by trx_t::mutex). */ - trx_lock_t lock; + alignas(CPU_LEVEL1_DCACHE_LINESIZE) trx_lock_t lock; #ifdef WITH_WSREP /** whether wsrep_on(mysql_thd) held at the start of transaction */ @@ -805,8 +806,12 @@ public: flush the log in trx_commit_complete_for_mysql() */ ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */ - bool dict_operation; /**< whether this modifies InnoDB - data dictionary */ + /** whether this modifies InnoDB dictionary tables */ + bool dict_operation; +#ifdef UNIV_DEBUG + /** copy of dict_operation during commit() */ + bool was_dict_operation; +#endif /** whether dict_sys.latch is held exclusively; protected by dict_sys.latch */ bool dict_operation_lock_mode; @@ -900,6 +905,10 @@ public: bool auto_commit; /*!< true if it is an autocommit */ bool will_lock; /*!< set to inform trx_start_low() that the transaction may acquire locks */ + /* True if transaction has to read the undo log and + log the DML changes for online DDL table */ + bool apply_online_log = false; + /*------------------------------*/ fts_trx_t* fts_trx; /*!< FTS information, or NULL if transaction hasn't modified tables @@ -973,9 +982,12 @@ public: @retval false if the rollback was aborted by shutdown */ inline bool rollback_finish(); private: + /** Apply any changes to tables for which online DDL is in progress. */ + ATTRIBUTE_COLD void apply_log(); /** Process tables that were modified by the committing transaction. */ inline void commit_tables(); - /** Mark a transaction committed in the main memory data structures. */ + /** Mark a transaction committed in the main memory data structures. + @param mtr mini-transaction (if there are any persistent modifications) */ inline void commit_in_memory(const mtr_t *mtr); /** Write log for committing the transaction. */ void commit_persist(); @@ -1065,6 +1077,7 @@ public: ut_ad(!autoinc_locks || ib_vector_is_empty(autoinc_locks)); ut_ad(UT_LIST_GET_LEN(lock.evicted_tables) == 0); ut_ad(!dict_operation); + ut_ad(!apply_online_log); } /** This has to be invoked on SAVEPOINT or at the end of a statement. diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h index 62662ffe221..111369e6a0f 100644 --- a/storage/innobase/include/trx0undo.h +++ b/storage/innobase/include/trx0undo.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2021, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -326,6 +326,105 @@ struct trx_undo_t { /*!< undo log objects in the rollback segment are chained into lists */ }; + +/** Cache a pointer to an undo record in a latched buffer pool page, +parse the undo log record and store the record type, update vector +and compiler information */ +class UndorecApplier +{ + /** undo log block which was latched */ + const buf_block_t *block; + /** Undo log record pointer */ + trx_undo_rec_t *undo_rec; + /** Offset of the undo log record within the block */ + ulint offset; + /** Transaction id of the undo log */ + trx_id_t trx_id; + /** Undo log record type */ + ulint type; + /** compiler information */ + ulint cmpl_info; + /** Update vector */ + upd_t *update; + /** memory heap which can be used to build previous version of + the index record and its offsets */ + mem_heap_t *heap; + /** mini-transaction for accessing B-tree pages */ + mtr_t mtr; + +public: + UndorecApplier(const buf_block_t *block, trx_id_t trx_id) + : block(block), trx_id(trx_id) + { + ut_ad(block->page.lock.have_any()); + heap= mem_heap_create(100); + } + + /** Assign the undo log block */ + void assign_block(const buf_block_t *undo_block) + { + block= undo_block; + } + + /** Assign the undo log record and offset */ + void assign_rec(trx_undo_rec_t *rec); + + /** Handle the DML undo log and apply it on online indexes */ + void apply_undo_rec(); + + ~UndorecApplier() + { + mem_heap_free(heap); + } + +private: + /** Handle the insert undo log and apply it on online indexes + @param tuple row reference from undo log record + @param clust_index clustered index */ + void log_insert(const dtuple_t &tuple, dict_index_t *clust_index); + + /** Handle the update, delete undo log and apply it on online + indexes. + @param tuple row reference from undo log record + @param clust_index clustered index */ + void log_update(const dtuple_t &tuple, dict_index_t *clust_index); + + /** Check whether the given roll pointer is generated by + the current undo log record information stored. + @return true if roll pointer matches with current undo log info */ + bool is_same(roll_ptr_t roll_ptr) const + { + uint16_t offset= static_cast<uint16_t>(roll_ptr); + uint32_t page_no= static_cast<uint32_t>(roll_ptr >> 16); + return page_no == block->page.id().page_no() && offset == this->offset; + } + + /** Clear the undo log record information */ + void clear_undo_rec() + { + undo_rec= nullptr; + cmpl_info= 0; + type= 0; + update= nullptr; + offset= 0; + mem_heap_empty(heap); + } + + /** Get the correct version of the clustered index record that + was modified by the current undo log record. Because there could + be the multiple successive updates of the same record within the + same transaction. + @param tuple tuple contains primary key value + @param index clustered index + @param[out] clust_rec current clustered index record + @param offsets offsets points to the record + @return clustered index record which was changed by + the undo log record or nullptr when there is no clustered + index record changed by undo log record */ + const rec_t* get_old_rec(const dtuple_t &tuple, dict_index_t *index, + const rec_t **clust_rec, rec_offs **offsets); +}; + #endif /* !UNIV_INNOCHECKSUM */ /** The offset of the undo log page header on pages of the undo log */ diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i index 3b82ae63c9f..8fe5c45bb8a 100644 --- a/storage/innobase/include/univ.i +++ b/storage/innobase/include/univ.i @@ -510,7 +510,6 @@ extern mysql_pfs_key_t fts_cache_mutex_key; extern mysql_pfs_key_t fts_cache_init_mutex_key; extern mysql_pfs_key_t fts_delete_mutex_key; extern mysql_pfs_key_t fts_doc_id_mutex_key; -extern mysql_pfs_key_t fts_pll_tokenize_mutex_key; extern mysql_pfs_key_t ibuf_bitmap_mutex_key; extern mysql_pfs_key_t ibuf_mutex_key; extern mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key; @@ -529,8 +528,6 @@ extern mysql_pfs_key_t trx_pool_mutex_key; extern mysql_pfs_key_t trx_pool_manager_mutex_key; extern mysql_pfs_key_t lock_wait_mutex_key; extern mysql_pfs_key_t srv_threads_mutex_key; -extern mysql_pfs_key_t thread_mutex_key; -extern mysql_pfs_key_t row_drop_list_mutex_key; # endif /* UNIV_PFS_MUTEX */ # ifdef UNIV_PFS_RWLOCK diff --git a/storage/innobase/include/ut0counter.h b/storage/innobase/include/ut0counter.h index 448768ec29a..d6589cc4fd3 100644 --- a/storage/innobase/include/ut0counter.h +++ b/storage/innobase/include/ut0counter.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2012, 2015, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2017, 2019, MariaDB Corporation. +Copyright (c) 2017, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -28,16 +28,9 @@ Created 2012/04/12 by Sunny Bains #ifndef ut0counter_h #define ut0counter_h -#include "os0thread.h" +#include "univ.i" #include "my_rdtsc.h" -/** CPU cache line size */ -#ifdef CPU_LEVEL1_DCACHE_LINESIZE -# define CACHE_LINE_SIZE CPU_LEVEL1_DCACHE_LINESIZE -#else -# error CPU_LEVEL1_DCACHE_LINESIZE is undefined -#endif /* CPU_LEVEL1_DCACHE_LINESIZE */ - /** Use the result of my_timer_cycles(), which mainly uses RDTSC for cycles as a random value. See the comments for my_timer_cycles() */ /** @return result from RDTSC or similar functions. */ @@ -53,7 +46,7 @@ get_rnd_value() /* We may go here if my_timer_cycles() returns 0, so we have to have the plan B for the counter. */ #if !defined(_WIN32) - return (size_t)os_thread_get_curr_id(); + return (size_t)pthread_self(); #else LARGE_INTEGER cnt; QueryPerformanceCounter(&cnt); @@ -71,19 +64,18 @@ be zero-initialized by the run-time environment. @see srv_stats */ template <typename Type> struct ib_atomic_counter_element_t { - MY_ALIGNED(CACHE_LINE_SIZE) Atomic_relaxed<Type> value; + alignas(CPU_LEVEL1_DCACHE_LINESIZE) Atomic_relaxed<Type> value; }; template <typename Type> struct ib_counter_element_t { - MY_ALIGNED(CACHE_LINE_SIZE) Type value; + alignas(CPU_LEVEL1_DCACHE_LINESIZE) Type value; }; /** Class for using fuzzy counters. The counter is multi-instance relaxed atomic so the results are not guaranteed to be 100% accurate but close -enough. Creates an array of counters and separates each element by the -CACHE_LINE_SIZE bytes */ +enough. */ template <typename Type, template <typename T> class Element = ib_atomic_counter_element_t, int N = 128 > @@ -123,9 +115,9 @@ struct ib_counter_t { } private: - static_assert(sizeof(Element<Type>) == CACHE_LINE_SIZE, ""); + static_assert(sizeof(Element<Type>) == CPU_LEVEL1_DCACHE_LINESIZE, ""); /** Array of counter elements */ - MY_ALIGNED(CACHE_LINE_SIZE) Element<Type> m_counter[N]; + alignas(CPU_LEVEL1_DCACHE_LINESIZE) Element<Type> m_counter[N]; }; #endif /* ut0counter_h */ diff --git a/storage/innobase/include/ut0pool.h b/storage/innobase/include/ut0pool.h index 56ffbee48f3..bce443a51b2 100644 --- a/storage/innobase/include/ut0pool.h +++ b/storage/innobase/include/ut0pool.h @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2013, 2014, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2018, 2021, MariaDB Corporation. +Copyright (c) 2018, 2022, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -31,7 +31,7 @@ Created 2012-Feb-26 Sunny Bains #include <queue> #include <functional> -#include "ut0new.h" +#include <my_global.h> /** Allocate the memory for the object in blocks. We keep the objects sorted on pointer so that they are closer together in case they have to be iterated @@ -41,8 +41,6 @@ struct Pool { typedef Type value_type; - // FIXME: Add an assertion to check alignment and offset is - // as we expect it. Also, sizeof(void*) can be 8, can we impove on this. struct Element { Pool* m_pool; value_type m_type; @@ -57,17 +55,30 @@ struct Pool { m_size(size), m_last() { + ut_ad(ut_is_2pow(size)); ut_a(size >= sizeof(Element)); + static_assert(!(sizeof(Element) % CPU_LEVEL1_DCACHE_LINESIZE), + "alignment"); m_lock_strategy.create(); ut_a(m_start == 0); - m_start = reinterpret_cast<Element*>(ut_zalloc_nokey(m_size)); +#ifdef _MSC_VER + m_start = static_cast<Element*>( + _aligned_malloc(m_size, CPU_LEVEL1_DCACHE_LINESIZE)); +#else + void* start; + ut_a(!posix_memalign(&start, CPU_LEVEL1_DCACHE_LINESIZE, + m_size)); + m_start = static_cast<Element*>(start); +#endif + memset_aligned<CPU_LEVEL1_DCACHE_LINESIZE>( + m_start, 0, m_size); m_last = m_start; - m_end = &m_start[m_size / sizeof(*m_start)]; + m_end = &m_start[m_size / sizeof *m_start]; /* Note: Initialise only a small subset, even though we have allocated all the memory. This is required only because PFS @@ -90,7 +101,7 @@ struct Pool { Factory::destroy(&elem->m_type); } - ut_free(m_start); + IF_WIN(_aligned_free,free)(m_start); m_end = m_last = m_start = 0; m_size = 0; } |