diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2018-08-16 08:54:58 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2018-08-16 08:54:58 +0300 |
commit | 1eb2d8f6e868e71e5e848824e759d7d10aef4558 (patch) | |
tree | 822e15c8481032685842e7eb96df3fa860438f9a /storage | |
parent | 197aa0d879c70861d39d1aa6720394ebbaa0274a (diff) | |
parent | 05153a670de6eee0959ce564e11bb2f7ca820d42 (diff) | |
download | mariadb-git-1eb2d8f6e868e71e5e848824e759d7d10aef4558.tar.gz |
Merge 10.2 into 10.3
Diffstat (limited to 'storage')
-rw-r--r-- | storage/innobase/fil/fil0fil.cc | 49 | ||||
-rw-r--r-- | storage/innobase/include/fil0fil.h | 3 | ||||
-rw-r--r-- | storage/innobase/include/lock0lock.h | 63 | ||||
-rw-r--r-- | storage/innobase/include/lock0priv.h | 139 | ||||
-rw-r--r-- | storage/innobase/include/lock0priv.ic | 12 | ||||
-rw-r--r-- | storage/innobase/include/lock0types.h | 192 | ||||
-rw-r--r-- | storage/innobase/include/log0recv.h | 18 | ||||
-rw-r--r-- | storage/innobase/include/trx0sys.h | 7 | ||||
-rw-r--r-- | storage/innobase/include/trx0trx.h | 22 | ||||
-rw-r--r-- | storage/innobase/include/ut0pool.h | 20 | ||||
-rw-r--r-- | storage/innobase/lock/lock0lock.cc | 85 | ||||
-rw-r--r-- | storage/innobase/log/log0recv.cc | 93 | ||||
-rw-r--r-- | storage/innobase/mtr/mtr0log.cc | 6 | ||||
-rw-r--r-- | storage/innobase/page/page0cur.cc | 5 | ||||
-rw-r--r-- | storage/innobase/trx/trx0trx.cc | 47 | ||||
-rw-r--r-- | storage/rocksdb/build_rocksdb.cmake | 26 | ||||
-rw-r--r-- | storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt | 2 | ||||
-rw-r--r-- | storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test | 7 |
18 files changed, 396 insertions, 400 deletions
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 7ae2a306f01..ac7a84c90f8 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -739,42 +739,35 @@ retry: return(true); } -/** Close a file node. -@param[in,out] node File node */ -static -void -fil_node_close_file( - fil_node_t* node) +/** Close the file handle. */ +void fil_node_t::close() { bool ret; - ut_ad(mutex_own(&(fil_system.mutex))); - ut_a(node->is_open()); - ut_a(node->n_pending == 0); - ut_a(node->n_pending_flushes == 0); - ut_a(!node->being_extended); - ut_a(node->modification_counter == node->flush_counter - || node->space->purpose == FIL_TYPE_TEMPORARY + ut_ad(mutex_own(&fil_system.mutex)); + ut_a(is_open()); + ut_a(n_pending == 0); + ut_a(n_pending_flushes == 0); + ut_a(!being_extended); + ut_a(modification_counter == flush_counter + || space->purpose == FIL_TYPE_TEMPORARY || srv_fast_shutdown == 2 || !srv_was_started); - ret = os_file_close(node->handle); + ret = os_file_close(handle); ut_a(ret); - /* printf("Closing file %s\n", node->name); */ + /* printf("Closing file %s\n", name); */ - node->handle = OS_FILE_CLOSED; - ut_ad(!node->is_open()); + handle = OS_FILE_CLOSED; + ut_ad(!is_open()); ut_a(fil_system.n_open > 0); fil_system.n_open--; fil_n_file_opened--; - if (fil_space_belongs_in_lru(node->space)) { - + if (fil_space_belongs_in_lru(space)) { ut_a(UT_LIST_GET_LEN(fil_system.LRU) > 0); - - /* The node is in the LRU list, remove it */ - UT_LIST_REMOVE(fil_system.LRU, node); + UT_LIST_REMOVE(fil_system.LRU, this); } } @@ -810,7 +803,7 @@ fil_try_to_close_file_in_LRU( && node->n_pending_flushes == 0 && !node->being_extended) { - fil_node_close_file(node); + node->close(); return(true); } @@ -1240,7 +1233,7 @@ fil_node_close_to_free( ut_a(!node->being_extended); if (node->is_open()) { - /* We fool the assertion in fil_node_close_file() to think + /* We fool the assertion in fil_node_t::close() to think there are no unflushed modifications in the file */ node->modification_counter = node->flush_counter; @@ -1259,7 +1252,7 @@ fil_node_close_to_free( UT_LIST_REMOVE(fil_system.unflushed_spaces, space); } - fil_node_close_file(node); + node->close(); } } @@ -1750,7 +1743,7 @@ void fil_space_t::close() node != NULL; node = UT_LIST_GET_NEXT(chain, node)) { if (node->is_open()) { - fil_node_close_file(node); + node->close(); } } @@ -1911,7 +1904,7 @@ fil_close_all_files(void) node = UT_LIST_GET_NEXT(chain, node)) { if (node->is_open()) { - fil_node_close_file(node); + node->close(); } } @@ -1958,7 +1951,7 @@ fil_close_log_files( node = UT_LIST_GET_NEXT(chain, node)) { if (node->is_open()) { - fil_node_close_file(node); + node->close(); } } diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 968fd1573c6..eb00239ca4e 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -327,6 +327,9 @@ struct fil_node_t { { return(handle != OS_FILE_CLOSED); } + + /** Close the file handle. */ + void close(); }; /** Value of fil_node_t::magic_n */ diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h index e4b8947ac3a..0f6fe158264 100644 --- a/storage/innobase/include/lock0lock.h +++ b/storage/innobase/include/lock0lock.h @@ -806,69 +806,6 @@ lock_trx_has_expl_x_lock( MY_ATTRIBUTE((nonnull, warn_unused_result)); #endif /* UNIV_DEBUG */ -/** -Allocate cached locks for the transaction. -@param trx allocate cached record locks for this transaction */ -void -lock_trx_alloc_locks(trx_t* trx); - -/** Lock modes and types */ -/* @{ */ -#define LOCK_MODE_MASK 0xFUL /*!< mask used to extract mode from the - type_mode field in a lock */ -/** Lock types */ -/* @{ */ -#define LOCK_TABLE 16U /*!< table lock */ -#define LOCK_REC 32U /*!< record lock */ -#define LOCK_TYPE_MASK 0xF0UL /*!< mask used to extract lock type from the - type_mode field in a lock */ -#if LOCK_MODE_MASK & LOCK_TYPE_MASK -# error "LOCK_MODE_MASK & LOCK_TYPE_MASK" -#endif - -#define LOCK_WAIT 256U /*!< Waiting lock flag; when set, it - means that the lock has not yet been - granted, it is just waiting for its - turn in the wait queue */ -/* Precise modes */ -#define LOCK_ORDINARY 0 /*!< this flag denotes an ordinary - next-key lock in contrast to LOCK_GAP - or LOCK_REC_NOT_GAP */ -#define LOCK_GAP 512U /*!< when this bit is set, it means that the - lock holds only on the gap before the record; - for instance, an x-lock on the gap does not - give permission to modify the record on which - the bit is set; locks of this type are created - when records are removed from the index chain - of records */ -#define LOCK_REC_NOT_GAP 1024U /*!< this bit means that the lock is only on - the index record and does NOT block inserts - to the gap before the index record; this is - used in the case when we retrieve a record - with a unique key, and is also used in - locking plain SELECTs (not part of UPDATE - or DELETE) when the user has set the READ - COMMITTED isolation level */ -#define LOCK_INSERT_INTENTION 2048U/*!< this bit is set when we place a waiting - gap type record lock request in order to let - an insert of an index record to wait until - there are no conflicting locks by other - transactions on the gap; note that this flag - remains set when the waiting lock is granted, - or if the lock is inherited to a neighboring - record */ -#define LOCK_PREDICATE 8192U /*!< Predicate lock */ -#define LOCK_PRDT_PAGE 16384U /*!< Page lock */ - - -#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_PREDICATE|LOCK_PRDT_PAGE)&LOCK_MODE_MASK -# error -#endif -#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_PREDICATE|LOCK_PRDT_PAGE)&LOCK_TYPE_MASK -# error -#endif -/* @} */ - /** Lock operation struct */ struct lock_op_t{ dict_table_t* table; /*!< table to be locked */ diff --git a/storage/innobase/include/lock0priv.h b/storage/innobase/include/lock0priv.h index f5b2b51acfc..d9377e97d40 100644 --- a/storage/innobase/include/lock0priv.h +++ b/storage/innobase/include/lock0priv.h @@ -42,19 +42,6 @@ those functions in lock/ */ #define UINT32_MAX (4294967295U) #endif -/** A table lock */ -struct lock_table_t { - dict_table_t* table; /*!< database table in dictionary - cache */ - UT_LIST_NODE_T(lock_t) - locks; /*!< list of locks on the same - table */ - /** Print the table lock into the given output stream - @param[in,out] out the output stream - @return the given output stream. */ - std::ostream& print(std::ostream& out) const; -}; - /** Print the table lock into the given output stream @param[in,out] out the output stream @return the given output stream. */ @@ -77,131 +64,11 @@ operator<<(std::ostream& out, const lock_table_t& lock) return(lock.print(out)); } -/** Record lock for a page */ -struct lock_rec_t { - ib_uint32_t space; /*!< space id */ - ib_uint32_t page_no; /*!< page number */ - ib_uint32_t n_bits; /*!< number of bits in the lock - bitmap; NOTE: the lock bitmap is - placed immediately after the - lock struct */ - - /** Print the record lock into the given output stream - @param[in,out] out the output stream - @return the given output stream. */ - std::ostream& print(std::ostream& out) const; -}; - -/** Print the record lock into the given output stream -@param[in,out] out the output stream -@return the given output stream. */ -inline -std::ostream& lock_rec_t::print(std::ostream& out) const -{ - out << "[lock_rec_t: space=" << space << ", page_no=" << page_no - << ", n_bits=" << n_bits << "]"; - return(out); -} - -inline -std::ostream& -operator<<(std::ostream& out, const lock_rec_t& lock) -{ - return(lock.print(out)); -} - -/** Lock struct; protected by lock_sys.mutex */ -struct lock_t { - trx_t* trx; /*!< transaction owning the - lock */ - UT_LIST_NODE_T(lock_t) - trx_locks; /*!< list of the locks of the - transaction */ - - dict_index_t* index; /*!< index for a record lock */ - - lock_t* hash; /*!< hash chain node for a record - lock. The link node in a singly linked - list, used during hashing. */ - - /* Statistics for how long lock has been held and time - how long this lock had to be waited before it was granted */ - time_t requested_time; /*!< Lock request time */ - ulint wait_time; /*!< Time waited this lock or 0 */ - - union { - lock_table_t tab_lock;/*!< table lock */ - lock_rec_t rec_lock;/*!< record lock */ - } un_member; /*!< lock details */ - - ib_uint32_t type_mode; /*!< lock type, mode, LOCK_GAP or - LOCK_REC_NOT_GAP, - LOCK_INSERT_INTENTION, - wait flag, ORed */ - - /** Determine if the lock object is a record lock. - @return true if record lock, false otherwise. */ - bool is_record_lock() const - { - return(type() == LOCK_REC); - } - - bool is_waiting() const - { - return(type_mode & LOCK_WAIT); - } - - bool is_gap() const - { - return(type_mode & LOCK_GAP); - } - - bool is_record_not_gap() const - { - return(type_mode & LOCK_REC_NOT_GAP); - } - - bool is_insert_intention() const - { - return(type_mode & LOCK_INSERT_INTENTION); - } - - ulint type() const { - return(type_mode & LOCK_TYPE_MASK); - } - - enum lock_mode mode() const - { - return(static_cast<enum lock_mode>(type_mode & LOCK_MODE_MASK)); - } - - /** Print the lock object into the given output stream. - @param[in,out] out the output stream - @return the given output stream. */ - std::ostream& print(std::ostream& out) const; - - /** Convert the member 'type_mode' into a human readable string. - @return human readable string */ - std::string type_mode_string() const; - - const char* type_string() const - { - switch (type_mode & LOCK_TYPE_MASK) { - case LOCK_REC: - return("LOCK_REC"); - case LOCK_TABLE: - return("LOCK_TABLE"); - default: - ut_error; - } - } -}; - /** Convert the member 'type_mode' into a human readable string. @return human readable string */ inline std::string -lock_t::type_mode_string() const +ib_lock_t::type_mode_string() const { std::ostringstream sout; sout << type_string(); @@ -227,7 +94,7 @@ lock_t::type_mode_string() const inline std::ostream& -lock_t::print(std::ostream& out) const +ib_lock_t::print(std::ostream& out) const { out << "[lock_t: type_mode=" << type_mode << "(" << type_mode_string() << ")"; @@ -244,7 +111,7 @@ lock_t::print(std::ostream& out) const inline std::ostream& -operator<<(std::ostream& out, const lock_t& lock) +operator<<(std::ostream& out, const ib_lock_t& lock) { return(lock.print(out)); } diff --git a/storage/innobase/include/lock0priv.ic b/storage/innobase/include/lock0priv.ic index 150a80b7be4..b5be9076644 100644 --- a/storage/innobase/include/lock0priv.ic +++ b/storage/innobase/include/lock0priv.ic @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2007, 2014, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -388,17 +389,10 @@ lock_table_has( const dict_table_t* table, /*!< in: table */ lock_mode in_mode)/*!< in: lock mode */ { - if (trx->lock.table_locks.empty()) { - return(NULL); - } - - typedef lock_pool_t::const_reverse_iterator iterator; - - iterator end = trx->lock.table_locks.rend(); - /* Look for stronger locks the same trx already has on the table */ - for (iterator it = trx->lock.table_locks.rbegin(); it != end; ++it) { + for (lock_list::const_iterator it = trx->lock.table_locks.begin(), + end = trx->lock.table_locks.end(); it != end; ++it) { const lock_t* lock = *it; diff --git a/storage/innobase/include/lock0types.h b/storage/innobase/include/lock0types.h index 792a5f21acb..1aac5d20a59 100644 --- a/storage/innobase/include/lock0types.h +++ b/storage/innobase/include/lock0types.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2018, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -72,6 +73,195 @@ const char* lock_mode_string(enum lock_mode mode) } } -typedef UT_LIST_BASE_NODE_T(lock_t) trx_lock_list_t; +/** A table lock */ +struct lock_table_t { + dict_table_t* table; /*!< database table in dictionary + cache */ + UT_LIST_NODE_T(ib_lock_t) + locks; /*!< list of locks on the same + table */ + /** Print the table lock into the given output stream + @param[in,out] out the output stream + @return the given output stream. */ + std::ostream& print(std::ostream& out) const; +}; + +/** Record lock for a page */ +struct lock_rec_t { + ib_uint32_t space; /*!< space id */ + ib_uint32_t page_no; /*!< page number */ + ib_uint32_t n_bits; /*!< number of bits in the lock + bitmap; NOTE: the lock bitmap is + placed immediately after the + lock struct */ + + /** Print the record lock into the given output stream + @param[in,out] out the output stream + @return the given output stream. */ + std::ostream& print(std::ostream& out) const; +}; + +/** Print the record lock into the given output stream +@param[in,out] out the output stream +@return the given output stream. */ +inline +std::ostream& lock_rec_t::print(std::ostream& out) const +{ + out << "[lock_rec_t: space=" << space << ", page_no=" << page_no + << ", n_bits=" << n_bits << "]"; + return(out); +} + +inline +std::ostream& +operator<<(std::ostream& out, const lock_rec_t& lock) +{ + return(lock.print(out)); +} + +#define LOCK_MODE_MASK 0xFUL /*!< mask used to extract mode from the + type_mode field in a lock */ +/** Lock types */ +/* @{ */ +#define LOCK_TABLE 16U /*!< table lock */ +#define LOCK_REC 32U /*!< record lock */ +#define LOCK_TYPE_MASK 0xF0UL /*!< mask used to extract lock type from the + type_mode field in a lock */ +#if LOCK_MODE_MASK & LOCK_TYPE_MASK +# error "LOCK_MODE_MASK & LOCK_TYPE_MASK" +#endif + +#define LOCK_WAIT 256U /*!< Waiting lock flag; when set, it + means that the lock has not yet been + granted, it is just waiting for its + turn in the wait queue */ +/* Precise modes */ +#define LOCK_ORDINARY 0 /*!< this flag denotes an ordinary + next-key lock in contrast to LOCK_GAP + or LOCK_REC_NOT_GAP */ +#define LOCK_GAP 512U /*!< when this bit is set, it means that the + lock holds only on the gap before the record; + for instance, an x-lock on the gap does not + give permission to modify the record on which + the bit is set; locks of this type are created + when records are removed from the index chain + of records */ +#define LOCK_REC_NOT_GAP 1024U /*!< this bit means that the lock is only on + the index record and does NOT block inserts + to the gap before the index record; this is + used in the case when we retrieve a record + with a unique key, and is also used in + locking plain SELECTs (not part of UPDATE + or DELETE) when the user has set the READ + COMMITTED isolation level */ +#define LOCK_INSERT_INTENTION 2048U/*!< this bit is set when we place a waiting + gap type record lock request in order to let + an insert of an index record to wait until + there are no conflicting locks by other + transactions on the gap; note that this flag + remains set when the waiting lock is granted, + or if the lock is inherited to a neighboring + record */ +#define LOCK_PREDICATE 8192U /*!< Predicate lock */ +#define LOCK_PRDT_PAGE 16384U /*!< Page lock */ + + +#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_PREDICATE|LOCK_PRDT_PAGE)&LOCK_MODE_MASK +# error +#endif +#if (LOCK_WAIT|LOCK_GAP|LOCK_REC_NOT_GAP|LOCK_INSERT_INTENTION|LOCK_PREDICATE|LOCK_PRDT_PAGE)&LOCK_TYPE_MASK +# error +#endif +/* @} */ + +/** Lock struct; protected by lock_sys.mutex */ +struct ib_lock_t +{ + trx_t* trx; /*!< transaction owning the + lock */ + UT_LIST_NODE_T(ib_lock_t) + trx_locks; /*!< list of the locks of the + transaction */ + + dict_index_t* index; /*!< index for a record lock */ + + ib_lock_t* hash; /*!< hash chain node for a record + lock. The link node in a singly linked + list, used during hashing. */ + + /* Statistics for how long lock has been held and time + how long this lock had to be waited before it was granted */ + time_t requested_time; /*!< Lock request time */ + ulint wait_time; /*!< Time waited this lock or 0 */ + + union { + lock_table_t tab_lock;/*!< table lock */ + lock_rec_t rec_lock;/*!< record lock */ + } un_member; /*!< lock details */ + + ib_uint32_t type_mode; /*!< lock type, mode, LOCK_GAP or + LOCK_REC_NOT_GAP, + LOCK_INSERT_INTENTION, + wait flag, ORed */ + + /** Determine if the lock object is a record lock. + @return true if record lock, false otherwise. */ + bool is_record_lock() const + { + return(type() == LOCK_REC); + } + + bool is_waiting() const + { + return(type_mode & LOCK_WAIT); + } + + bool is_gap() const + { + return(type_mode & LOCK_GAP); + } + + bool is_record_not_gap() const + { + return(type_mode & LOCK_REC_NOT_GAP); + } + + bool is_insert_intention() const + { + return(type_mode & LOCK_INSERT_INTENTION); + } + + ulint type() const { + return(type_mode & LOCK_TYPE_MASK); + } + + enum lock_mode mode() const + { + return(static_cast<enum lock_mode>(type_mode & LOCK_MODE_MASK)); + } + + /** Print the lock object into the given output stream. + @param[in,out] out the output stream + @return the given output stream. */ + std::ostream& print(std::ostream& out) const; + + /** Convert the member 'type_mode' into a human readable string. + @return human readable string */ + std::string type_mode_string() const; + + const char* type_string() const + { + switch (type_mode & LOCK_TYPE_MASK) { + case LOCK_REC: + return("LOCK_REC"); + case LOCK_TABLE: + return("LOCK_TABLE"); + default: + ut_error; + } + } +}; + +typedef UT_LIST_BASE_NODE_T(ib_lock_t) trx_lock_list_t; #endif /* lock0types_h */ diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h index 15ad34ba9a5..eda991661c1 100644 --- a/storage/innobase/include/log0recv.h +++ b/storage/innobase/include/log0recv.h @@ -139,9 +139,21 @@ bool recv_parse_log_recs(lsn_t checkpoint_lsn, store_t store, bool apply); /** Moves the parsing buffer data left to the buffer start. */ void recv_sys_justify_left_parsing_buf(); -/** Backup function checks whether the space id belongs to -the skip table list given in the mariabackup option. */ -extern bool(*check_if_backup_includes)(ulint space_id); +/** Report optimized DDL operation (without redo log), corresponding to MLOG_INDEX_LOAD. +@param[in] space_id tablespace identifier +*/ +extern void(*log_optimized_ddl_op)(ulint space_id); + +/** Report an operation to create, delete, or rename a file during backup. +@param[in] space_id tablespace identifier +@param[in] flags tablespace flags (NULL if not create) +@param[in] name file name (not NUL-terminated) +@param[in] len length of name, in bytes +@param[in] new_name new file name (NULL if not rename) +@param[in] new_len length of new_name, in bytes (0 if NULL) */ +extern void (*log_file_op)(ulint space_id, const byte* flags, + const byte* name, ulint len, + const byte* new_name, ulint new_len); /** Block of log record data */ struct recv_data_t{ diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h index 69374ab3fba..6af212d35ff 100644 --- a/storage/innobase/include/trx0sys.h +++ b/storage/innobase/include/trx0sys.h @@ -646,8 +646,11 @@ public: { mutex_enter(&element->mutex); lf_hash_search_unpin(pins); - if ((trx= element->trx)) - { + trx= element->trx; + if (!trx); + else if (UNIV_UNLIKELY(trx_id != trx->id)) + trx= NULL; + else { if (do_ref_count) trx->reference(); ut_d(validate_element(trx)); diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h index 76d4e9caba0..d6a8b8c771b 100644 --- a/storage/innobase/include/trx0trx.h +++ b/storage/innobase/include/trx0trx.h @@ -476,7 +476,9 @@ Check transaction state */ @param t transaction handle */ #define assert_trx_is_free(t) do { \ ut_ad(trx_state_eq((t), TRX_STATE_NOT_STARTED)); \ - ut_ad(!trx->has_logged()); \ + ut_ad(!(t)->id); \ + ut_ad(!(t)->has_logged()); \ + ut_ad(!(t)->is_referenced()); \ ut_ad(!(t)->read_view.is_open()); \ ut_ad((t)->lock.wait_thr == NULL); \ ut_ad(UT_LIST_GET_LEN((t)->lock.trx_locks) == 0); \ @@ -517,7 +519,7 @@ The transaction must have mysql_thd assigned. */ # define assert_trx_nonlocking_or_in_list(trx) ((void)0) #endif /* UNIV_DEBUG */ -typedef std::vector<ib_lock_t*, ut_allocator<ib_lock_t*> > lock_pool_t; +typedef std::vector<ib_lock_t*, ut_allocator<ib_lock_t*> > lock_list; /*******************************************************************//** Latching protocol for trx_lock_t::que_state. trx_lock_t::que_state @@ -579,13 +581,19 @@ struct trx_lock_t { only be modified by the thread that is serving the running transaction. */ - lock_pool_t rec_pool; /*!< Pre-allocated record locks */ + /** Pre-allocated record locks */ + struct { + ib_lock_t lock; byte pad[256]; + } rec_pool[8]; - lock_pool_t table_pool; /*!< Pre-allocated table locks */ + /** Pre-allocated table locks */ + ib_lock_t table_pool[8]; - ulint rec_cached; /*!< Next free rec lock in pool */ + /** Next available rec_pool[] entry */ + unsigned rec_cached; - ulint table_cached; /*!< Next free table lock in pool */ + /** Next available table_pool[] entry */ + unsigned table_cached; mem_heap_t* lock_heap; /*!< memory heap for trx_locks; protected by lock_sys.mutex */ @@ -595,7 +603,7 @@ struct trx_lock_t { and lock_sys.mutex; removals are protected by lock_sys.mutex */ - lock_pool_t table_locks; /*!< All table locks requested by this + lock_list table_locks; /*!< All table locks requested by this transaction, including AUTOINC locks */ bool cancel; /*!< true if the transaction is being diff --git a/storage/innobase/include/ut0pool.h b/storage/innobase/include/ut0pool.h index 6367b53dbe6..d3ea733a440 100644 --- a/storage/innobase/include/ut0pool.h +++ b/storage/innobase/include/ut0pool.h @@ -125,8 +125,7 @@ struct Pool { elem = NULL; } - m_lock_strategy.exit(); - +#if defined HAVE_valgrind || defined __SANITIZE_ADDRESS__ if (elem) { /* Unpoison the memory for AddressSanitizer */ MEM_UNDEFINED(&elem->m_type, sizeof elem->m_type); @@ -135,10 +134,11 @@ struct Pool { actually initialized; we checked that by UNIV_MEM_ASSERT_RW() in mem_free() below. */ UNIV_MEM_VALID(&elem->m_type, sizeof elem->m_type); - return &elem->m_type; } +#endif - return NULL; + m_lock_strategy.exit(); + return elem ? &elem->m_type : NULL; } /** Add the object to the pool. @@ -151,8 +151,12 @@ struct Pool { elem = reinterpret_cast<Element*>(p - sizeof(*elem)); UNIV_MEM_ASSERT_RW(&elem->m_type, sizeof elem->m_type); - elem->m_pool->put(elem); + elem->m_pool->m_lock_strategy.enter(); + + elem->m_pool->putl(elem); MEM_NOACCESS(&elem->m_type, sizeof elem->m_type); + + elem->m_pool->m_lock_strategy.exit(); } protected: @@ -170,17 +174,13 @@ private: /** Release the object to the free pool @param elem element to free */ - void put(Element* elem) + void putl(Element* elem) { - m_lock_strategy.enter(); - ut_ad(elem >= m_start && elem < m_last); ut_ad(Factory::debug(&elem->m_type)); m_pqueue.push(elem); - - m_lock_strategy.exit(); } /** Initialise the elements. diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index 461dec3ee29..07ae85ced5a 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -59,18 +59,6 @@ ulong innodb_lock_schedule_algorithm; /** The value of innodb_deadlock_detect */ my_bool innobase_deadlock_detect; -/** Total number of cached record locks */ -static const ulint REC_LOCK_CACHE = 8; - -/** Maximum record lock size in bytes */ -static const ulint REC_LOCK_SIZE = sizeof(ib_lock_t) + 256; - -/** Total number of cached table locks */ -static const ulint TABLE_LOCK_CACHE = 8; - -/** Size in bytes, of the table lock instance */ -static const ulint TABLE_LOCK_SIZE = sizeof(ib_lock_t); - /*********************************************************************//** Checks if a waiting record lock request still has to wait in a queue. @return lock that is causing the wait */ @@ -1409,13 +1397,13 @@ lock_rec_create_low( } } - if (trx->lock.rec_cached >= trx->lock.rec_pool.size() - || sizeof *lock + n_bytes > REC_LOCK_SIZE) { + if (trx->lock.rec_cached >= UT_ARR_SIZE(trx->lock.rec_pool) + || sizeof *lock + n_bytes > sizeof *trx->lock.rec_pool) { lock = static_cast<lock_t*>( mem_heap_alloc(trx->lock.lock_heap, sizeof *lock + n_bytes)); } else { - lock = trx->lock.rec_pool[trx->lock.rec_cached++]; + lock = &trx->lock.rec_pool[trx->lock.rec_cached++].lock; } lock->trx = trx; @@ -3520,8 +3508,9 @@ lock_table_create( ib_vector_push(trx->autoinc_locks, &lock); - } else if (trx->lock.table_cached < trx->lock.table_pool.size()) { - lock = trx->lock.table_pool[trx->lock.table_cached++]; + } else if (trx->lock.table_cached + < UT_ARR_SIZE(trx->lock.table_pool)) { + lock = &trx->lock.table_pool[trx->lock.table_cached++]; } else { lock = static_cast<lock_t*>( @@ -4373,24 +4362,15 @@ lock_trx_table_locks_remove( ut_ad(trx_mutex_own(trx)); } - typedef lock_pool_t::reverse_iterator iterator; - - iterator end = trx->lock.table_locks.rend(); - - for (iterator it = trx->lock.table_locks.rbegin(); it != end; ++it) { - + for (lock_list::iterator it = trx->lock.table_locks.begin(), + end = trx->lock.table_locks.end(); it != end; ++it) { const lock_t* lock = *it; - if (lock == NULL) { - continue; - } - - ut_a(trx == lock->trx); - ut_a(lock_get_type_low(lock) & LOCK_TABLE); - ut_a(lock->un_member.tab_lock.table != NULL); + ut_ad(!lock || trx == lock->trx); + ut_ad(!lock || lock_get_type_low(lock) & LOCK_TABLE); + ut_ad(!lock || lock->un_member.tab_lock.table); if (lock == lock_to_remove) { - *it = NULL; if (!trx->lock.cancel) { @@ -4807,11 +4787,8 @@ lock_trx_table_locks_find( trx_mutex_enter(trx); - typedef lock_pool_t::const_reverse_iterator iterator; - - iterator end = trx->lock.table_locks.rend(); - - for (iterator it = trx->lock.table_locks.rbegin(); it != end; ++it) { + for (lock_list::const_iterator it = trx->lock.table_locks.begin(), + end = trx->lock.table_locks.end(); it != end; ++it) { const lock_t* lock = *it; @@ -6337,6 +6314,9 @@ lock_trx_release_locks( /*--------------------------------------*/ trx_mutex_enter(trx); trx->state = TRX_STATE_COMMITTED_IN_MEMORY; + /* Ensure that rw_trx_hash_t::find() will no longer find + this transaction. */ + trx->id = 0; trx_mutex_exit(trx); /*--------------------------------------*/ @@ -6547,10 +6527,8 @@ lock_trx_has_sys_table_locks( lock_mutex_enter(); - typedef lock_pool_t::const_reverse_iterator iterator; - - iterator end = trx->lock.table_locks.rend(); - iterator it = trx->lock.table_locks.rbegin(); + const lock_list::const_iterator end = trx->lock.table_locks.end(); + lock_list::const_iterator it = trx->lock.table_locks.begin(); /* Find a valid mode. Note: ib_vector_size() can be 0. */ @@ -7102,33 +7080,6 @@ DeadlockChecker::check_and_resolve(const lock_t* lock, trx_t* trx) return(victim_trx); } -/** -Allocate cached locks for the transaction. -@param trx allocate cached record locks for this transaction */ -void -lock_trx_alloc_locks(trx_t* trx) -{ - ulint sz = REC_LOCK_SIZE * REC_LOCK_CACHE; - byte* ptr = reinterpret_cast<byte*>(ut_malloc_nokey(sz)); - - /* We allocate one big chunk and then distribute it among - the rest of the elements. The allocated chunk pointer is always - at index 0. */ - - for (ulint i = 0; i < REC_LOCK_CACHE; ++i, ptr += REC_LOCK_SIZE) { - trx->lock.rec_pool.push_back( - reinterpret_cast<ib_lock_t*>(ptr)); - } - - sz = TABLE_LOCK_SIZE * TABLE_LOCK_CACHE; - ptr = reinterpret_cast<byte*>(ut_malloc_nokey(sz)); - - for (ulint i = 0; i < TABLE_LOCK_CACHE; ++i, ptr += TABLE_LOCK_SIZE) { - trx->lock.table_pool.push_back( - reinterpret_cast<ib_lock_t*>(ptr)); - } - -} /*************************************************************//** Updates the lock table when a page is split and merged to two pages. */ diff --git a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 6be5040da56..3d240dc58fe 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -169,9 +169,21 @@ typedef std::map< static recv_spaces_t recv_spaces; -/** Backup function checks whether the space id belongs to -the skip table list given in the mariabackup option. */ -bool(*check_if_backup_includes)(ulint space_id); +/** Report optimized DDL operation (without redo log), corresponding to MLOG_INDEX_LOAD. +@param[in] space_id tablespace identifier +*/ +void (*log_optimized_ddl_op)(ulint space_id); + +/** Report an operation to create, delete, or rename a file during backup. +@param[in] space_id tablespace identifier +@param[in] flags tablespace flags (NULL if not create) +@param[in] name file name (not NUL-terminated) +@param[in] len length of name, in bytes +@param[in] new_name new file name (NULL if not rename) +@param[in] new_len length of new_name, in bytes (0 if NULL) */ +void (*log_file_op)(ulint space_id, const byte* flags, + const byte* name, ulint len, + const byte* new_name, ulint new_len); /** Process a file name from a MLOG_FILE_* record. @param[in,out] name file name @@ -381,9 +393,13 @@ fil_name_parse( fil_name_process( reinterpret_cast<char*>(ptr), len, space_id, true); - - break; + /* fall through */ case MLOG_FILE_CREATE2: + if (log_file_op) { + log_file_op(space_id, + type == MLOG_FILE_CREATE2 ? ptr - 4 : NULL, + ptr, len, NULL, 0); + } break; case MLOG_FILE_RENAME2: if (corrupt) { @@ -424,6 +440,11 @@ fil_name_parse( reinterpret_cast<char*>(new_name), new_len, space_id, false); + if (log_file_op) { + log_file_op(space_id, NULL, + ptr, len, new_name, new_len); + } + if (!apply) { break; } @@ -716,6 +737,15 @@ loop: OS_FILE_LOG_BLOCK_SIZE, true); } } + + ulint dl = log_block_get_data_len(buf); + if (dl < LOG_BLOCK_HDR_SIZE + || (dl > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE + && dl != OS_FILE_LOG_BLOCK_SIZE)) { + recv_sys->found_corrupt_log = true; + end_lsn = *start_lsn; + break; + } } if (recv_sys->report(ut_time())) { @@ -2125,7 +2155,8 @@ recv_parse_log_rec( case MLOG_MULTI_REC_END | MLOG_SINGLE_REC_FLAG: case MLOG_DUMMY_RECORD | MLOG_SINGLE_REC_FLAG: case MLOG_CHECKPOINT | MLOG_SINGLE_REC_FLAG: - ib::error() << "Incorrect log record type:" << *ptr; + ib::error() << "Incorrect log record type " + << ib::hex(unsigned(*ptr)); recv_sys->found_corrupt_log = true; return(0); } @@ -2144,7 +2175,6 @@ recv_parse_log_rec( *type, new_ptr, end_ptr, *space, *page_no, apply, NULL, NULL); if (UNIV_UNLIKELY(new_ptr == NULL)) { - return(0); } @@ -2201,30 +2231,30 @@ recv_report_corrupt_log( ib::error() << "############### CORRUPT LOG RECORD FOUND ##################"; + const ulint ptr_offset = ulint(ptr - recv_sys->buf); + ib::info() << "Log record type " << type << ", page " << space << ":" << page_no << ". Log parsing proceeded successfully up to " << recv_sys->recovered_lsn << ". Previous log record type " << recv_previous_parsed_rec_type << ", is multi " << recv_previous_parsed_rec_is_multi << " Recv offset " - << (ptr - recv_sys->buf) << ", prev " + << ptr_offset << ", prev " << recv_previous_parsed_rec_offset; ut_ad(ptr <= recv_sys->buf + recv_sys->len); const ulint limit = 100; - const ulint before - = std::min(recv_previous_parsed_rec_offset, limit); - const ulint after - = std::min(recv_sys->len - ulint(ptr - recv_sys->buf), limit); + const ulint prev_offset = std::min(recv_previous_parsed_rec_offset, + ptr_offset); + const ulint before = std::min(prev_offset, limit); + const ulint after = std::min(recv_sys->len - ptr_offset, limit); ib::info() << "Hex dump starting " << before << " bytes before and" " ending " << after << " bytes after the corrupted record:"; - ut_print_buf(stderr, - recv_sys->buf - + recv_previous_parsed_rec_offset - before, - ulint(ptr - recv_sys->buf) + before + after - - recv_previous_parsed_rec_offset); + const byte* start = recv_sys->buf + prev_offset - before; + + ut_print_buf(stderr, start, ulint(ptr - start) + after); putc('\n', stderr); if (!srv_force_recovery) { @@ -2295,13 +2325,8 @@ loop: len = recv_parse_log_rec(&type, ptr, end_ptr, &space, &page_no, apply, &body); - if (len == 0) { - return(false); - } - if (recv_sys->found_corrupt_log) { - recv_report_corrupt_log( - ptr, type, space, page_no); + recv_report_corrupt_log(ptr, type, space, page_no); return(true); } @@ -2309,6 +2334,10 @@ loop: return(true); } + if (len == 0) { + return(false); + } + new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len); if (new_recovered_lsn > recv_sys->scanned_lsn) { @@ -2396,11 +2425,8 @@ loop: /* fall through */ case MLOG_INDEX_LOAD: if (type == MLOG_INDEX_LOAD) { - if (check_if_backup_includes - && !check_if_backup_includes(space)) { - ut_ad(srv_operation - == SRV_OPERATION_BACKUP); - return true; + if (log_optimized_ddl_op) { + log_optimized_ddl_op(space); } } /* fall through */ @@ -2433,13 +2459,10 @@ loop: &type, ptr, end_ptr, &space, &page_no, false, &body); - if (len == 0) { - return(false); - } - if (recv_sys->found_corrupt_log || type == MLOG_CHECKPOINT - || (*ptr & MLOG_SINGLE_REC_FLAG)) { + || (ptr != end_ptr + && (*ptr & MLOG_SINGLE_REC_FLAG))) { recv_sys->found_corrupt_log = true; recv_report_corrupt_log( ptr, type, space, page_no); @@ -2450,6 +2473,10 @@ loop: return(true); } + if (len == 0) { + return(false); + } + recv_previous_parsed_rec_type = type; recv_previous_parsed_rec_offset = recv_sys->recovered_offset + total_len; diff --git a/storage/innobase/mtr/mtr0log.cc b/storage/innobase/mtr/mtr0log.cc index b789e24f19c..6baf1f06bf9 100644 --- a/storage/innobase/mtr/mtr0log.cc +++ b/storage/innobase/mtr/mtr0log.cc @@ -98,7 +98,11 @@ mlog_parse_initial_log_record( } *type = mlog_id_t(*ptr & ~MLOG_SINGLE_REC_FLAG); - ut_ad(*type <= MLOG_BIGGEST_TYPE || EXTRA_CHECK_MLOG_NUMBER(*type)); + if (UNIV_UNLIKELY(*type > MLOG_BIGGEST_TYPE + && !EXTRA_CHECK_MLOG_NUMBER(*type))) { + recv_sys->found_corrupt_log = true; + return NULL; + } ptr++; diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc index 67ae193e820..36b9a135b10 100644 --- a/storage/innobase/page/page0cur.cc +++ b/storage/innobase/page/page0cur.cc @@ -2256,7 +2256,10 @@ page_cur_parse_delete_rec( offset = mach_read_from_2(ptr); ptr += 2; - ut_a(offset <= srv_page_size); + if (UNIV_UNLIKELY(offset >= srv_page_size)) { + recv_sys->found_corrupt_log = true; + return NULL; + } if (block) { page_t* page = buf_block_get_frame(block); diff --git a/storage/innobase/trx/trx0trx.cc b/storage/innobase/trx/trx0trx.cc index 5b3e4f04bd5..30c19669cee 100644 --- a/storage/innobase/trx/trx0trx.cc +++ b/storage/innobase/trx/trx0trx.cc @@ -118,8 +118,6 @@ trx_init( /*=====*/ trx_t* trx) { - trx->id = 0; - trx->no = TRX_ID_MAX; trx->state = TRX_STATE_NOT_STARTED; @@ -197,11 +195,7 @@ struct TrxFactory { the constructors of the trx_t members. */ new(&trx->mod_tables) trx_mod_tables_t(); - new(&trx->lock.rec_pool) lock_pool_t(); - - new(&trx->lock.table_pool) lock_pool_t(); - - new(&trx->lock.table_locks) lock_pool_t(); + new(&trx->lock.table_locks) lock_list(); new(&trx->read_view) ReadView(); @@ -225,8 +219,6 @@ struct TrxFactory { &trx_named_savept_t::trx_savepoints); mutex_create(LATCH_ID_TRX, &trx->mutex); - - lock_trx_alloc_locks(trx); } /** Release resources held by the transaction object. @@ -256,27 +248,7 @@ struct TrxFactory { ut_ad(!trx->read_view.is_open()); - if (!trx->lock.rec_pool.empty()) { - - /* See lock_trx_alloc_locks() why we only free - the first element. */ - - ut_free(trx->lock.rec_pool[0]); - } - - if (!trx->lock.table_pool.empty()) { - - /* See lock_trx_alloc_locks() why we only free - the first element. */ - - ut_free(trx->lock.table_pool[0]); - } - - trx->lock.rec_pool.~lock_pool_t(); - - trx->lock.table_pool.~lock_pool_t(); - - trx->lock.table_locks.~lock_pool_t(); + trx->lock.table_locks.~lock_list(); trx->read_view.~ReadView(); } @@ -412,7 +384,12 @@ trx_t *trx_create() /* Should have been either just initialized or .clear()ed by trx_free(). */ - ut_a(trx->mod_tables.size() == 0); + ut_ad(trx->mod_tables.empty()); + ut_ad(trx->lock.table_locks.empty()); + ut_ad(UT_LIST_GET_LEN(trx->lock.trx_locks) == 0); + ut_ad(trx->lock.n_rec_locks == 0); + ut_ad(trx->lock.table_cached == 0); + ut_ad(trx->lock.rec_cached == 0); #ifdef WITH_WSREP trx->wsrep_event = NULL; @@ -993,8 +970,6 @@ trx_start_low( trx_sys.register_rw(trx); } else { - trx->id = 0; - if (!trx_is_autocommit_non_locking(trx)) { /* If this is a read-only transaction that is writing @@ -1250,9 +1225,6 @@ trx_update_mod_tables_timestamp( /*============================*/ trx_t* trx) /*!< in: transaction */ { - - ut_ad(trx->id != 0); - /* consider using trx->start_time if calling time() is too expensive here */ time_t now = ut_time(); @@ -1325,7 +1297,10 @@ trx_commit_in_memory( trx_sys.deregister_rw(trx); } + /* trx->id will be cleared in lock_trx_release_locks(trx). */ + ut_ad(trx->read_only || !trx->rsegs.m_redo.rseg || trx->id); lock_trx_release_locks(trx); + ut_ad(trx->id == 0); /* Remove the transaction from the list of active transactions now that it no longer holds any user locks. */ diff --git a/storage/rocksdb/build_rocksdb.cmake b/storage/rocksdb/build_rocksdb.cmake index 8f1a89d3f9f..d854bdaa03a 100644 --- a/storage/rocksdb/build_rocksdb.cmake +++ b/storage/rocksdb/build_rocksdb.cmake @@ -376,9 +376,31 @@ SET(SOURCES) FOREACH(s ${ROCKSDB_SOURCES}) list(APPEND SOURCES ${ROCKSDB_SOURCE_DIR}/${s}) ENDFOREACH() -IF(MSVC) + +if(MSVC) add_definitions(-DHAVE_SSE42 -DHAVE_PCLMUL) -ENDIF() +else() + set(CMAKE_REQUIRED_FLAGS "-msse4.2 -mpclmul ${CXX11_FLAGS}") + + CHECK_CXX_SOURCE_COMPILES(" +#include <cstdint> +#include <nmmintrin.h> +#include <wmmintrin.h> +int main() { + volatile uint32_t x = _mm_crc32_u32(0, 0); + const auto a = _mm_set_epi64x(0, 0); + const auto b = _mm_set_epi64x(0, 0); + const auto c = _mm_clmulepi64_si128(a, b, 0x00); + auto d = _mm_cvtsi128_si64(c); +} +" HAVE_SSE42) + if(HAVE_SSE42) + set_source_files_properties(${ROCKSDB_SOURCE_DIR}/util/crc32c.cc + PROPERTIES COMPILE_FLAGS "-DHAVE_SSE42 -DHAVE_PCLMUL -msse4.2 -mpclmul") + endif() + unset(CMAKE_REQUIRED_FLAGS) +endif() + IF(CMAKE_VERSION VERSION_GREATER "2.8.10") STRING(TIMESTAMP GIT_DATE_TIME "%Y-%m-%d %H:%M:%S") ENDIF() diff --git a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt index 83ed8522e72..06917181cd6 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt +++ b/storage/rocksdb/mysql-test/rocksdb/t/autoinc_debug-master.opt @@ -1 +1 @@ ---binlog-format=row +--binlog-format=row --rocksdb-flush-log-at-trx-commit=1 diff --git a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test index 1e349d0ff18..3f085269365 100644 --- a/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test +++ b/storage/rocksdb/mysql-test/rocksdb/t/bulk_load_errors.test @@ -60,6 +60,13 @@ SELECT * FROM t1; --source include/wait_until_count_sessions.inc +# Note: in MariaDB, session count will be decremented *before* +# myrocks::rocksdb_close_connection is called. This causes a race condition: +# we may grep the error log before bulk load is finalized. +# To prevent that, do a soft restart of the server (I wasnt able to find +# any other reliable way) +--source include/restart_mysqld_with_option.inc + --let SEARCH_FILE=$LOG2 --let SEARCH_PATTERN=RocksDB: Error [0-9]+ finalizing last SST file while disconnecting --source include/search_pattern_in_file.inc |