diff options
Diffstat (limited to 'storage')
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 37 | ||||
-rw-r--r-- | storage/innobase/lock/lock0lock.cc | 173 | ||||
-rw-r--r-- | storage/innobase/row/row0import.cc | 85 | ||||
-rw-r--r-- | storage/innobase/row/row0merge.cc | 2 |
4 files changed, 170 insertions, 127 deletions
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 9bb1235cbbc..57d8f23b662 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -18873,23 +18873,28 @@ static void bg_wsrep_kill_trx( if (thd) { wsrep_thd_LOCK(thd); - victim_trx = thd_to_trx(thd); - lock_mutex_enter(); - trx_mutex_enter(victim_trx); - wsrep_thd_UNLOCK(thd); - if (victim_trx->id != arg->trx_id) - { - trx_mutex_exit(victim_trx); - lock_mutex_exit(); - victim_trx = NULL; + victim_trx= thd_to_trx(thd); + /* Victim trx might not exist e.g. on MDL-conflict. */ + if (victim_trx) { + lock_mutex_enter(); + trx_mutex_enter(victim_trx); + if (victim_trx->id != arg->trx_id || + victim_trx->state == TRX_STATE_COMMITTED_IN_MEMORY) + { + /* Victim was meanwhile rolled back or + committed */ + lock_mutex_exit(); + trx_mutex_exit(victim_trx); + goto no_victim; + } + } else { +no_victim: + wsrep_thd_UNLOCK(thd); + /* find_thread_by_id() acquired THD::LOCK_kill_data */ wsrep_thd_kill_UNLOCK(thd); + goto ret; } - } - - if (!victim_trx) { - /* it can happen that trx_id was meanwhile rolled back */ - DBUG_PRINT("wsrep", ("no thd for conflicting lock")); - goto ret; + wsrep_thd_UNLOCK(thd); } WSREP_DEBUG("BF kill (" ULINTPF ", seqno: " INT64PF @@ -19044,7 +19049,7 @@ static void bg_wsrep_kill_trx( } ret_awake: - awake = true; + awake= true; ret_unlock: trx_mutex_exit(victim_trx); diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc index ee57a493119..bd09f3b81ca 100644 --- a/storage/innobase/lock/lock0lock.cc +++ b/storage/innobase/lock/lock0lock.cc @@ -637,71 +637,81 @@ lock_rec_get_insert_intention( return(lock->type_mode & LOCK_INSERT_INTENTION); } +#ifdef UNIV_DEBUG #ifdef WITH_WSREP -/** Check if both conflicting lock and other record lock are brute force -(BF). This case is a bug so report lock information and wsrep state. -@param[in] lock_rec1 conflicting waiting record lock or NULL -@param[in] lock_rec2 other waiting record lock -@param[in] trx1 lock_rec1 can be NULL, trx +/** Check if both conflicting lock transaction and other transaction +requesting record lock are brute force (BF). If they are check is +this BF-BF wait correct and if not report BF wait and assert. + +@param[in] lock_rec other waiting record lock +@param[in] trx trx requesting conflicting record lock */ -static void wsrep_assert_no_bf_bf_wait( - const lock_t* lock_rec1, - const lock_t* lock_rec2, - const trx_t* trx1) +static void wsrep_assert_no_bf_bf_wait(const lock_t *lock, const trx_t *trx) { - ut_ad(!lock_rec1 || lock_get_type_low(lock_rec1) == LOCK_REC); - ut_ad(lock_get_type_low(lock_rec2) == LOCK_REC); + ut_ad(lock_get_type_low(lock) == LOCK_REC); ut_ad(lock_mutex_own()); + trx_t* lock_trx= lock->trx; /* Note that we are holding lock_sys->mutex, thus we should not acquire THD::LOCK_thd_data mutex below to avoid mutexing order violation. */ - if (!trx1->is_wsrep() || !lock_rec2->trx->is_wsrep()) + if (!trx->is_wsrep() || !lock_trx->is_wsrep()) return; - if (UNIV_LIKELY(!wsrep_thd_is_BF(trx1->mysql_thd, FALSE))) - return; - if (UNIV_LIKELY(!wsrep_thd_is_BF(lock_rec2->trx->mysql_thd, FALSE))) + if (UNIV_LIKELY(!wsrep_thd_is_BF(trx->mysql_thd, FALSE)) + || UNIV_LIKELY(!wsrep_thd_is_BF(lock_trx->mysql_thd, FALSE))) return; - /* if BF - BF order is honored, we can keep trx1 waiting for the lock */ - if (wsrep_trx_order_before(trx1->mysql_thd, lock_rec2->trx->mysql_thd)) + ut_ad(trx->state == TRX_STATE_ACTIVE); + + trx_mutex_enter(lock_trx); + const trx_state_t trx2_state= lock_trx->state; + trx_mutex_exit(lock_trx); + + /* If transaction is already committed in memory or + prepared we should wait. When transaction is committed in + memory we held trx mutex, but not lock_sys->mutex. Therefore, + we could end here before transaction has time to do + lock_release() that is protected with lock_sys->mutex. */ + switch (trx2_state) { + case TRX_STATE_COMMITTED_IN_MEMORY: + case TRX_STATE_PREPARED: return; + case TRX_STATE_ACTIVE: + break; + default: + ut_ad("invalid state" == 0); + } - /* avoiding BF-BF conflict assert, if victim is already aborting - or rolling back for replaying - */ - if (wsrep_trx_is_aborting(lock_rec2->trx->mysql_thd)) + /* If BF - BF order is honored, i.e. trx already holding + record lock should be ordered before this new lock request + we can keep trx waiting for the lock. If conflicting + transaction is already aborting or rolling back for replaying + we can also let new transaction waiting. */ + if (wsrep_trx_order_before(lock_trx->mysql_thd, trx->mysql_thd) + || wsrep_trx_is_aborting(lock_trx->mysql_thd)) return; mtr_t mtr; - if (lock_rec1) { - ib::error() << "Waiting lock on table: " - << lock_rec1->index->table->name - << " index: " - << lock_rec1->index->name() - << " that has conflicting lock "; - lock_rec_print(stderr, lock_rec1, mtr); - } - ib::error() << "Conflicting lock on table: " - << lock_rec2->index->table->name + << lock->index->table->name << " index: " - << lock_rec2->index->name() + << lock->index->name() << " that has lock "; - lock_rec_print(stderr, lock_rec2, mtr); + lock_rec_print(stderr, lock, mtr); ib::error() << "WSREP state: "; - wsrep_report_bf_lock_wait(trx1->mysql_thd, - trx1->id); - wsrep_report_bf_lock_wait(lock_rec2->trx->mysql_thd, - lock_rec2->trx->id); + wsrep_report_bf_lock_wait(trx->mysql_thd, + trx->id); + wsrep_report_bf_lock_wait(lock_trx->mysql_thd, + lock_trx->id); /* BF-BF wait is a bug */ ut_error; } #endif /* WITH_WSREP */ +#endif /* UNIV_DEBUG */ /*********************************************************************//** Checks if a lock request for a new lock has to wait for request lock2. @@ -824,9 +834,11 @@ lock_rec_has_to_wait( return false; } - /* There should not be two conflicting locks that are - brute force. If there is it is a bug. */ - wsrep_assert_no_bf_bf_wait(NULL, lock2, trx); + /* We very well can let bf to wait normally as other + BF will be replayed in case of conflict. For debug + builds we will do additional sanity checks to catch + unsupported bf wait if any. */ + ut_d(wsrep_assert_no_bf_bf_wait(lock2, trx)); #endif /* WITH_WSREP */ return true; @@ -1095,66 +1107,35 @@ lock_rec_other_has_expl_req( #endif /* UNIV_DEBUG */ #ifdef WITH_WSREP -static -void -wsrep_kill_victim( -/*==============*/ - const trx_t * const trx, - const lock_t *lock) +static void wsrep_kill_victim(const trx_t * const trx, const lock_t *lock) { ut_ad(lock_mutex_own()); - ut_ad(trx_mutex_own(lock->trx)); + ut_ad(trx->is_wsrep()); + trx_t* lock_trx = lock->trx; + ut_ad(trx_mutex_own(lock_trx)); + ut_ad(lock_trx != trx); - /* quit for native mysql */ - if (!trx->is_wsrep()) return; + if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE)) + return; - my_bool bf_this = wsrep_thd_is_BF(trx->mysql_thd, FALSE); - my_bool bf_other = wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE); - mtr_t mtr; + if (lock_trx->state == TRX_STATE_COMMITTED_IN_MEMORY + || lock_trx->lock.was_chosen_as_deadlock_victim) + return; - if ((bf_this && !bf_other) || - (bf_this && bf_other && wsrep_trx_order_before( - trx->mysql_thd, lock->trx->mysql_thd))) { + my_bool bf_other = wsrep_thd_is_BF(lock_trx->mysql_thd, FALSE); - if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) { - if (UNIV_UNLIKELY(wsrep_debug)) { - ib::info() << "WSREP: BF victim waiting\n"; - } + if (!bf_other + || wsrep_trx_order_before(trx->mysql_thd, + lock_trx->mysql_thd)) { + + if (lock_trx->lock.que_state == TRX_QUE_LOCK_WAIT) { + if (UNIV_UNLIKELY(wsrep_debug)) + WSREP_INFO("BF victim waiting"); /* cannot release lock, until our lock is in the queue*/ - } else if (lock->trx != trx) { - if (wsrep_log_conflicts) { - if (bf_this) { - ib::info() << "*** Priority TRANSACTION:"; - } else { - ib::info() << "*** Victim TRANSACTION:"; - } - - trx_print_latched(stderr, trx, 3000); - - if (bf_other) { - ib::info() << "*** Priority TRANSACTION:"; - } else { - ib::info() << "*** Victim TRANSACTION:"; - } - trx_print_latched(stderr, lock->trx, 3000); - - ib::info() << "*** WAITING FOR THIS LOCK TO BE GRANTED:"; - - if (lock_get_type(lock) == LOCK_REC) { - lock_rec_print(stderr, lock, mtr); - } else { - lock_table_print(stderr, lock); - } - - ib::info() << " SQL1: " - << wsrep_thd_query(trx->mysql_thd); - ib::info() << " SQL2: " - << wsrep_thd_query(lock->trx->mysql_thd); - } - - wsrep_innobase_kill_one_trx(trx->mysql_thd, - trx, lock->trx, TRUE); + } else { + wsrep_innobase_kill_one_trx(trx->mysql_thd, trx, + lock_trx, true); } } } @@ -2248,10 +2229,6 @@ static void lock_rec_dequeue_from_page(lock_t* in_lock) /* Grant the lock */ ut_ad(lock->trx != in_lock->trx); lock_grant(lock); -#ifdef WITH_WSREP - } else { - wsrep_assert_no_bf_bf_wait(c, lock, c->trx); -#endif /* WITH_WSREP */ } } } else { @@ -4204,10 +4181,6 @@ released: /* Grant the lock */ ut_ad(trx != lock->trx); lock_grant(lock); -#ifdef WITH_WSREP - } else { - wsrep_assert_no_bf_bf_wait(c, lock, c->trx); -#endif /* WITH_WSREP */ } } } else { diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc index 7b888cdecc1..0f9e309accb 100644 --- a/storage/innobase/row/row0import.cc +++ b/storage/innobase/row/row0import.cc @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2015, 2020, MariaDB Corporation. +Copyright (c) 2015, 2021, MariaDB Corporation. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -3370,6 +3370,57 @@ struct fil_iterator_t { byte* crypt_io_buffer; /*!< IO buffer when encrypted */ }; + +/** InnoDB writes page by page when there is page compressed +tablespace involved. It does help to save the disk space when +punch hole is enabled +@param iter Tablespace iterator +@param write_request Request to write into the file +@param offset offset of the file to be written +@param writeptr buffer to be written +@param n_bytes number of bytes to be written +@param try_punch_only Try the range punch only because the + current range is full of empty pages +@return DB_SUCCESS */ +static +dberr_t fil_import_compress_fwrite(const fil_iterator_t &iter, + const IORequest &write_request, + os_offset_t offset, + const byte *writeptr, + ulint n_bytes, + bool try_punch_only=false) +{ + dberr_t err= os_file_punch_hole(iter.file, offset, n_bytes); + if (err != DB_SUCCESS || try_punch_only) + return err; + + for (ulint j= 0; j < n_bytes; j+= srv_page_size) + { + /* Read the original data length from block and + safer to read FIL_PAGE_COMPRESSED_SIZE because it + is not encrypted*/ + ulint n_write_bytes= srv_page_size; + if (j || offset) + { + n_write_bytes= mach_read_from_2(writeptr + j + FIL_PAGE_DATA); + const unsigned ptype= mach_read_from_2(writeptr + j + FIL_PAGE_TYPE); + /* Ignore the empty page */ + if (ptype == 0 && n_write_bytes == 0) + continue; + n_write_bytes+= FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE; + if (ptype == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED) + n_write_bytes+= FIL_PAGE_COMPRESSION_METHOD_SIZE; + } + + err= os_file_write(write_request, iter.filepath, iter.file, + writeptr + j, offset + j, n_write_bytes); + if (err != DB_SUCCESS) + break; + } + + return err; +} + /********************************************************************//** TODO: This can be made parallel trivially by chunking up the file and creating a callback per thread. . Main benefit will be to use multiple CPUs for @@ -3411,7 +3462,10 @@ fil_iterate( /* TODO: For ROW_FORMAT=COMPRESSED tables we do a lot of useless copying for non-index pages. Unfortunately, it is required by buf_zip_decompress() */ - dberr_t err = DB_SUCCESS; + dberr_t err = DB_SUCCESS; + bool page_compressed = false; + bool punch_hole = true; + IORequest write_request(IORequest::WRITE); for (offset = iter.start; offset < iter.end; offset += n_bytes) { if (callback.is_interrupted()) { @@ -3489,9 +3543,8 @@ page_corrupted: goto func_exit; } - const bool page_compressed - = fil_page_is_compressed_encrypted(src) - || fil_page_is_compressed(src); + page_compressed= fil_page_is_compressed_encrypted(src) + || fil_page_is_compressed(src); if (page_compressed && block->page.zip.data) { goto page_corrupted; @@ -3646,13 +3699,23 @@ not_encrypted: } } - /* A page was updated in the set, write back to disk. */ - if (updated) { - IORequest write_request(IORequest::WRITE); + if (page_compressed && punch_hole) { + err = fil_import_compress_fwrite( + iter, write_request, offset, writeptr, n_bytes, + !updated); - err = os_file_write(write_request, - iter.filepath, iter.file, - writeptr, offset, n_bytes); + if (err != DB_SUCCESS) { + punch_hole = false; + if (updated) { + goto normal_write; + } + } + } else if (updated) { + /* A page was updated in the set, write back to disk. */ +normal_write: + err = os_file_write( + write_request, iter.filepath, iter.file, + writeptr, offset, n_bytes); if (err != DB_SUCCESS) { goto func_exit; diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc index 7a9c2cd9240..0638465527c 100644 --- a/storage/innobase/row/row0merge.cc +++ b/storage/innobase/row/row0merge.cc @@ -3900,6 +3900,8 @@ row_merge_drop_indexes( ut_ad(prev); ut_a(table->fts); fts_drop_index(table, index, trx); + row_merge_drop_index_dict( + trx, index->id); /* We can remove a DICT_FTS index from the cache, because we do not allow ADD FULLTEXT INDEX |