Diffstat (limited to 'storage')
-rw-r--r--  storage/innobase/handler/ha_innodb.cc    37
-rw-r--r--  storage/innobase/lock/lock0lock.cc      173
-rw-r--r--  storage/innobase/row/row0import.cc       85
-rw-r--r--  storage/innobase/row/row0merge.cc         2
4 files changed, 170 insertions, 127 deletions
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 9bb1235cbbc..57d8f23b662 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -18873,23 +18873,28 @@ static void bg_wsrep_kill_trx(
if (thd) {
wsrep_thd_LOCK(thd);
- victim_trx = thd_to_trx(thd);
- lock_mutex_enter();
- trx_mutex_enter(victim_trx);
- wsrep_thd_UNLOCK(thd);
- if (victim_trx->id != arg->trx_id)
- {
- trx_mutex_exit(victim_trx);
- lock_mutex_exit();
- victim_trx = NULL;
+ victim_trx= thd_to_trx(thd);
+ /* Victim trx might not exist, e.g. on an MDL conflict. */
+ if (victim_trx) {
+ lock_mutex_enter();
+ trx_mutex_enter(victim_trx);
+ if (victim_trx->id != arg->trx_id ||
+ victim_trx->state == TRX_STATE_COMMITTED_IN_MEMORY)
+ {
+ /* Victim was meanwhile rolled back or
+ committed */
+ lock_mutex_exit();
+ trx_mutex_exit(victim_trx);
+ goto no_victim;
+ }
+ } else {
+no_victim:
+ wsrep_thd_UNLOCK(thd);
+ /* find_thread_by_id() acquired THD::LOCK_kill_data */
wsrep_thd_kill_UNLOCK(thd);
+ goto ret;
}
- }
-
- if (!victim_trx) {
- /* it can happen that trx_id was meanwhile rolled back */
- DBUG_PRINT("wsrep", ("no thd for conflicting lock"));
- goto ret;
+ wsrep_thd_UNLOCK(thd);
}
WSREP_DEBUG("BF kill (" ULINTPF ", seqno: " INT64PF
@@ -19044,7 +19049,7 @@ static void bg_wsrep_kill_trx(
}
ret_awake:
- awake = true;
+ awake= true;
ret_unlock:
trx_mutex_exit(victim_trx);
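
The ha_innodb.cc hunk above reorders bg_wsrep_kill_trx() so that the victim transaction is revalidated (same trx id, not yet committed in memory) while lock_sys->mutex and the trx mutex are both held, rather than trusting the initial lookup. A minimal standalone C++17 sketch of that revalidate-under-lock pattern follows; Trx, trx_by_thd and kill_if_still_victim are hypothetical stand-ins, not InnoDB's API.

#include <cstdint>
#include <mutex>
#include <unordered_map>

enum class TrxState { Active, Prepared, CommittedInMemory };

struct Trx {
    std::uint64_t id;
    TrxState state = TrxState::Active;
    std::mutex mtx;                    // stands in for trx->mutex
};

std::mutex lock_sys_mutex;             // stands in for lock_sys->mutex
std::unordered_map<std::uint64_t, Trx*> trx_by_thd;

// Run kill() on the victim only if it still matches the id captured when
// the kill was queued and has not already committed. Both mutexes are
// held while kill() runs, mirroring the ordering in the hunk above.
template <typename Kill>
bool kill_if_still_victim(std::uint64_t thd_key, std::uint64_t expected_id,
                          Kill kill)
{
    auto it = trx_by_thd.find(thd_key);
    if (it == trx_by_thd.end())
        return false;                  // victim trx may no longer exist

    Trx* victim = it->second;
    std::scoped_lock guard(lock_sys_mutex, victim->mtx);
    if (victim->id != expected_id ||
        victim->state == TrxState::CommittedInMemory)
        return false;                  // meanwhile rolled back or committed

    kill(*victim);                     // still under both mutexes
    return true;
}

Because the id/state check and the kill happen in one critical section, the victim cannot commit or be reused for another transaction in between, which is the race the patch closes.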
diff --git a/storage/innobase/lock/lock0lock.cc b/storage/innobase/lock/lock0lock.cc
index ee57a493119..bd09f3b81ca 100644
--- a/storage/innobase/lock/lock0lock.cc
+++ b/storage/innobase/lock/lock0lock.cc
@@ -637,71 +637,81 @@ lock_rec_get_insert_intention(
return(lock->type_mode & LOCK_INSERT_INTENTION);
}
+#ifdef UNIV_DEBUG
#ifdef WITH_WSREP
-/** Check if both conflicting lock and other record lock are brute force
-(BF). This case is a bug so report lock information and wsrep state.
-@param[in] lock_rec1 conflicting waiting record lock or NULL
-@param[in] lock_rec2 other waiting record lock
-@param[in] trx1 lock_rec1 can be NULL, trx
+/** Check if both the transaction holding the conflicting lock
+and the transaction requesting the record lock are brute force
+(BF). If they are, check whether this BF-BF wait is correct and,
+if not, report the BF wait and assert.
+
+@param[in] lock_rec other waiting record lock
+@param[in] trx trx requesting conflicting record lock
*/
-static void wsrep_assert_no_bf_bf_wait(
- const lock_t* lock_rec1,
- const lock_t* lock_rec2,
- const trx_t* trx1)
+static void wsrep_assert_no_bf_bf_wait(const lock_t *lock, const trx_t *trx)
{
- ut_ad(!lock_rec1 || lock_get_type_low(lock_rec1) == LOCK_REC);
- ut_ad(lock_get_type_low(lock_rec2) == LOCK_REC);
+ ut_ad(lock_get_type_low(lock) == LOCK_REC);
ut_ad(lock_mutex_own());
+ trx_t* lock_trx= lock->trx;
/* Note that we are holding lock_sys->mutex, thus we should
not acquire THD::LOCK_thd_data mutex below to avoid mutexing
order violation. */
- if (!trx1->is_wsrep() || !lock_rec2->trx->is_wsrep())
+ if (!trx->is_wsrep() || !lock_trx->is_wsrep())
return;
- if (UNIV_LIKELY(!wsrep_thd_is_BF(trx1->mysql_thd, FALSE)))
- return;
- if (UNIV_LIKELY(!wsrep_thd_is_BF(lock_rec2->trx->mysql_thd, FALSE)))
+ if (UNIV_LIKELY(!wsrep_thd_is_BF(trx->mysql_thd, FALSE))
+ || UNIV_LIKELY(!wsrep_thd_is_BF(lock_trx->mysql_thd, FALSE)))
return;
- /* if BF - BF order is honored, we can keep trx1 waiting for the lock */
- if (wsrep_trx_order_before(trx1->mysql_thd, lock_rec2->trx->mysql_thd))
+ ut_ad(trx->state == TRX_STATE_ACTIVE);
+
+ trx_mutex_enter(lock_trx);
+ const trx_state_t trx2_state= lock_trx->state;
+ trx_mutex_exit(lock_trx);
+
+ /* If the other transaction is already committed in memory
+ or prepared, we should wait. A transaction that is being
+ committed in memory holds its trx mutex but not
+ lock_sys->mutex, so we could end up here before it has had
+ time to do lock_release(), which is protected by
+ lock_sys->mutex. */
+ switch (trx2_state) {
+ case TRX_STATE_COMMITTED_IN_MEMORY:
+ case TRX_STATE_PREPARED:
return;
+ case TRX_STATE_ACTIVE:
+ break;
+ default:
+ ut_ad("invalid state" == 0);
+ }
- /* avoiding BF-BF conflict assert, if victim is already aborting
- or rolling back for replaying
- */
- if (wsrep_trx_is_aborting(lock_rec2->trx->mysql_thd))
+ /* If BF-BF order is honored, i.e. the trx already holding
+ the record lock is ordered before this new lock request,
+ we can keep trx waiting for the lock. If the conflicting
+ transaction is already aborting or rolling back for replay,
+ we can also let the new transaction wait. */
+ if (wsrep_trx_order_before(lock_trx->mysql_thd, trx->mysql_thd)
+ || wsrep_trx_is_aborting(lock_trx->mysql_thd))
return;
mtr_t mtr;
- if (lock_rec1) {
- ib::error() << "Waiting lock on table: "
- << lock_rec1->index->table->name
- << " index: "
- << lock_rec1->index->name()
- << " that has conflicting lock ";
- lock_rec_print(stderr, lock_rec1, mtr);
- }
-
ib::error() << "Conflicting lock on table: "
- << lock_rec2->index->table->name
+ << lock->index->table->name
<< " index: "
- << lock_rec2->index->name()
+ << lock->index->name()
<< " that has lock ";
- lock_rec_print(stderr, lock_rec2, mtr);
+ lock_rec_print(stderr, lock, mtr);
ib::error() << "WSREP state: ";
- wsrep_report_bf_lock_wait(trx1->mysql_thd,
- trx1->id);
- wsrep_report_bf_lock_wait(lock_rec2->trx->mysql_thd,
- lock_rec2->trx->id);
+ wsrep_report_bf_lock_wait(trx->mysql_thd,
+ trx->id);
+ wsrep_report_bf_lock_wait(lock_trx->mysql_thd,
+ lock_trx->id);
/* BF-BF wait is a bug */
ut_error;
}
#endif /* WITH_WSREP */
+#endif /* UNIV_DEBUG */
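
The rewritten wsrep_assert_no_bf_bf_wait() above snapshots the lock holder's state under its trx mutex and then branches on the copy, so the mutex is not held across the (potentially slow) reporting path. A simplified sketch of that snapshot-then-switch pattern, with stand-in types rather than InnoDB's:

#include <mutex>

enum class State { Active, Prepared, CommittedInMemory };

struct PeerTrx {
    std::mutex mtx;                    // stands in for trx->mutex
    State state = State::Active;
};

// Returns true when the peer is already finishing, i.e. the wait is
// legitimate and no BF-BF diagnostics should be emitted.
bool peer_is_finishing(PeerTrx& peer)
{
    State snapshot;
    {
        std::lock_guard<std::mutex> g(peer.mtx); // trx_mutex_enter/exit
        snapshot = peer.state;
    }
    switch (snapshot) {
    case State::CommittedInMemory:
    case State::Prepared:
        return true;                   // waiting on a finishing trx is fine
    case State::Active:
        return false;                  // genuine BF-BF wait: keep checking
    }
    return false;                      // unreachable; keeps compilers quiet
}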
/*********************************************************************//**
Checks if a lock request for a new lock has to wait for request lock2.
@@ -824,9 +834,11 @@ lock_rec_has_to_wait(
return false;
}
- /* There should not be two conflicting locks that are
- brute force. If there is it is a bug. */
- wsrep_assert_no_bf_bf_wait(NULL, lock2, trx);
+ /* We can well let a BF transaction wait normally here, as
+ the other BF will be replayed in case of conflict. For debug
+ builds we do additional sanity checks to catch any
+ unsupported BF wait. */
+ ut_d(wsrep_assert_no_bf_bf_wait(lock2, trx));
#endif /* WITH_WSREP */
return true;
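
In lock_rec_has_to_wait() the check is now wrapped in ut_d(), InnoDB's debug-only expression macro, so release builds pay nothing for it. Roughly (simplified stand-ins for the real definitions in InnoDB's debug headers):

#ifdef UNIV_DEBUG
# define ut_d(EXPR) EXPR
#else
# define ut_d(EXPR)
#endif

void sanity_check_bf_bf_wait();        /* heavyweight check, defined
                                          elsewhere, debug builds only */

void on_lock_must_wait()
{
	/* ... the wait decision has already been made ... */
	ut_d(sanity_check_bf_bf_wait()); /* compiled out in release builds */
}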
@@ -1095,66 +1107,35 @@ lock_rec_other_has_expl_req(
#endif /* UNIV_DEBUG */
#ifdef WITH_WSREP
-static
-void
-wsrep_kill_victim(
-/*==============*/
- const trx_t * const trx,
- const lock_t *lock)
+static void wsrep_kill_victim(const trx_t * const trx, const lock_t *lock)
{
ut_ad(lock_mutex_own());
- ut_ad(trx_mutex_own(lock->trx));
+ ut_ad(trx->is_wsrep());
+ trx_t* lock_trx = lock->trx;
+ ut_ad(trx_mutex_own(lock_trx));
+ ut_ad(lock_trx != trx);
- /* quit for native mysql */
- if (!trx->is_wsrep()) return;
+ if (!wsrep_thd_is_BF(trx->mysql_thd, FALSE))
+ return;
- my_bool bf_this = wsrep_thd_is_BF(trx->mysql_thd, FALSE);
- my_bool bf_other = wsrep_thd_is_BF(lock->trx->mysql_thd, FALSE);
- mtr_t mtr;
+ if (lock_trx->state == TRX_STATE_COMMITTED_IN_MEMORY
+ || lock_trx->lock.was_chosen_as_deadlock_victim)
+ return;
- if ((bf_this && !bf_other) ||
- (bf_this && bf_other && wsrep_trx_order_before(
- trx->mysql_thd, lock->trx->mysql_thd))) {
+ my_bool bf_other = wsrep_thd_is_BF(lock_trx->mysql_thd, FALSE);
- if (lock->trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
- if (UNIV_UNLIKELY(wsrep_debug)) {
- ib::info() << "WSREP: BF victim waiting\n";
- }
+ if (!bf_other
+ || wsrep_trx_order_before(trx->mysql_thd,
+ lock_trx->mysql_thd)) {
+
+ if (lock_trx->lock.que_state == TRX_QUE_LOCK_WAIT) {
+ if (UNIV_UNLIKELY(wsrep_debug))
+ WSREP_INFO("BF victim waiting");
/* cannot release lock, until our lock
is in the queue*/
- } else if (lock->trx != trx) {
- if (wsrep_log_conflicts) {
- if (bf_this) {
- ib::info() << "*** Priority TRANSACTION:";
- } else {
- ib::info() << "*** Victim TRANSACTION:";
- }
-
- trx_print_latched(stderr, trx, 3000);
-
- if (bf_other) {
- ib::info() << "*** Priority TRANSACTION:";
- } else {
- ib::info() << "*** Victim TRANSACTION:";
- }
- trx_print_latched(stderr, lock->trx, 3000);
-
- ib::info() << "*** WAITING FOR THIS LOCK TO BE GRANTED:";
-
- if (lock_get_type(lock) == LOCK_REC) {
- lock_rec_print(stderr, lock, mtr);
- } else {
- lock_table_print(stderr, lock);
- }
-
- ib::info() << " SQL1: "
- << wsrep_thd_query(trx->mysql_thd);
- ib::info() << " SQL2: "
- << wsrep_thd_query(lock->trx->mysql_thd);
- }
-
- wsrep_innobase_kill_one_trx(trx->mysql_thd,
- trx, lock->trx, TRUE);
+ } else {
+ wsrep_innobase_kill_one_trx(trx->mysql_thd, trx,
+ lock_trx, true);
}
}
}
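
Distilled, the rewritten wsrep_kill_victim() applies its guards in a fixed order: only a BF requester may kill; a holder that is already committing or was chosen as a deadlock victim is left alone; a BF holder that is ordered before the requester is honored; and a holder still waiting for its own lock cannot be killed until the new lock is queued. A hypothetical distillation of that decision order (names are illustrative, not InnoDB's):

enum class Action { None, WaitForQueue, Kill };

struct WsrepTrxView {
    bool is_bf;                  // wsrep_thd_is_BF()
    bool committed_in_memory;    // TRX_STATE_COMMITTED_IN_MEMORY
    bool deadlock_victim;        // lock.was_chosen_as_deadlock_victim
    bool waiting_for_lock;       // que_state == TRX_QUE_LOCK_WAIT
    long seqno;                  // smaller seqno == ordered earlier
};

Action decide(const WsrepTrxView& requester, const WsrepTrxView& holder)
{
    if (!requester.is_bf)
        return Action::None;           // only a BF requester may kill
    if (holder.committed_in_memory || holder.deadlock_victim)
        return Action::None;           // holder already finishing/aborting
    if (holder.is_bf && holder.seqno <= requester.seqno)
        return Action::None;           // BF-BF order already honored
    if (holder.waiting_for_lock)
        return Action::WaitForQueue;   // cannot release until queued
    return Action::Kill;               // wsrep_innobase_kill_one_trx()
}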
@@ -2248,10 +2229,6 @@ static void lock_rec_dequeue_from_page(lock_t* in_lock)
/* Grant the lock */
ut_ad(lock->trx != in_lock->trx);
lock_grant(lock);
-#ifdef WITH_WSREP
- } else {
- wsrep_assert_no_bf_bf_wait(c, lock, c->trx);
-#endif /* WITH_WSREP */
}
}
} else {
@@ -4204,10 +4181,6 @@ released:
/* Grant the lock */
ut_ad(trx != lock->trx);
lock_grant(lock);
-#ifdef WITH_WSREP
- } else {
- wsrep_assert_no_bf_bf_wait(c, lock, c->trx);
-#endif /* WITH_WSREP */
}
}
} else {
diff --git a/storage/innobase/row/row0import.cc b/storage/innobase/row/row0import.cc
index 7b888cdecc1..0f9e309accb 100644
--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2020, MariaDB Corporation.
+Copyright (c) 2015, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -3370,6 +3370,57 @@ struct fil_iterator_t {
byte* crypt_io_buffer; /*!< IO buffer when encrypted */
};
+
+/** InnoDB writes page by page when a page-compressed tablespace
+is involved. This helps to save disk space when punch hole is
+enabled.
+@param iter Tablespace iterator
+@param write_request Request to write into the file
+@param offset File offset to be written
+@param writeptr Buffer to be written
+@param n_bytes Number of bytes to be written
+@param try_punch_only Only punch the hole for the range because
+ the current range is full of empty pages
+@return DB_SUCCESS or error code */
+static
+dberr_t fil_import_compress_fwrite(const fil_iterator_t &iter,
+ const IORequest &write_request,
+ os_offset_t offset,
+ const byte *writeptr,
+ ulint n_bytes,
+ bool try_punch_only=false)
+{
+ dberr_t err= os_file_punch_hole(iter.file, offset, n_bytes);
+ if (err != DB_SUCCESS || try_punch_only)
+ return err;
+
+ for (ulint j= 0; j < n_bytes; j+= srv_page_size)
+ {
+ /* Read the original data length from the block; it is
+ safer to read FIL_PAGE_COMPRESSED_SIZE because it is
+ not encrypted. */
+ ulint n_write_bytes= srv_page_size;
+ if (j || offset)
+ {
+ n_write_bytes= mach_read_from_2(writeptr + j + FIL_PAGE_DATA);
+ const unsigned ptype= mach_read_from_2(writeptr + j + FIL_PAGE_TYPE);
+ /* Ignore the empty page */
+ if (ptype == 0 && n_write_bytes == 0)
+ continue;
+ n_write_bytes+= FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE;
+ if (ptype == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED)
+ n_write_bytes+= FIL_PAGE_COMPRESSION_METHOD_SIZE;
+ }
+
+ err= os_file_write(write_request, iter.filepath, iter.file,
+ writeptr + j, offset + j, n_write_bytes);
+ if (err != DB_SUCCESS)
+ break;
+ }
+
+ return err;
+}
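
For each page after the first (the page at file offset 0 is always written in full), fil_import_compress_fwrite() above derives the number of bytes actually worth writing from the 2-byte compressed length stored at FIL_PAGE_DATA and the page type, adding one extra field for encrypted page-compressed pages. A self-contained sketch of that calculation, with InnoDB's constants replaced by illustrative placeholders:

#include <cstddef>
#include <cstdint>
#include <optional>

// Offsets and type ids below are illustrative, not InnoDB's definitions.
constexpr std::size_t PAGE_TYPE_OFS = 24;
constexpr std::size_t PAGE_DATA_OFS = 38;
constexpr std::size_t COMPRESSED_SIZE_LEN = 2;
constexpr std::size_t COMPRESSION_METHOD_LEN = 2;
constexpr std::uint16_t TYPE_COMPRESSED_ENCRYPTED = 37401;

static std::uint16_t read_be16(const std::uint8_t* p)
{
    return std::uint16_t((p[0] << 8) | p[1]);
}

// Returns the byte count worth writing for one page, or nullopt for an
// all-empty page that the punched hole already covers.
std::optional<std::size_t> bytes_to_write(const std::uint8_t* page)
{
    const std::size_t len = read_be16(page + PAGE_DATA_OFS);
    const std::uint16_t type = read_be16(page + PAGE_TYPE_OFS);
    if (type == 0 && len == 0)
        return std::nullopt;                  // skip the empty page
    std::size_t n = len + PAGE_DATA_OFS + COMPRESSED_SIZE_LEN;
    if (type == TYPE_COMPRESSED_ENCRYPTED)
        n += COMPRESSION_METHOD_LEN;          // extra method field
    return n;
}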
+
/********************************************************************//**
TODO: This can be made parallel trivially by chunking up the file and creating
a callback per thread. Main benefit will be to use multiple CPUs for
@@ -3411,7 +3462,10 @@ fil_iterate(
/* TODO: For ROW_FORMAT=COMPRESSED tables we do a lot of useless
copying for non-index pages. Unfortunately, it is
required by buf_zip_decompress() */
- dberr_t err = DB_SUCCESS;
+ dberr_t err = DB_SUCCESS;
+ bool page_compressed = false;
+ bool punch_hole = true;
+ IORequest write_request(IORequest::WRITE);
for (offset = iter.start; offset < iter.end; offset += n_bytes) {
if (callback.is_interrupted()) {
@@ -3489,9 +3543,8 @@ page_corrupted:
goto func_exit;
}
- const bool page_compressed
- = fil_page_is_compressed_encrypted(src)
- || fil_page_is_compressed(src);
+ page_compressed= fil_page_is_compressed_encrypted(src)
+ || fil_page_is_compressed(src);
if (page_compressed && block->page.zip.data) {
goto page_corrupted;
@@ -3646,13 +3699,23 @@ not_encrypted:
}
}
- /* A page was updated in the set, write back to disk. */
- if (updated) {
- IORequest write_request(IORequest::WRITE);
+ if (page_compressed && punch_hole) {
+ err = fil_import_compress_fwrite(
+ iter, write_request, offset, writeptr, n_bytes,
+ !updated);
- err = os_file_write(write_request,
- iter.filepath, iter.file,
- writeptr, offset, n_bytes);
+ if (err != DB_SUCCESS) {
+ punch_hole = false;
+ if (updated) {
+ goto normal_write;
+ }
+ }
+ } else if (updated) {
+ /* A page was updated in the set, write back to disk. */
+normal_write:
+ err = os_file_write(
+ write_request, iter.filepath, iter.file,
+ writeptr, offset, n_bytes);
if (err != DB_SUCCESS) {
goto func_exit;
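
The fil_iterate() hunk above adds a sticky fallback: the punch-hole-aware page-by-page write is tried first for page-compressed extents, and on the first failure punch_hole is cleared so every later extent takes the plain os_file_write() path. In outline (hypothetical helpers standing in for the two write paths; the real code reaches the plain path via goto normal_write):

enum class Status { Ok, Fail };

Status compressed_fwrite(bool punch_only);  // page-by-page, punch-hole path
Status normal_fwrite();                     // plain contiguous write

bool punch_hole_supported = true;           // cleared on first failure

Status write_extent(bool page_compressed, bool updated)
{
    Status err = Status::Ok;
    if (page_compressed && punch_hole_supported) {
        err = compressed_fwrite(/*punch_only=*/!updated);
        if (err != Status::Ok) {
            punch_hole_supported = false;   // never retry punching
            if (updated)
                err = normal_fwrite();      // fall back to a plain write
        }
    } else if (updated) {
        err = normal_fwrite();              // the pre-existing path
    }
    return err;
}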
diff --git a/storage/innobase/row/row0merge.cc b/storage/innobase/row/row0merge.cc
index 7a9c2cd9240..0638465527c 100644
--- a/storage/innobase/row/row0merge.cc
+++ b/storage/innobase/row/row0merge.cc
@@ -3900,6 +3900,8 @@ row_merge_drop_indexes(
ut_ad(prev);
ut_a(table->fts);
fts_drop_index(table, index, trx);
+ row_merge_drop_index_dict(
+ trx, index->id);
/* We can remove a DICT_FTS
index from the cache, because
we do not allow ADD FULLTEXT INDEX