summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThirunarayanan Balathandayuthapani <thiru@mariadb.com>2022-08-06 01:16:21 +0530
committerThirunarayanan Balathandayuthapani <thiru@mariadb.com>2022-08-19 16:29:23 +0530
commitfd9d44b2b4fca5c113b0c52a0c45b7d2bb70ba4b (patch)
treefa3f7ee5442127bf1d9fc424b8716aafe4b47666
parent75c416d3627650a5b43c70a8150292990206e3e0 (diff)
downloadmariadb-git-fd9d44b2b4fca5c113b0c52a0c45b7d2bb70ba4b.tar.gz
MDEV-29250 InnoDB: Failing assertion: table->get_ref_count() == 0
Reason: ====== This issue is caused by a race condition between fulltext DDL and the purge thread. DDL sets the signal to stop the purge thread from processing new undo log records and waits for the FTS table undo log records that are currently being processed to finish. But in dict_acquire_mdl_shared(), InnoDB releases all InnoDB table-related locks before acquiring the MDL. At the same time, DDL assumes that there are no purge threads working on the FTS table. There is a possibility that the purge thread can skip processing valid undo log records if it checks purge_sys.must_wait_FTS() twice in different places. Solution: ========== Add the purge_sys.must_wait_FTS() check in dict_acquire_mdl_shared() to prevent the purge thread from processing undo log records. dict_table_open_on_id(): return -1 (as a sentinel dict_table_t* value) if the purge thread has to wait. dict_acquire_mdl_shared(): Added one new template parameter to indicate that the purge thread is invoking the function; return -1 (as a sentinel dict_table_t* value) if the purge thread has to wait.
-rw-r--r--storage/innobase/dict/dict0dict.cc31
-rw-r--r--storage/innobase/include/dict0dict.h2
-rw-r--r--storage/innobase/row/row0purge.cc3
3 files changed, 26 insertions, 10 deletions
diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc
index 52a7d5a4b36..ffb76de4f7b 100644
--- a/storage/innobase/dict/dict0dict.cc
+++ b/storage/innobase/dict/dict0dict.cc
@@ -662,7 +662,7 @@ dict_table_t::parse_name<>(char(&)[NAME_LEN + 1], char(&)[NAME_LEN + 1],
@param[in] table_op operation to perform when opening
@return table object after locking MDL shared
@retval nullptr if the table is not readable, or if trylock && MDL blocked */
-template<bool trylock>
+template<bool trylock, bool purge_thd>
dict_table_t*
dict_acquire_mdl_shared(dict_table_t *table,
THD *thd,
@@ -674,9 +674,11 @@ dict_acquire_mdl_shared(dict_table_t *table,
MDL_context *mdl_context= static_cast<MDL_context*>(thd_mdl_context(thd));
size_t db_len;
+ dict_table_t *not_found= nullptr;
if (trylock)
{
+ static_assert(!trylock || !purge_thd, "usage");
dict_sys.freeze(SRW_LOCK_CALL);
db_len= dict_get_db_name_len(table->name.m_name);
dict_sys.unfreeze();
@@ -748,7 +750,13 @@ retry:
}
}
+retry_table_open:
dict_sys.freeze(SRW_LOCK_CALL);
+ if (purge_thd && purge_sys.must_wait_FTS())
+ {
+ not_found= reinterpret_cast<dict_table_t*>(-1);
+ goto return_without_mdl;
+ }
table= dict_sys.find_table(table_id);
if (table)
table->acquire();
@@ -756,6 +764,11 @@ retry:
{
dict_sys.unfreeze();
dict_sys.lock(SRW_LOCK_CALL);
+ if (purge_thd && purge_sys.must_wait_FTS())
+ {
+ dict_sys.unlock();
+ goto retry_table_open;
+ }
table= dict_load_table_on_id(table_id,
table_op == DICT_TABLE_OP_LOAD_TABLESPACE
? DICT_ERR_IGNORE_RECOVER_LOCK
@@ -777,7 +790,7 @@ return_without_mdl:
mdl_context->release_lock(*mdl);
*mdl= nullptr;
}
- return nullptr;
+ return not_found;
}
size_t db1_len, tbl1_len;
@@ -814,9 +827,9 @@ return_without_mdl:
goto retry;
}
-template dict_table_t* dict_acquire_mdl_shared<false>
+template dict_table_t* dict_acquire_mdl_shared<false, false>
(dict_table_t*,THD*,MDL_ticket**,dict_table_op_t);
-template dict_table_t* dict_acquire_mdl_shared<true>
+template dict_table_t* dict_acquire_mdl_shared<true, false>
(dict_table_t*,THD*,MDL_ticket**,dict_table_op_t);
/** Look up a table by numeric identifier.
@@ -842,13 +855,14 @@ dict_table_open_on_id(table_id_t table_id, bool dict_locked,
{
if (purge_thd && purge_sys.must_wait_FTS())
{
- table= nullptr;
+ table= reinterpret_cast<dict_table_t*>(-1);
goto func_exit;
}
table->acquire();
if (thd && !dict_locked)
- table= dict_acquire_mdl_shared<false>(table, thd, mdl, table_op);
+ table= dict_acquire_mdl_shared<false, purge_thd>(
+ table, thd, mdl, table_op);
}
else if (table_op != DICT_TABLE_OP_OPEN_ONLY_IF_CACHED)
{
@@ -866,7 +880,7 @@ dict_table_open_on_id(table_id_t table_id, bool dict_locked,
if (purge_thd && purge_sys.must_wait_FTS())
{
dict_sys.unlock();
- return nullptr;
+ return reinterpret_cast<dict_table_t*>(-1);
}
table->acquire();
}
@@ -876,7 +890,8 @@ dict_table_open_on_id(table_id_t table_id, bool dict_locked,
if (table && thd)
{
dict_sys.freeze(SRW_LOCK_CALL);
- table= dict_acquire_mdl_shared<false>(table, thd, mdl, table_op);
+ table= dict_acquire_mdl_shared<false, purge_thd>(
+ table, thd, mdl, table_op);
dict_sys.unfreeze();
}
return table;
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index 29673f5bc95..cfaf4fab83e 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -132,7 +132,7 @@ enum dict_table_op_t {
@param[in] table_op operation to perform when opening
@return table object after locking MDL shared
@retval NULL if the table is not readable, or if trylock && MDL blocked */
-template<bool trylock>
+template<bool trylock, bool purge_thd= false>
dict_table_t*
dict_acquire_mdl_shared(dict_table_t *table,
THD *thd,
diff --git a/storage/innobase/row/row0purge.cc b/storage/innobase/row/row0purge.cc
index 47625b91f35..8bbb0a36144 100644
--- a/storage/innobase/row/row0purge.cc
+++ b/storage/innobase/row/row0purge.cc
@@ -941,7 +941,8 @@ try_again:
table_id, false, DICT_TABLE_OP_NORMAL, node->purge_thd,
&node->mdl_ticket);
- if (!node->table && purge_sys.must_wait_FTS()) {
+ if (node->table == reinterpret_cast<dict_table_t*>(-1)) {
+ /* purge stop signal */
goto try_again;
}