summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEugene Kosov <eugene.kosov@mariadb.com>2020-05-29 01:24:50 +0300
committerEugene Kosov <eugene.kosov@mariadb.com>2020-06-10 16:59:07 +0300
commit264a98eaa0a783e1ce76d18b9105ae00dc11098b (patch)
tree389b0b4777fcd049eb9b578ea30fd04d5132a8f2
parentaf194ab5bd538567ef735cfc374ac328052105ed (diff)
downloadmariadb-git-264a98eaa0a783e1ce76d18b9105ae00dc11098b.tar.gz
MDEV-8069 DROP or rebuild of a large table may lock up InnoDB
Problematic mutex is dict_sys.mutex. Idea of the patch: unlink() fd under that mutex while it's still open. This way unlink() will be fast and actual file removal will happen on close(). And close() will be called outside of dict_sys.mutex. This should be safe against crash which may happen between unlink() and close(): file will be removed by OS anyway. The same applies to both *nix and Windows. I created and removed a 4G file on some NVMe SSD on ext4: write(3, "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1"..., 1048576) = 1048576 <0.000519> fdatasync(3) = 0 <3.533763> close(3) = 0 <0.000011> unlink("file") = 0 <0.411563> write(3, "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1"..., 1048576) = 1048576 <0.000520> fdatasync(3) = 0 <3.544938> unlink("file") = 0 <0.000029> close(3) = 0 <0.407057> Such systems can benefit of this patch. fil_node_t::detach(): closes fil_node_t but not file handle, returns that file handle fil_node_t::prepare_to_close_or_deatch(): 'closes' fil_node_t fil_node_t:close_to_free(): new argument detach_handle fil_system_t::detach(): now can detach file handles fil_delete_tablespace(): now can detach file handles row_drop_table_for_mysql(): performs actual file removal
-rw-r--r--storage/innobase/fil/fil0fil.cc86
-rw-r--r--storage/innobase/include/fil0fil.h25
-rw-r--r--storage/innobase/row/row0mysql.cc10
3 files changed, 89 insertions, 32 deletions
diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc
index 7c561dd05ce..5ba5b0f703e 100644
--- a/storage/innobase/fil/fil0fil.cc
+++ b/storage/innobase/fil/fil0fil.cc
@@ -511,32 +511,41 @@ fail:
/** Close the file handle. */
void fil_node_t::close()
{
- bool ret;
+ prepare_to_close_or_detach();
- ut_ad(mutex_own(&fil_system.mutex));
- ut_a(is_open());
- ut_a(n_pending == 0);
- ut_a(n_pending_flushes == 0);
- ut_a(!being_extended);
- ut_a(!needs_flush
- || space->purpose == FIL_TYPE_TEMPORARY
- || srv_fast_shutdown == 2
- || !srv_was_started);
+ /* printf("Closing file %s\n", name); */
+ int ret= os_file_close(handle);
+ ut_a(ret);
+ handle= OS_FILE_CLOSED;
+}
- ret = os_file_close(handle);
- ut_a(ret);
+pfs_os_file_t fil_node_t::detach()
+{
+ prepare_to_close_or_detach();
- /* printf("Closing file %s\n", name); */
+ pfs_os_file_t result= handle;
+ handle= OS_FILE_CLOSED;
+ return result;
+}
- handle = OS_FILE_CLOSED;
- ut_ad(!is_open());
- ut_a(fil_system.n_open > 0);
- fil_system.n_open--;
+void fil_node_t::prepare_to_close_or_detach()
+{
+ ut_ad(mutex_own(&fil_system.mutex));
+ ut_a(is_open());
+ ut_a(n_pending == 0);
+ ut_a(n_pending_flushes == 0);
+ ut_a(!being_extended);
+ ut_a(!needs_flush || space->purpose == FIL_TYPE_TEMPORARY ||
+ srv_fast_shutdown == 2 || !srv_was_started);
- if (fil_space_belongs_in_lru(space)) {
- ut_a(UT_LIST_GET_LEN(fil_system.LRU) > 0);
- UT_LIST_REMOVE(fil_system.LRU, this);
- }
+ ut_a(fil_system.n_open > 0);
+ fil_system.n_open--;
+
+ if (fil_space_belongs_in_lru(space))
+ {
+ ut_a(UT_LIST_GET_LEN(fil_system.LRU) > 0);
+ UT_LIST_REMOVE(fil_system.LRU, this);
+ }
}
/** Tries to close a file in the LRU list. The caller must hold the fil_sys
@@ -917,7 +926,7 @@ fil_space_extend(
}
/** Prepare to free a file from fil_system. */
-inline void fil_node_t::close_to_free()
+pfs_os_file_t fil_node_t::close_to_free(bool detach_handle)
{
ut_ad(mutex_own(&fil_system.mutex));
ut_a(magic_n == FIL_NODE_MAGIC_N);
@@ -958,15 +967,24 @@ inline void fil_node_t::close_to_free()
}
ut_a(!n_pending_flushes);
ut_a(!being_extended);
+ if (detach_handle)
+ {
+ auto result= handle;
+ handle= OS_FILE_CLOSED;
+ return result;
+ }
bool ret= os_file_close(handle);
ut_a(ret);
handle= OS_FILE_CLOSED;
break;
}
+
+ return OS_FILE_CLOSED;
}
/** Detach a tablespace from the cache and close the files. */
-inline void fil_system_t::detach(fil_space_t *space)
+std::vector<pfs_os_file_t> fil_system_t::detach(fil_space_t *space,
+ bool detach_handle)
{
ut_ad(mutex_own(&fil_system.mutex));
HASH_DELETE(fil_space_t, hash, spaces, space->id, space);
@@ -1000,9 +1018,18 @@ inline void fil_system_t::detach(fil_space_t *space)
n_open--;
}
+ std::vector<pfs_os_file_t> handles;
+ handles.reserve(UT_LIST_GET_LEN(space->chain));
+
for (fil_node_t* node= UT_LIST_GET_FIRST(space->chain); node;
node= UT_LIST_GET_NEXT(chain, node))
- node->close_to_free();
+ {
+ auto handle= node->close_to_free(detach_handle);
+ if (handle != OS_FILE_CLOSED)
+ handles.push_back(handle);
+ }
+
+ return handles;
}
/** Free a tablespace object on which fil_system_t::detach() was invoked.
@@ -2180,11 +2207,14 @@ bool fil_table_accessible(const dict_table_t* table)
/** Delete a tablespace and associated .ibd file.
@param[in] id tablespace identifier
@param[in] if_exists whether to ignore missing tablespace
+@param[in,out] detached_handles return detached handles if not nullptr
@return DB_SUCCESS or error */
-dberr_t fil_delete_tablespace(ulint id, bool if_exists)
+dberr_t fil_delete_tablespace(ulint id, bool if_exists,
+ std::vector<pfs_os_file_t>* detached_handles)
{
char* path = NULL;
ut_ad(!is_system_tablespace(id));
+ ut_ad(!detached_handles || detached_handles->empty());
dberr_t err;
fil_space_t *space = fil_check_pending_operations(id, false, &path);
@@ -2260,7 +2290,11 @@ dberr_t fil_delete_tablespace(ulint id, bool if_exists)
ut_a(s == space);
ut_a(!space->referenced());
ut_a(UT_LIST_GET_LEN(space->chain) == 1);
- fil_system.detach(space);
+ auto handles = fil_system.detach(space,
+ detached_handles != nullptr);
+ if (detached_handles) {
+ *detached_handles = std::move(handles);
+ }
mutex_exit(&fil_system.mutex);
log_mutex_enter();
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index bfacd0cbd2a..62228db822f 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -653,11 +653,19 @@ struct fil_node_t {
/** Close the file handle. */
void close();
- /** Prepare to free a file from fil_system. */
- inline void close_to_free();
+ /** Same as close() but returns file handle instead of closing it. */
+ pfs_os_file_t detach() MY_ATTRIBUTE((warn_unused_result));
+ /** Prepare to free a file from fil_system.
+ @param detach_handle whether to detach instead of closing a handle
+ @return detached handle or OS_FILE_CLOSED */
+ pfs_os_file_t close_to_free(bool detach_handle= false);
/** Update the data structures on I/O completion */
inline void complete_io(bool write= false);
+
+private:
+ /** Does stuff common for close() and detach() */
+ void prepare_to_close_or_detach();
};
/** Value of fil_node_t::magic_n */
@@ -973,8 +981,12 @@ public:
}
#endif
public:
- /** Detach a tablespace from the cache and close the files. */
- inline void detach(fil_space_t *space);
+ /** Detach a tablespace from the cache and close the files.
+ @param space tablespace
+ @param detach_handle whether to detach or close handles
+ @return detached handles or empty vector */
+ std::vector<pfs_os_file_t> detach(fil_space_t *space,
+ bool detach_handle= false);
ib_mutex_t mutex; /*!< The mutex protecting the cache */
fil_space_t* sys_space; /*!< The innodb_system tablespace */
@@ -1285,8 +1297,11 @@ bool fil_table_accessible(const dict_table_t* table)
/** Delete a tablespace and associated .ibd file.
@param[in] id tablespace identifier
@param[in] if_exists whether to ignore missing tablespace
+@param[out] leaked_handles return detached handles here
@return DB_SUCCESS or error */
-dberr_t fil_delete_tablespace(ulint id, bool if_exists= false);
+dberr_t
+fil_delete_tablespace(ulint id, bool if_exists= false,
+ std::vector<pfs_os_file_t> *detached_handles= nullptr);
/** Prepare to truncate an undo tablespace.
@param[in] space_id undo tablespace id
diff --git a/storage/innobase/row/row0mysql.cc b/storage/innobase/row/row0mysql.cc
index eacb718211a..298c2a3c879 100644
--- a/storage/innobase/row/row0mysql.cc
+++ b/storage/innobase/row/row0mysql.cc
@@ -3374,6 +3374,8 @@ row_drop_table_for_mysql(
DBUG_RETURN(DB_TABLE_NOT_FOUND);
}
+ std::vector<pfs_os_file_t> detached_handles;
+
const bool is_temp_name = strstr(table->name.m_name,
"/" TEMP_FILE_PREFIX);
@@ -3757,7 +3759,8 @@ do_drop:
ut_ad(!filepath);
if (space->id != TRX_SYS_SPACE) {
- err = fil_delete_tablespace(space->id);
+ err = fil_delete_tablespace(space->id, false,
+ &detached_handles);
}
break;
@@ -3837,6 +3840,11 @@ funct_exit_all_freed:
row_mysql_unlock_data_dictionary(trx);
}
+ for (const auto& handle : detached_handles) {
+ ut_ad(handle != OS_FILE_CLOSED);
+ os_file_close(handle);
+ }
+
trx->op_info = "";
srv_wake_master_thread();