diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2020-12-11 09:05:26 +0200 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2020-12-11 09:05:26 +0200 |
commit | 8677c14e65f436db00be5aedb1f644fcffc70f7e (patch) | |
tree | 45f9212af72f0163198822f175956259318beb02 | |
parent | 0c7c449267655ed759f223067c5095d7df3665b3 (diff) | |
download | mariadb-git-8677c14e65f436db00be5aedb1f644fcffc70f7e.tar.gz |
MDEV-24391 heap-use-after-free in fil_space_t::flush_low()bb-10.5-MDEV-24391
We observed a race condition that involved two threads
executing fil_flush_file_spaces() and one thread
executing fil_delete_tablespace(). After one of the
fil_flush_file_spaces() observed that
space.needs_flush_not_stopping() is set and was
releasing the fil_system.mutex, the other fil_flush_file_spaces()
would complete the execution of fil_space_t::flush_low() on
the same tablespace. Then, fil_delete_tablespace() would
destroy the object, because the value of fil_space_t::n_pending
did not prevent that. Finally, the fil_flush_file_spaces() would
resume execution and invoke fil_space_t::flush_low() on the freed
object.
This race condition was introduced in
commit 118e258aaac5da75a2ac4556201aaea3688fac67 of MDEV-23855.
fil_space_t::flush(): Add a template parameter that indicates
whether the caller is holding a reference to prevent the
tablespace from being freed.
buf_dblwr_t::flush_buffered_writes_completed(),
row_quiesce_table_start(): Acquire a reference for the duration
of the fil_space_t::flush_low() operation. It should be impossible
for the object to be freed in these code paths, but we want to
satisfy the debug assertions.
fil_space_t::flush_low(): Do not increment or decrement the
reference count, but instead assert that the caller is holding
a reference.
fil_space_extend_must_retry(), fil_flush_file_spaces():
Acquire a reference before releasing fil_system.mutex.
This is what will fix the race condition.
-rw-r--r-- | extra/mariabackup/xtrabackup.cc | 2 | ||||
-rw-r--r-- | storage/innobase/buf/buf0dblwr.cc | 2 | ||||
-rw-r--r-- | storage/innobase/fil/fil0fil.cc | 17 | ||||
-rw-r--r-- | storage/innobase/include/fil0fil.h | 13 | ||||
-rw-r--r-- | storage/innobase/row/row0quiesce.cc | 2 |
5 files changed, 21 insertions, 15 deletions
diff --git a/extra/mariabackup/xtrabackup.cc b/extra/mariabackup/xtrabackup.cc index d44e212d58a..94b10019e1d 100644 --- a/extra/mariabackup/xtrabackup.cc +++ b/extra/mariabackup/xtrabackup.cc @@ -552,7 +552,7 @@ void CorruptedPages::zero_out_free_pages() *page_it, space_name.c_str()); } } - space->flush(); + space->flush<true>(); space->release(); } m_spaces.swap(non_free_pages); diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index 4e33bc58b53..6a4a6ced074 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -648,7 +648,7 @@ void buf_dblwr_t::flush_buffered_writes_completed(const IORequest &request) mysql_mutex_unlock(&mutex); /* Now flush the doublewrite buffer data to disk */ - fil_system.sys_space->flush(); + fil_system.sys_space->flush<false>(); /* The writes have been flushed to disk now and in recovery we will find them in the doublewrite buffer blocks. Next, write the data pages. */ diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index b3d7aa2568b..2734d6bc68d 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -496,18 +496,16 @@ void fil_space_t::flush_low() { ut_ad(!mutex_own(&fil_system.mutex)); - uint32_t n= 0; - while (!n_pending.compare_exchange_strong(n, (n + 1) | NEEDS_FSYNC, + uint32_t n= 1; + while (!n_pending.compare_exchange_strong(n, n | NEEDS_FSYNC, std::memory_order_acquire, std::memory_order_relaxed)) { + ut_ad(n & PENDING); if (n & STOPPING) return; - if (!(n & NEEDS_FSYNC)) - continue; - if (acquire_low() & STOPPING) - return; - break; + if (n & NEEDS_FSYNC) + break; } fil_n_pending_tablespace_flushes++; @@ -535,7 +533,6 @@ void fil_space_t::flush_low() } clear_flush(); - release(); fil_n_pending_tablespace_flushes--; } @@ -632,8 +629,10 @@ fil_space_extend_must_retry( case TRX_SYS_SPACE: srv_sys_space.set_last_file_size(pages_in_MiB); do_flush: + space->reacquire(); mutex_exit(&fil_system.mutex); space->flush_low(); + space->release(); mutex_enter(&fil_system.mutex); break; default: @@ -3516,8 +3515,10 @@ rescan: { if (space.needs_flush_not_stopping()) { + space.reacquire(); mutex_exit(&fil_system.mutex); space.flush_low(); + space.release(); goto rescan; } } diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index e645ce31232..79dd6933056 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -972,7 +972,7 @@ public: fil_io_t io(const IORequest &type, os_offset_t offset, size_t len, void *buf, buf_page_t *bpage= nullptr); /** Flush pending writes from the file system cache to the file. */ - inline void flush(); + template<bool have_reference> inline void flush(); /** Flush pending writes from the file system cache to the file. */ void flush_low(); @@ -1452,18 +1452,23 @@ inline void fil_space_t::set_stopping(bool stopping) } /** Flush pending writes from the file system cache to the file. */ -inline void fil_space_t::flush() +template<bool have_reference> inline void fil_space_t::flush() { ut_ad(!mutex_own(&fil_system.mutex)); - + ut_ad(!have_reference || (pending() & PENDING)); ut_ad(purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_IMPORT); if (srv_file_flush_method == SRV_O_DIRECT_NO_FSYNC) { ut_ad(!is_in_unflushed_spaces); ut_ad(!needs_flush()); } - else + else if (have_reference) + flush_low(); + else if (!(acquire_low() & STOPPING)) + { flush_low(); + release(); + } } /** @return the size in pages (0 if unreadable) */ diff --git a/storage/innobase/row/row0quiesce.cc b/storage/innobase/row/row0quiesce.cc index 0bdf52dfd56..8e0581dfa9b 100644 --- a/storage/innobase/row/row0quiesce.cc +++ b/storage/innobase/row/row0quiesce.cc @@ -545,7 +545,7 @@ row_quiesce_table_start( if (!trx_is_interrupted(trx)) { /* Ensure that all asynchronous IO is completed. */ os_aio_wait_until_no_pending_writes(); - table->space->flush(); + table->space->flush<false>(); if (row_quiesce_write_cfg(table, trx->mysql_thd) != DB_SUCCESS) { |