diff options
author | Marko Mäkelä <marko.makela@mariadb.com> | 2020-10-23 12:29:11 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@mariadb.com> | 2020-10-26 17:53:54 +0200 |
commit | ef3f71fa7435f092dfce36d606cf22332218dd8b (patch) | |
tree | f0f1bde9c8ecdaf02e708ec0ea4c1d42c11878d7 | |
parent | 8cb01c51fbd3bfebfbcadea78a30a6f18df46598 (diff) | |
download | mariadb-git-ef3f71fa7435f092dfce36d606cf22332218dd8b.tar.gz |
MDEV-23399 fixup: Interleaved doublewrite batches
Author: Vladislav Vaintroub
-rw-r--r-- | storage/innobase/buf/buf0dblwr.cc | 97 | ||||
-rw-r--r-- | storage/innobase/include/buf0dblwr.h | 42 |
2 files changed, 77 insertions, 62 deletions
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index eb460af2de2..81c934faf4f 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -53,8 +53,8 @@ inline buf_block_t *buf_dblwr_trx_sys_get(mtr_t *mtr) @param header doublewrite page header in the TRX_SYS page */ inline void buf_dblwr_t::init(const byte *header) { - ut_ad(!first_free); - ut_ad(!reserved); + ut_ad(!active_slot->first_free); + ut_ad(!active_slot->reserved); ut_ad(!batch_running); mysql_mutex_init(buf_dblwr_mutex_key, &mutex, nullptr); @@ -63,10 +63,14 @@ inline void buf_dblwr_t::init(const byte *header) block2= page_id_t(0, mach_read_from_4(header + TRX_SYS_DOUBLEWRITE_BLOCK2)); const uint32_t buf_size= 2 * block_size(); - write_buf= static_cast<byte*>(aligned_malloc(buf_size << srv_page_size_shift, - srv_page_size)); - buf_block_arr= static_cast<element*> - (ut_zalloc_nokey(buf_size * sizeof(element))); + for (int i= 0; i < 2; i++) + { + slots[i].write_buf= static_cast<byte*> + (aligned_malloc(buf_size << srv_page_size_shift, srv_page_size)); + slots[i].buf_block_arr= static_cast<element*> + (ut_zalloc_nokey(buf_size * sizeof(element))); + } + active_slot= &slots[0]; } /** Create or restore the doublewrite buffer in the TRX_SYS page. @@ -272,6 +276,7 @@ func_exit: TRX_SYS_DOUBLEWRITE + read_buf) != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N; + auto write_buf= active_slot->write_buf; /* Read the pages from the doublewrite buffer to memory */ err= os_file_read(IORequestRead, file, write_buf, block1.page_no() << srv_page_size_shift, @@ -443,16 +448,20 @@ void buf_dblwr_t::close() return; /* Free the double write data structures. */ - ut_ad(!reserved); - ut_ad(!first_free); + ut_ad(!active_slot->reserved); + ut_ad(!active_slot->first_free); ut_ad(!batch_running); mysql_cond_destroy(&cond); - aligned_free(write_buf); - ut_free(buf_block_arr); + for (int i= 0; i < 2; i++) + { + aligned_free(slots[i].write_buf); + ut_free(slots[i].buf_block_arr); + } mysql_mutex_destroy(&mutex); memset((void*) this, 0, sizeof *this); + active_slot= &slots[0]; } /** Update the doublewrite buffer on write completion. */ @@ -466,10 +475,11 @@ void buf_dblwr_t::write_completed() mysql_mutex_lock(&mutex); ut_ad(batch_running); - ut_ad(reserved); - ut_ad(reserved <= first_free); + slot *flush_slot= active_slot == &slots[0] ? &slots[1] : &slots[0]; + ut_ad(flush_slot->reserved); + ut_ad(flush_slot->reserved <= flush_slot->first_free); - if (!--reserved) + if (!--flush_slot->reserved) { mysql_mutex_unlock(&mutex); /* This will finish the batch. Sync data files to the disk. */ @@ -477,7 +487,7 @@ void buf_dblwr_t::write_completed() mysql_mutex_lock(&mutex); /* We can now reuse the doublewrite memory buffer: */ - first_free= 0; + flush_slot->first_free= 0; batch_running= false; mysql_cond_broadcast(&cond); } @@ -552,25 +562,30 @@ bool buf_dblwr_t::flush_buffered_writes(const ulint size) for (;;) { - if (!first_free) + if (!active_slot->first_free) return false; if (!batch_running) break; mysql_cond_wait(&cond, &mutex); } - ut_ad(reserved == first_free); - /* Disallow anyone else to post to doublewrite buffer or to - start another batch of flushing. */ + ut_ad(active_slot->reserved == active_slot->first_free); + + /* Disallow anyone else to start another batch of flushing. */ + slot *flush_slot= active_slot; + /* Switch the active slot */ + active_slot= active_slot == &slots[0] ? &slots[1] : &slots[0]; + ut_a(active_slot->first_free == 0); batch_running= true; - const ulint old_first_free= first_free; + const ulint old_first_free= flush_slot->first_free; + auto write_buf= flush_slot->write_buf; /* Now safe to release the mutex. */ mysql_mutex_unlock(&mutex); #ifdef UNIV_DEBUG for (ulint len2= 0, i= 0; i < old_first_free; len2 += srv_page_size, i++) { - buf_page_t *bpage= buf_block_arr[i].request.bpage; + buf_page_t *bpage= flush_slot->buf_block_arr[i].request.bpage; if (bpage->zip.data) /* No simple validate for ROW_FORMAT=COMPRESSED pages exists. */ @@ -602,7 +617,7 @@ bool buf_dblwr_t::flush_buffered_writes(const ulint size) } /* increment the doublewrite flushed pages counter */ - srv_stats.dblwr_pages_written.add(first_free); + srv_stats.dblwr_pages_written.add(flush_slot->first_free); srv_stats.dblwr_writes.inc(); /* Now flush the doublewrite buffer data to disk */ @@ -612,20 +627,13 @@ bool buf_dblwr_t::flush_buffered_writes(const ulint size) and in recovery we will find them in the doublewrite buffer blocks. Next do the writes to the intended positions. */ - /* Up to this point old_first_free == first_free because we have set - the batch_running flag disallowing any other thread to post any - request but we can't safely access first_free in the loop below. - This is so because it is possible that after we are done with the - last iteration and before we terminate the loop, the batch gets - finished in the IO helper thread and another thread posts a new - batch setting first_free to a higher value. If this happens and we - are using first_free in the loop termination condition then we'll - end up dispatching the same block twice from two different - threads. */ - ut_ad(old_first_free == first_free); + + ut_ad(active_slot != flush_slot); + ut_ad(flush_slot->first_free == old_first_free); + for (ulint i= 0; i < old_first_free; i++) { - auto e= buf_block_arr[i]; + auto e= flush_slot->buf_block_arr[i]; buf_page_t* bpage= e.request.bpage; ut_ad(bpage->in_file()); @@ -696,18 +704,15 @@ void buf_dblwr_t::add_to_batch(fil_space_t *space, const IORequest &request, for (;;) { - while (batch_running) - mysql_cond_wait(&cond, &mutex); - - ut_ad(first_free <= buf_size); - if (first_free != buf_size) + ut_ad(active_slot->first_free <= buf_size); + if (active_slot->first_free != buf_size) break; if (flush_buffered_writes(buf_size / 2)) mysql_mutex_lock(&mutex); } - byte *p= write_buf + srv_page_size * first_free; + byte *p= active_slot->write_buf + srv_page_size * active_slot->first_free; /* We request frame here to get correct buffer in case of encryption and/or page compression */ @@ -715,11 +720,13 @@ void buf_dblwr_t::add_to_batch(fil_space_t *space, const IORequest &request, memcpy_aligned<OS_FILE_LOG_BLOCK_SIZE>(p, frame, size); ut_ad(!request.bpage->zip_size() || request.bpage->zip_size() == size); - ut_ad(reserved == first_free); - ut_ad(reserved < buf_size); - new (buf_block_arr + first_free++) element{space, request, size}; - reserved= first_free; - - if (first_free != buf_size || !flush_buffered_writes(buf_size / 2)) + ut_ad(active_slot->reserved == active_slot->first_free); + ut_ad(active_slot->reserved < buf_size); + new (active_slot->buf_block_arr + active_slot->first_free++) + element{space, request, size}; + active_slot->reserved= active_slot->first_free; + + if (active_slot->first_free != buf_size || + !flush_buffered_writes(buf_size / 2)) mysql_mutex_unlock(&mutex); } diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h index aac4715250d..0216c3025c2 100644 --- a/storage/innobase/include/buf0dblwr.h +++ b/storage/innobase/include/buf0dblwr.h @@ -32,6 +32,29 @@ Created 2011/12/19 Inaam Rana /** Doublewrite control struct */ class buf_dblwr_t { + struct element + { + /** tablespace */ + fil_space_t *space; + /** asynchronous write request */ + IORequest request; + /** payload size in bytes */ + size_t size; + }; + + struct slot + { + /** first free position in write_buf measured in units of + * srv_page_size */ + ulint first_free; + /** number of slots reserved for the current write batch */ + ulint reserved; + /** the doublewrite buffer, aligned to srv_page_size */ + byte* write_buf; + /** buffer blocks to be written via write_buf */ + element* buf_block_arr; + }; + /** the page number of the first doublewrite block (block_size() pages) */ page_id_t block1= page_id_t(0, 0); /** the page number of the second doublewrite block (block_size() pages) */ @@ -43,25 +66,10 @@ class buf_dblwr_t mysql_cond_t cond; /** whether a batch is being written from the doublewrite buffer */ bool batch_running; - /** first free position in write_buf measured in units of srv_page_size */ - ulint first_free; - /** number of slots reserved for the current write batch */ - ulint reserved; - /** the doublewrite buffer, aligned to srv_page_size */ - byte *write_buf; - struct element - { - /** tablespace */ - fil_space_t *space; - /** asynchronous write request */ - IORequest request; - /** payload size in bytes */ - size_t size; - }; + slot slots[2]; + slot *active_slot=&slots[0]; - /** buffer blocks to be written via write_buf */ - element *buf_block_arr; /** Initialize the doublewrite buffer data structure. @param header doublewrite page header in the TRX_SYS page */ |