diff options
Diffstat (limited to 'storage/innobase/buf/buf0dblwr.cc')
-rw-r--r-- | storage/innobase/buf/buf0dblwr.cc | 200 |
1 files changed, 121 insertions, 79 deletions
diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index fb853fe1543..2b2483fde6d 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -38,11 +38,6 @@ Created 2011/12/19 #ifndef UNIV_HOTBACKUP -/** Time in milliseconds that we sleep when unable to find a slot in -the doublewrite buffer or when we have to wait for a running batch -to end. */ -#define TRX_DOUBLEWRITE_BATCH_POLL_DELAY 10000 - #ifdef UNIV_PFS_MUTEX /* Key to register the mutex with performance schema */ UNIV_INTERN mysql_pfs_key_t buf_dblwr_mutex_key; @@ -104,6 +99,25 @@ buf_dblwr_get( return(buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE); } +/********************************************************************//** +Flush a batch of writes to the datafiles that have already been +written to the dblwr buffer on disk. */ +UNIV_INLINE +void +buf_dblwr_sync_datafiles() +/*======================*/ +{ + /* Wake possible simulated aio thread to actually post the + writes to the operating system */ + os_aio_simulated_wake_handler_threads(); + + /* Wait that all async writes to tablespaces have been posted to + the OS */ + os_aio_wait_until_no_pending_writes(); + + /* Now we flush the data to disk (for example, with fsync) */ + fil_flush_file_spaces(FIL_TABLESPACE); +} /****************************************************************//** Creates or initialializes the doublewrite buffer at a database start. */ @@ -131,6 +145,8 @@ buf_dblwr_init( mutex_create(buf_dblwr_mutex_key, &buf_dblwr->mutex, SYNC_DOUBLEWRITE); + buf_dblwr->b_event = os_event_create(); + buf_dblwr->s_event = os_event_create(); buf_dblwr->first_free = 0; buf_dblwr->s_reserved = 0; buf_dblwr->b_reserved = 0; @@ -140,8 +156,8 @@ buf_dblwr_init( buf_dblwr->block2 = mach_read_from_4( doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2); - buf_dblwr->in_use = static_cast<ibool*>( - mem_zalloc(buf_size * sizeof(ibool))); + buf_dblwr->in_use = static_cast<bool*>( + mem_zalloc(buf_size * sizeof(bool))); buf_dblwr->write_buf_unaligned = static_cast<byte*>( ut_malloc((1 + buf_size) * UNIV_PAGE_SIZE)); @@ -365,7 +381,7 @@ buf_dblwr_init_or_restore_pages( /* Read the trx sys header to check if we are using the doublewrite buffer */ - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0, + fil_io(OS_FILE_READ, true, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0, UNIV_PAGE_SIZE, read_buf, NULL); doublewrite = read_buf + TRX_SYS_DOUBLEWRITE; @@ -400,10 +416,10 @@ buf_dblwr_init_or_restore_pages( /* Read the pages from the doublewrite buffer to memory */ - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0, + fil_io(OS_FILE_READ, true, TRX_SYS_SPACE, 0, block1, 0, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, buf, NULL); - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0, + fil_io(OS_FILE_READ, true, TRX_SYS_SPACE, 0, block2, 0, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, NULL); @@ -433,7 +449,7 @@ buf_dblwr_init_or_restore_pages( + i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; } - fil_io(OS_FILE_WRITE, TRUE, 0, 0, source_page_no, 0, + fil_io(OS_FILE_WRITE, true, 0, 0, source_page_no, 0, UNIV_PAGE_SIZE, page, NULL); } else { @@ -473,7 +489,7 @@ buf_dblwr_init_or_restore_pages( ulint zip_size = fil_space_get_zip_size(space_id); /* Read in the actual page from the file */ - fil_io(OS_FILE_READ, TRUE, space_id, zip_size, + fil_io(OS_FILE_READ, true, space_id, zip_size, page_no, 0, zip_size ? zip_size : UNIV_PAGE_SIZE, read_buf, NULL); @@ -525,7 +541,7 @@ buf_dblwr_init_or_restore_pages( doublewrite buffer to the intended position */ - fil_io(OS_FILE_WRITE, TRUE, space_id, + fil_io(OS_FILE_WRITE, true, space_id, zip_size, page_no, 0, zip_size ? zip_size : UNIV_PAGE_SIZE, page, NULL); @@ -557,6 +573,8 @@ buf_dblwr_free(void) ut_ad(buf_dblwr->s_reserved == 0); ut_ad(buf_dblwr->b_reserved == 0); + os_event_free(buf_dblwr->b_event); + os_event_free(buf_dblwr->s_event); ut_free(buf_dblwr->write_buf_unaligned); buf_dblwr->write_buf_unaligned = NULL; @@ -572,38 +590,68 @@ buf_dblwr_free(void) } /********************************************************************//** -Updates the doublewrite buffer when an IO request that is part of an -LRU or flush batch is completed. */ +Updates the doublewrite buffer when an IO request is completed. */ UNIV_INTERN void -buf_dblwr_update(void) -/*==================*/ +buf_dblwr_update( +/*=============*/ + const buf_page_t* bpage, /*!< in: buffer block descriptor */ + buf_flush_t flush_type)/*!< in: flush type */ { if (!srv_use_doublewrite_buf || buf_dblwr == NULL) { return; } - mutex_enter(&buf_dblwr->mutex); + switch (flush_type) { + case BUF_FLUSH_LIST: + case BUF_FLUSH_LRU: + mutex_enter(&buf_dblwr->mutex); - ut_ad(buf_dblwr->batch_running); - ut_ad(buf_dblwr->b_reserved > 0); - ut_ad(buf_dblwr->b_reserved <= buf_dblwr->first_free); + ut_ad(buf_dblwr->batch_running); + ut_ad(buf_dblwr->b_reserved > 0); + ut_ad(buf_dblwr->b_reserved <= buf_dblwr->first_free); - buf_dblwr->b_reserved--; - if (buf_dblwr->b_reserved == 0) { + buf_dblwr->b_reserved--; + + if (buf_dblwr->b_reserved == 0) { + mutex_exit(&buf_dblwr->mutex); + /* This will finish the batch. Sync data files + to the disk. */ + fil_flush_file_spaces(FIL_TABLESPACE); + mutex_enter(&buf_dblwr->mutex); + + /* We can now reuse the doublewrite memory buffer: */ + buf_dblwr->first_free = 0; + buf_dblwr->batch_running = false; + os_event_set(buf_dblwr->b_event); + } mutex_exit(&buf_dblwr->mutex); - /* This will finish the batch. Sync data files - to the disk. */ - fil_flush_file_spaces(FIL_TABLESPACE); - mutex_enter(&buf_dblwr->mutex); + break; + case BUF_FLUSH_SINGLE_PAGE: + { + const ulint size = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; + ulint i; + mutex_enter(&buf_dblwr->mutex); + for (i = srv_doublewrite_batch_size; i < size; ++i) { + if (buf_dblwr->buf_block_arr[i] == bpage) { + buf_dblwr->s_reserved--; + buf_dblwr->buf_block_arr[i] = NULL; + buf_dblwr->in_use[i] = false; + break; + } + } - /* We can now reuse the doublewrite memory buffer: */ - buf_dblwr->first_free = 0; - buf_dblwr->batch_running = FALSE; + /* The block we are looking for must exist as a + reserved block. */ + ut_a(i < size); + } + os_event_set(buf_dblwr->s_event); + mutex_exit(&buf_dblwr->mutex); + break; + case BUF_FLUSH_N_TYPES: + ut_error; } - - mutex_exit(&buf_dblwr->mutex); } /********************************************************************//** @@ -698,18 +746,19 @@ static void buf_dblwr_write_block_to_datafile( /*==============================*/ - const buf_page_t* bpage) /*!< in: page to write */ + const buf_page_t* bpage, /*!< in: page to write */ + bool sync) /*!< in: true if sync IO + is requested */ { ut_a(bpage); ut_a(buf_page_in_file(bpage)); - /* Increment the counter of I/O operations used - for selecting LRU policy. */ - buf_LRU_stat_inc_io(); + const ulint flags = sync + ? OS_FILE_WRITE + : OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER; if (bpage->zip.data) { - fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, - FALSE, buf_page_get_space(bpage), + fil_io(flags, sync, buf_page_get_space(bpage), buf_page_get_zip_size(bpage), buf_page_get_page_no(bpage), 0, buf_page_get_zip_size(bpage), @@ -724,8 +773,7 @@ buf_dblwr_write_block_to_datafile( ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); buf_dblwr_check_page_lsn(block->frame); - fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, - FALSE, buf_block_get_space(block), 0, + fil_io(flags, sync, buf_block_get_space(block), 0, buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE, (void*) block->frame, (void*) block); } @@ -747,12 +795,12 @@ buf_dblwr_flush_buffered_writes(void) if (!srv_use_doublewrite_buf || buf_dblwr == NULL) { /* Sync the writes to the disk. */ - buf_flush_sync_datafiles(); + buf_dblwr_sync_datafiles(); return; } try_again: - mutex_enter(&(buf_dblwr->mutex)); + mutex_enter(&buf_dblwr->mutex); /* Write first to doublewrite buffer blocks. We use synchronous aio and thus know that file write has been completed when the @@ -760,17 +808,18 @@ try_again: if (buf_dblwr->first_free == 0) { - mutex_exit(&(buf_dblwr->mutex)); + mutex_exit(&buf_dblwr->mutex); return; } if (buf_dblwr->batch_running) { - mutex_exit(&buf_dblwr->mutex); - /* Another thread is running the batch right now. Wait for it to finish. */ - os_thread_sleep(TRX_DOUBLEWRITE_BATCH_POLL_DELAY); + ib_int64_t sig_count = os_event_reset(buf_dblwr->b_event); + mutex_exit(&buf_dblwr->mutex); + + os_event_wait_low(buf_dblwr->b_event, sig_count); goto try_again; } @@ -779,7 +828,7 @@ try_again: /* Disallow anyone else to post to doublewrite buffer or to start another batch of flushing. */ - buf_dblwr->batch_running = TRUE; + buf_dblwr->batch_running = true; first_free = buf_dblwr->first_free; /* Now safe to release the mutex. Note that though no other @@ -818,7 +867,7 @@ try_again: len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE, buf_dblwr->first_free) * UNIV_PAGE_SIZE; - fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0, + fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, buf_dblwr->block1, 0, len, (void*) write_buf, NULL); @@ -834,7 +883,7 @@ try_again: write_buf = buf_dblwr->write_buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; - fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0, + fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, buf_dblwr->block2, 0, len, (void*) write_buf, NULL); @@ -864,7 +913,7 @@ flush: ut_ad(first_free == buf_dblwr->first_free); for (ulint i = 0; i < first_free; i++) { buf_dblwr_write_block_to_datafile( - buf_dblwr->buf_block_arr[i]); + buf_dblwr->buf_block_arr[i], false); } /* Wake possible simulated aio thread to actually post the @@ -889,12 +938,11 @@ buf_dblwr_add_to_batch( ut_a(buf_page_in_file(bpage)); try_again: - mutex_enter(&(buf_dblwr->mutex)); + mutex_enter(&buf_dblwr->mutex); ut_a(buf_dblwr->first_free <= srv_doublewrite_batch_size); if (buf_dblwr->batch_running) { - mutex_exit(&buf_dblwr->mutex); /* This not nearly as bad as it looks. There is only page_cleaner thread which does background flushing @@ -902,7 +950,10 @@ try_again: point. The only exception is when a user thread is forced to do a flush batch because of a sync checkpoint. */ - os_thread_sleep(TRX_DOUBLEWRITE_BATCH_POLL_DELAY); + ib_int64_t sig_count = os_event_reset(buf_dblwr->b_event); + mutex_exit(&buf_dblwr->mutex); + + os_event_wait_low(buf_dblwr->b_event, sig_count); goto try_again; } @@ -967,7 +1018,8 @@ UNIV_INTERN void buf_dblwr_write_single_page( /*========================*/ - buf_page_t* bpage) /*!< in: buffer block to write */ + buf_page_t* bpage, /*!< in: buffer block to write */ + bool sync) /*!< in: true if sync IO requested */ { ulint n_slots; ulint size; @@ -1004,11 +1056,12 @@ retry: mutex_enter(&buf_dblwr->mutex); if (buf_dblwr->s_reserved == n_slots) { + /* All slots are reserved. */ + ib_int64_t sig_count = + os_event_reset(buf_dblwr->s_event); mutex_exit(&buf_dblwr->mutex); - /* All slots are reserved. Since it involves two IOs - during the processing a sleep of 10ms should be - enough. */ - os_thread_sleep(TRX_DOUBLEWRITE_BATCH_POLL_DELAY); + os_event_wait_low(buf_dblwr->s_event, sig_count); + goto retry; } @@ -1021,9 +1074,14 @@ retry: /* We are guaranteed to find a slot. */ ut_a(i < size); - buf_dblwr->in_use[i] = TRUE; + buf_dblwr->in_use[i] = true; buf_dblwr->s_reserved++; buf_dblwr->buf_block_arr[i] = bpage; + + /* increment the doublewrite flushed pages counter */ + srv_stats.dblwr_pages_written.inc(); + srv_stats.dblwr_writes.inc(); + mutex_exit(&buf_dblwr->mutex); /* Lets see if we are going to write in the first or second @@ -1053,14 +1111,14 @@ retry: memset(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i + zip_size, 0, UNIV_PAGE_SIZE - zip_size); - fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0, + fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, offset, 0, UNIV_PAGE_SIZE, (void*) (buf_dblwr->write_buf + UNIV_PAGE_SIZE * i), NULL); } else { /* It is a regular page. Write it directly to the doublewrite buffer */ - fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0, + fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, offset, 0, UNIV_PAGE_SIZE, (void*) ((buf_block_t*) bpage)->frame, NULL); @@ -1072,22 +1130,6 @@ retry: /* We know that the write has been flushed to disk now and during recovery we will find it in the doublewrite buffer blocks. Next do the write to the intended position. */ - buf_dblwr_write_block_to_datafile(bpage); - - /* Sync the writes to the disk. */ - buf_flush_sync_datafiles(); - - mutex_enter(&buf_dblwr->mutex); - - buf_dblwr->s_reserved--; - buf_dblwr->buf_block_arr[i] = NULL; - buf_dblwr->in_use[i] = FALSE; - - /* increment the doublewrite flushed pages counter */ - srv_stats.dblwr_pages_written.inc(); - srv_stats.dblwr_writes.inc(); - - mutex_exit(&(buf_dblwr->mutex)); - + buf_dblwr_write_block_to_datafile(bpage, sync); } #endif /* !UNIV_HOTBACKUP */ |