diff options
Diffstat (limited to 'storage')
-rw-r--r-- | storage/innobase/handler/ha_innodb.cc | 22 | ||||
-rw-r--r-- | storage/innobase/include/log0log.h | 7 | ||||
-rw-r--r-- | storage/innobase/include/srv0srv.h | 8 | ||||
-rw-r--r-- | storage/innobase/log/log0log.c | 20 | ||||
-rw-r--r-- | storage/innobase/srv/srv0srv.c | 207 |
5 files changed, 221 insertions, 43 deletions
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 887acacbd1f..673c48b037b 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -133,6 +133,13 @@ static my_bool innobase_adaptive_hash_index = TRUE; static char* internal_innobase_data_file_path = NULL; +/* Number of IO operations per second the server can sustain. Scales the + background IO rate; default and minimum are 100. */ +static long innobase_io_capacity = 100; + +/* When TRUE, also flush dirty pages while pct dirty is below max pct dirty */ +static my_bool innobase_extra_dirty_writes = TRUE; + /* The following counter is used to convey information to InnoDB about server activity: in selects it is not sensible to call srv_active_wake_master_thread after each fetch or search, we only do @@ -1586,6 +1593,9 @@ innobase_init( #endif /* UNIV_LOG_ARCHIVE */ srv_log_buffer_size = (ulint) innobase_log_buffer_size; + srv_io_capacity = (ulint) innobase_io_capacity; + srv_extra_dirty_writes = (ibool) innobase_extra_dirty_writes; + /* We set srv_pool_size here in units of 1 kB. InnoDB internally changes the value so that it becomes the number of database pages. */ @@ -8010,6 +8020,16 @@ static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite, "Disable with --skip-innodb-doublewrite.", NULL, NULL, TRUE); +static MYSQL_SYSVAR_BOOL(extra_dirty_writes, innobase_extra_dirty_writes, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Flush dirty buffer pages when dirty max pct is not exceeded", + NULL, NULL, TRUE); + +static MYSQL_SYSVAR_LONG(io_capacity, innobase_io_capacity, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, + "Number of IOPs the server can do. Tunes the background IO rate", + NULL, NULL, 100, 100, (long) (~0UL >> 1), 0); + static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown, PLUGIN_VAR_OPCMDARG, "Speeds up the shutdown process of the InnoDB storage engine. 
Possible " @@ -8225,6 +8245,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(thread_concurrency), MYSQL_SYSVAR(thread_sleep_delay), MYSQL_SYSVAR(autoinc_lock_mode), + MYSQL_SYSVAR(extra_dirty_writes), + MYSQL_SYSVAR(io_capacity), NULL }; diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h index 43662d02a34..337b9f1e783 100644 --- a/storage/innobase/include/log0log.h +++ b/storage/innobase/include/log0log.h @@ -169,6 +169,13 @@ void log_buffer_flush_to_disk(void); /*==========================*/ /******************************************************************** +Writes the log buffer out up to the current lsn. The write is forced to disk +only when the configuration parameter innodb_flush_log_at_trx_commit is 1. */ + +void +log_buffer_flush_maybe_sync(void); +/*==========================*/ +/******************************************************************** Advances the smallest lsn for which there are unflushed dirty blocks in the buffer pool and also may make a new checkpoint. NOTE: this function may only be called if the calling thread owns no synchronization objects! 
*/ diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 91daa6816b2..2f44c3408be 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -91,6 +91,14 @@ extern ulint srv_lock_table_size; extern ulint srv_n_file_io_threads; +/* Number of IO operations per second the server can do */ +extern ulint srv_io_capacity; + +/* If TRUE, also flush dirty pages while below the max dirty percent */ +extern ibool srv_extra_dirty_writes; + + + #ifdef UNIV_LOG_ARCHIVE extern ibool srv_log_archive_on; extern ibool srv_archive_recovery; diff --git a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c index b10c348b24d..7eb5e3db422 100644 --- a/storage/innobase/log/log0log.c +++ b/storage/innobase/log/log0log.c @@ -1517,6 +1517,26 @@ log_buffer_flush_to_disk(void) } /******************************************************************** +Writes the log buffer out up to the current log_sys->lsn. The write is +forced to disk only when innodb_flush_log_at_trx_commit is 1. */ + +void +log_buffer_flush_maybe_sync(void) +/*==========================*/ +{ + dulint lsn; + + mutex_enter(&(log_sys->mutex)); + + lsn = log_sys->lsn; + + mutex_exit(&(log_sys->mutex)); + + /* Force log buffer to disk only when innodb_flush_log_at_trx_commit = 1. */ + log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, + srv_flush_log_at_trx_commit == 1 ? TRUE : FALSE); +} +/******************************************************************** Tries to establish a big enough margin of free space in the log buffer, such that a new log entry can be catenated without an immediate need for a flush. 
*/ static diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index 773b5d583e0..55555fe35f0 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -171,6 +171,12 @@ ulint srv_awe_window_size = 0; /* size in pages; MySQL inits ulint srv_mem_pool_size = ULINT_MAX; /* size in bytes */ ulint srv_lock_table_size = ULINT_MAX; +ulint srv_io_capacity = ULINT_MAX; /* Number of IO operations per + second the server can do */ + +ibool srv_extra_dirty_writes = TRUE; /* Write dirty pages to disk when pct + dirty < max dirty pct */ + ulint srv_n_file_io_threads = ULINT_MAX; #ifdef UNIV_LOG_ARCHIVE @@ -411,6 +417,30 @@ FILE* srv_misc_tmpfile; ulint srv_main_thread_process_no = 0; ulint srv_main_thread_id = 0; +/* The following count work done by srv_master_thread. */ + +/* Iterations by the 'once per second' loop. */ +ulint srv_main_1_second_loops = 0; +/* Calls to sleep by the 'once per second' loop. */ +ulint srv_main_sleeps = 0; +/* 'Once per 10 seconds' loop iterations. NOTE(review): no increment seen. */ +ulint srv_main_10_second_loops = 0; +/* Iterations of the loop bounded by the 'background_loop' label. */ +ulint srv_main_background_loops = 0; +/* Iterations of the loop bounded by the 'flush_loop' label. */ +ulint srv_main_flush_loops = 0; +/* Calls to log_buffer_flush_to_disk. */ +ulint srv_sync_flush = 0; +/* Calls to log_buffer_flush_maybe_sync. */ +ulint srv_async_flush = 0; + +/* Number of microseconds threads wait because of innodb_thread_concurrency. + NOTE(review): no code in this patch appears to update it. */ +static ib_longlong srv_thread_wait_mics = 0; + +/* Number of microseconds for spinlock delay */ +static ib_longlong srv_timed_spin_delay = 0; + /* IMPLEMENTATION OF THE SERVER MAIN PROGRAM ========================================= @@ -630,6 +660,65 @@ are indexed by the type of the thread. 
*/ ulint srv_n_threads_active[SRV_MASTER + 1]; ulint srv_n_threads[SRV_MASTER + 1]; +static void srv_reset_free_tickets(trx_t* trx); + +/************************************************************************* +Returns 'end' minus 'start' in microseconds, or 0 if 'end' is not later. +*/ +static ib_longlong mics_diff(ulint start_sec, ulint start_usec, + ulint end_sec, ulint end_usec) +{ + ib_longlong end_mics = end_sec * 1000000LL + end_usec; + ib_longlong start_mics = start_sec * 1000000LL + start_usec; + + if (end_mics > start_mics) + return end_mics - start_mics; + else + return 0; +} +/* Times SYNC_SPIN_ROUNDS random spin delays into srv_timed_spin_delay. */ +static void time_spin_delay(void) +{ + ulint start_sec, end_sec; + ulint start_usec, end_usec; + ulint i; + + srv_timed_spin_delay = 0; + + ut_usectime(&start_sec, &start_usec); + + for (i = 0; i < SYNC_SPIN_ROUNDS; ++i) + ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); + + ut_usectime(&end_sec, &end_usec); + + srv_timed_spin_delay = mics_diff(start_sec, start_usec, end_sec, end_usec); +} + +/************************************************************************* +Prints the srv_master_thread loop and log flush counters, the thread wait +time and the timed spin delay to 'file'. */ +static +void +srv_print_extra( +/*===================*/ + FILE *file) /* in: output stream */ +{ + fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, " + "%lu 10_second, %lu background, %lu flush\n", + (ulong) srv_main_1_second_loops, (ulong) srv_main_sleeps, + (ulong) srv_main_10_second_loops, (ulong) srv_main_background_loops, + (ulong) srv_main_flush_loops); + fprintf(file, "srv_master_thread log flush: %lu sync, %lu async\n", + (ulong) srv_sync_flush, (ulong) srv_async_flush); + fprintf(file, "srv_wait_thread_mics %lld microseconds, %.1f seconds\n", + srv_thread_wait_mics, + (double) srv_thread_wait_mics / 1000000.0); + fprintf(file, + "spinlock delay for %lu delay %lu rounds is %lld mics\n", + (ulong) srv_spin_wait_delay, (ulong) SYNC_SPIN_ROUNDS, srv_timed_spin_delay); +} + /************************************************************************* Sets the info describing an i/o thread current state. 
*/ @@ -863,6 +952,8 @@ srv_init(void) dict_table_t* table; ulint i; + time_spin_delay(); + srv_sys = mem_alloc(sizeof(srv_sys_t)); kernel_mutex_temp = mem_alloc(sizeof(mutex_t)); @@ -1646,6 +1737,11 @@ srv_printf_innodb_monitor( "Per second averages calculated from the last %lu seconds\n", (ulong)time_elapsed); + fputs("----------\n" + "BACKGROUND THREAD\n" + "----------\n", file); + srv_print_extra(file); + fputs("----------\n" "SEMAPHORES\n" "----------\n", file); @@ -1667,24 +1763,6 @@ srv_printf_innodb_monitor( mutex_exit(&dict_foreign_err_mutex); - lock_print_info_summary(file); - if (trx_start) { - long t = ftell(file); - if (t < 0) { - *trx_start = ULINT_UNDEFINED; - } else { - *trx_start = (ulint) t; - } - } - lock_print_info_all_transactions(file); - if (trx_end) { - long t = ftell(file); - if (t < 0) { - *trx_end = ULINT_UNDEFINED; - } else { - *trx_end = (ulint) t; - } - } fputs("--------\n" "FILE I/O\n" "--------\n", file); @@ -2187,6 +2265,14 @@ srv_wake_master_thread(void) } /************************************************************************* +Returns the number of IO operations that is pct percent of the capacity. + +PCT_IO(5) -> returns the number of IO operations that is 5% of the max +where max is srv_io_capacity. The result is truncated to a ulint. +*/ +#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) (pct) / 100.0))) + +/************************************************************************* The master thread controlling the server. 
*/ os_thread_ret_t @@ -2217,6 +2303,9 @@ srv_master_thread( fprintf(stderr, "Master thread starts, id %lu\n", os_thread_pf(os_thread_get_curr_id())); #endif + fprintf(stderr, "InnoDB master thread running with io_capacity %lu\n", + srv_io_capacity); + srv_main_thread_process_no = os_proc_get_number(); srv_main_thread_id = os_thread_pf(os_thread_get_curr_id()); @@ -2258,10 +2347,12 @@ loop: n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read + buf_pool->n_pages_written; srv_main_thread_op_info = "sleeping"; + srv_main_1_second_loops++; if (!skip_sleep) { os_thread_sleep(1000000); + srv_main_sleeps++; } skip_sleep = FALSE; @@ -2287,27 +2378,28 @@ loop: srv_main_thread_op_info = "flushing log"; log_buffer_flush_to_disk(); + srv_sync_flush++; srv_main_thread_op_info = "making checkpoint"; log_free_check(); - /* If there were less than 5 i/os during the - one second sleep, we assume that there is free - disk i/o capacity available, and it makes sense to - do an insert buffer merge. */ + /* If i/os during one second sleep were less than 5% of + capacity, we assume that there is free disk i/o capacity + available, and it makes sense to do an insert buffer merge. 
*/ n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes; n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + buf_pool->n_pages_written; - if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) { + if (n_pend_ios < PCT_IO(3) && (n_ios - n_ios_old < PCT_IO(5))) { srv_main_thread_op_info = "doing insert buffer merge"; - ibuf_contract_for_n_pages( - TRUE, srv_insert_buffer_batch_size / 4); + ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4); srv_main_thread_op_info = "flushing log"; - log_buffer_flush_to_disk(); + /* No fsync when srv_flush_log_at_trx_commit != 1 */ + log_buffer_flush_maybe_sync(); + srv_async_flush++; } if (UNIV_UNLIKELY(buf_get_modified_ratio_pct() @@ -2316,7 +2408,8 @@ loop: /* Try to keep the number of modified pages in the buffer pool under the limit wished by the user */ - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, + PCT_IO(100), ut_dulint_max); /* If we had to do the flush, it may have taken @@ -2344,30 +2437,40 @@ loop: seconds */ mem_validate_all_blocks(); #endif - /* If there were less than 200 i/os during the 10 second period, - we assume that there is free disk i/o capacity available, and it - makes sense to flush 100 pages. */ + /* If i/os during the 10 second period were less than 200% of + capacity, we assume that there is free disk i/o capacity + available, and it makes sense to flush srv_io_capacity pages. + + Note that this is done regardless of the fraction of dirty + pages relative to the max requested by the user. The one second + loop above requests writes for that case. The writes done here + are not required, and may be disabled. 
*/ n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes; n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + buf_pool->n_pages_written; - if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) { + if (srv_extra_dirty_writes && + n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) { srv_main_thread_op_info = "flushing buffer pool pages"; - buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); + buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max); srv_main_thread_op_info = "flushing log"; - log_buffer_flush_to_disk(); + /* No fsync when srv_flush_log_at_trx_commit != 1 */ + log_buffer_flush_maybe_sync(); + srv_async_flush++; } /* We run a batch of insert buffer merge every 10 seconds, even if the server were active */ srv_main_thread_op_info = "doing insert buffer merge"; - ibuf_contract_for_n_pages(TRUE, srv_insert_buffer_batch_size / 4); + ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4); srv_main_thread_op_info = "flushing log"; - log_buffer_flush_to_disk(); + /* No fsync when srv_flush_log_at_trx_commit != 1 */ + log_buffer_flush_maybe_sync(); + srv_async_flush++; /* We run a full purge every 10 seconds, even if the server were active */ @@ -2393,6 +2496,7 @@ loop: log_buffer_flush_to_disk(); last_flush_time = current_time; + srv_sync_flush++; } } @@ -2406,14 +2510,16 @@ loop: (> 70 %), we assume we can afford reserving the disk(s) for the time it requires to flush 100 pages */ - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, + PCT_IO(100), ut_dulint_max); } else { /* Otherwise, we only flush a small number of pages so that we do not unnecessarily use much disk i/o capacity from other work */ - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10, + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, + PCT_IO(10), ut_dulint_max); } @@ -2447,7 +2553,7 @@ background_loop: /* The server has been quiet for a while: start running background operations */ - + srv_main_background_loops++; 
srv_main_thread_op_info = "doing background drop tables"; n_tables_to_drop = row_drop_tables_for_mysql_in_background(); @@ -2485,6 +2591,7 @@ background_loop: log_buffer_flush_to_disk(); last_flush_time = current_time; + srv_sync_flush++; } } @@ -2502,8 +2609,11 @@ background_loop: if (srv_fast_shutdown && srv_shutdown_state > 0) { n_bytes_merged = 0; } else { - n_bytes_merged = ibuf_contract_for_n_pages( - TRUE, srv_insert_buffer_batch_size); + /* This should do an amount of IO similar to the number of + * dirty pages that will be flushed in the call to + * buf_flush_batch below. Otherwise, the system favors + * clean pages over cleanup throughput. */ + n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IO(100)); } srv_main_thread_op_info = "reserving kernel mutex"; @@ -2517,9 +2627,10 @@ background_loop: flush_loop: srv_main_thread_op_info = "flushing buffer pool pages"; - + srv_main_flush_loops++; if (srv_fast_shutdown < 2) { - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, + PCT_IO(100), ut_dulint_max); } else { /* In the fastest shutdown we do not flush the buffer pool @@ -2542,7 +2653,17 @@ flush_loop: srv_main_thread_op_info = "flushing log"; - log_buffer_flush_to_disk(); + current_time = time(NULL); + if (difftime(current_time, last_flush_time) > 1) { + srv_main_thread_op_info = (char*) "flushing log"; + log_buffer_flush_to_disk(); + last_flush_time = current_time; + srv_sync_flush++; + } else { + /* No fsync when srv_flush_log_at_trx_commit != 1 */ + log_buffer_flush_maybe_sync(); + srv_async_flush++; + } srv_main_thread_op_info = "making checkpoint"; |