summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
Diffstat (limited to 'storage')
-rw-r--r--storage/innobase/handler/ha_innodb.cc22
-rw-r--r--storage/innobase/include/log0log.h7
-rw-r--r--storage/innobase/include/srv0srv.h8
-rw-r--r--storage/innobase/log/log0log.c20
-rw-r--r--storage/innobase/srv/srv0srv.c207
5 files changed, 221 insertions, 43 deletions
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index 887acacbd1f..673c48b037b 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -133,6 +133,13 @@ static my_bool innobase_adaptive_hash_index = TRUE;
static char* internal_innobase_data_file_path = NULL;
+/* Default number of IO per second supported by server. Tunes background
+ IO rate. */
+static long innobase_io_capacity = 100;
+
+/* Write dirty pages when pct dirty is less than max pct dirty */
+static my_bool innobase_extra_dirty_writes = TRUE;
+
/* The following counter is used to convey information to InnoDB
about server activity: in selects it is not sensible to call
srv_active_wake_master_thread after each fetch or search, we only do
@@ -1586,6 +1593,9 @@ innobase_init(
#endif /* UNIV_LOG_ARCHIVE */
srv_log_buffer_size = (ulint) innobase_log_buffer_size;
+ srv_io_capacity = (ulint) innobase_io_capacity;
+ srv_extra_dirty_writes = (ulint) innobase_extra_dirty_writes;
+
/* We set srv_pool_size here in units of 1 kB. InnoDB internally
changes the value so that it becomes the number of database pages. */
@@ -8010,6 +8020,16 @@ static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite,
"Disable with --skip-innodb-doublewrite.",
NULL, NULL, TRUE);
+static MYSQL_SYSVAR_BOOL(extra_dirty_writes, innobase_extra_dirty_writes,
+ PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+ "Flush dirty buffer pages when dirty max pct is not exceeded",
+ NULL, NULL, TRUE);
+
+static MYSQL_SYSVAR_LONG(io_capacity, innobase_io_capacity,
+ PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+ "Number of IOPs the server can do. Tunes the background IO rate",
+ NULL, NULL, 100, 100, ~0L, 0);
+
static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown,
PLUGIN_VAR_OPCMDARG,
"Speeds up the shutdown process of the InnoDB storage engine. Possible "
@@ -8225,6 +8245,8 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(thread_concurrency),
MYSQL_SYSVAR(thread_sleep_delay),
MYSQL_SYSVAR(autoinc_lock_mode),
+ MYSQL_SYSVAR(extra_dirty_writes),
+ MYSQL_SYSVAR(io_capacity),
NULL
};
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index 43662d02a34..337b9f1e783 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -169,6 +169,13 @@ void
log_buffer_flush_to_disk(void);
/*==========================*/
/********************************************************************
+Flushes the log buffer. Forces it to disk depending on the value of
+the configuration parameter innodb_flush_log_at_trx_commit. */
+
+void
+log_buffer_flush_maybe_sync(void);
+/*==========================*/
+/********************************************************************
Advances the smallest lsn for which there are unflushed dirty blocks in the
buffer pool and also may make a new checkpoint. NOTE: this function may only
be called if the calling thread owns no synchronization objects! */
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index 91daa6816b2..2f44c3408be 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -91,6 +91,14 @@ extern ulint srv_lock_table_size;
extern ulint srv_n_file_io_threads;
+/* Number of IO operations per second the server can do */
+extern ulint srv_io_capacity;
+
+/* Flush dirty pages when below max dirty percent */
+extern ibool srv_extra_dirty_writes;
+
+
+
#ifdef UNIV_LOG_ARCHIVE
extern ibool srv_log_archive_on;
extern ibool srv_archive_recovery;
diff --git a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c
index b10c348b24d..7eb5e3db422 100644
--- a/storage/innobase/log/log0log.c
+++ b/storage/innobase/log/log0log.c
@@ -1517,6 +1517,26 @@ log_buffer_flush_to_disk(void)
}
/********************************************************************
+Flush the log buffer. Force it to disk depending on the value of
+innodb_flush_log_at_trx_commit. */
+
+void
+log_buffer_flush_maybe_sync(void)
+/*==========================*/
+{
+ dulint lsn;
+
+ mutex_enter(&(log_sys->mutex));
+
+ lsn = log_sys->lsn;
+
+ mutex_exit(&(log_sys->mutex));
+
+ /* Force log buffer to disk when innodb_flush_log_at_trx_commit = 1. */
+ log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS,
+ srv_flush_log_at_trx_commit == 1 ? TRUE : FALSE);
+}
+/********************************************************************
Tries to establish a big enough margin of free space in the log buffer, such
that a new log entry can be catenated without an immediate need for a flush. */
static
diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
index 773b5d583e0..55555fe35f0 100644
--- a/storage/innobase/srv/srv0srv.c
+++ b/storage/innobase/srv/srv0srv.c
@@ -171,6 +171,12 @@ ulint srv_awe_window_size = 0; /* size in pages; MySQL inits
ulint srv_mem_pool_size = ULINT_MAX; /* size in bytes */
ulint srv_lock_table_size = ULINT_MAX;
+ulint srv_io_capacity = ULINT_MAX; /* Number of IO operations per
+ second the server can do */
+
+ibool srv_extra_dirty_writes = TRUE; /* Write dirty pages to disk when pct
+ dirty < max dirty pct */
+
ulint srv_n_file_io_threads = ULINT_MAX;
#ifdef UNIV_LOG_ARCHIVE
@@ -411,6 +417,30 @@ FILE* srv_misc_tmpfile;
ulint srv_main_thread_process_no = 0;
ulint srv_main_thread_id = 0;
+// The following count work done by srv_master_thread.
+
+// Iterations by the 'once per second' loop.
+ulint srv_main_1_second_loops = 0;
+// Calls to sleep by the 'once per second' loop.
+ulint srv_main_sleeps = 0;
+// Iterations by the 'once per 10 seconds' loop.
+ulint srv_main_10_second_loops = 0;
+// Iterations of the loop bounded by the 'background_loop' label.
+ulint srv_main_background_loops = 0;
+// Iterations of the loop bounded by the 'flush_loop' label.
+ulint srv_main_flush_loops = 0;
+// Calls to log_buffer_flush_to_disk.
+ulint srv_sync_flush = 0;
+// Calls to log_buffer_flush_maybe_sync.
+ulint srv_async_flush = 0;
+
+// Number of microseconds threads wait because of
+// innodb_thread_concurrency
+static ib_longlong srv_thread_wait_mics = 0;
+
+// Number of microseconds for spinlock delay
+static ib_longlong srv_timed_spin_delay = 0;
+
/*
IMPLEMENTATION OF THE SERVER MAIN PROGRAM
=========================================
@@ -630,6 +660,65 @@ are indexed by the type of the thread. */
ulint srv_n_threads_active[SRV_MASTER + 1];
ulint srv_n_threads[SRV_MASTER + 1];
+static void srv_reset_free_tickets(trx_t* trx);
+
+/*************************************************************************
+Return the difference in microseconds between 'end' and 'start'
+*/
+static ib_longlong mics_diff(ulint start_sec, ulint start_usec,
+ ulint end_sec, ulint end_usec)
+{
+ ib_longlong end_mics = end_sec * 1000000LL + end_usec;
+ ib_longlong start_mics = start_sec * 1000000LL + start_usec;
+
+ if (end_mics > start_mics)
+ return end_mics - start_mics;
+ else
+ return 0;
+}
+
+static void time_spin_delay()
+{
+ ulint start_sec, end_sec;
+ ulint start_usec, end_usec;
+ int i;
+
+ srv_timed_spin_delay = 0;
+
+ ut_usectime(&start_sec, &start_usec);
+
+ for (i = 0; i < SYNC_SPIN_ROUNDS; ++i)
+ ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
+
+ ut_usectime(&end_sec, &end_usec);
+
+ srv_timed_spin_delay = mics_diff(start_sec, start_usec, end_sec, end_usec);
+}
+
+/*************************************************************************
+Prints counters for work done by srv_master_thread. */
+
+static
+void
+srv_print_extra(
+/*===================*/
+ FILE *file) /* in: output stream */
+{
+ fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, "
+ "%lu 10_second, %lu background, %lu flush\n",
+ srv_main_1_second_loops, srv_main_sleeps,
+ srv_main_10_second_loops, srv_main_background_loops,
+ srv_main_flush_loops);
+ fprintf(file, "srv_master_thread log flush: %lu sync, %lu async\n",
+ srv_sync_flush, srv_async_flush);
+ fprintf(file, "srv_wait_thread_mics %lld microseconds, %.1f seconds\n",
+ srv_thread_wait_mics,
+ (double) srv_thread_wait_mics / 1000000.0);
+ fprintf(file,
+ "spinlock delay for %d delay %d rounds is %lld mics\n",
+ srv_spin_wait_delay, SYNC_SPIN_ROUNDS, srv_timed_spin_delay);
+}
+
/*************************************************************************
Sets the info describing an i/o thread current state. */
@@ -863,6 +952,8 @@ srv_init(void)
dict_table_t* table;
ulint i;
+ time_spin_delay();
+
srv_sys = mem_alloc(sizeof(srv_sys_t));
kernel_mutex_temp = mem_alloc(sizeof(mutex_t));
@@ -1646,6 +1737,11 @@ srv_printf_innodb_monitor(
"Per second averages calculated from the last %lu seconds\n",
(ulong)time_elapsed);
+ fputs("----------\n"
+ "BACKGROUND THREAD\n"
+ "----------\n", file);
+ srv_print_extra(file);
+
fputs("----------\n"
"SEMAPHORES\n"
"----------\n", file);
@@ -1667,24 +1763,6 @@ srv_printf_innodb_monitor(
mutex_exit(&dict_foreign_err_mutex);
- lock_print_info_summary(file);
- if (trx_start) {
- long t = ftell(file);
- if (t < 0) {
- *trx_start = ULINT_UNDEFINED;
- } else {
- *trx_start = (ulint) t;
- }
- }
- lock_print_info_all_transactions(file);
- if (trx_end) {
- long t = ftell(file);
- if (t < 0) {
- *trx_end = ULINT_UNDEFINED;
- } else {
- *trx_end = (ulint) t;
- }
- }
fputs("--------\n"
"FILE I/O\n"
"--------\n", file);
@@ -2187,6 +2265,14 @@ srv_wake_master_thread(void)
}
/*************************************************************************
+Returns the number of IO operations that is X percent of the capacity.
+
+PCT_IO(5) -> returns the number of IO operations that is 5% of the max
+where max is srv_io_capacity.
+*/
+#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) pct / 100.0)))
+
+/*************************************************************************
The master thread controlling the server. */
os_thread_ret_t
@@ -2217,6 +2303,9 @@ srv_master_thread(
fprintf(stderr, "Master thread starts, id %lu\n",
os_thread_pf(os_thread_get_curr_id()));
#endif
+ fprintf(stderr, "InnoDB master thread running with io_capacity %lu\n",
+ srv_io_capacity);
+
srv_main_thread_process_no = os_proc_get_number();
srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
@@ -2258,10 +2347,12 @@ loop:
n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read
+ buf_pool->n_pages_written;
srv_main_thread_op_info = "sleeping";
+ srv_main_1_second_loops++;
if (!skip_sleep) {
os_thread_sleep(1000000);
+ srv_main_sleeps++;
}
skip_sleep = FALSE;
@@ -2287,27 +2378,28 @@ loop:
srv_main_thread_op_info = "flushing log";
log_buffer_flush_to_disk();
+ srv_sync_flush++;
srv_main_thread_op_info = "making checkpoint";
log_free_check();
- /* If there were less than 5 i/os during the
- one second sleep, we assume that there is free
- disk i/o capacity available, and it makes sense to
- do an insert buffer merge. */
+ /* If i/os during one second sleep were less than 5% of
+ capacity, we assume that there is free disk i/o capacity
+ available, and it makes sense to do an insert buffer merge. */
n_pend_ios = buf_get_n_pending_ios()
+ log_sys->n_pending_writes;
n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
+ buf_pool->n_pages_written;
- if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) {
+ if (n_pend_ios < PCT_IO(3) && (n_ios - n_ios_old < PCT_IO(5))) {
srv_main_thread_op_info = "doing insert buffer merge";
- ibuf_contract_for_n_pages(
- TRUE, srv_insert_buffer_batch_size / 4);
+ ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4);
srv_main_thread_op_info = "flushing log";
- log_buffer_flush_to_disk();
+ /* No fsync when srv_flush_log_at_trx_commit != 1 */
+ log_buffer_flush_maybe_sync();
+ srv_async_flush++;
}
if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
@@ -2316,7 +2408,8 @@ loop:
/* Try to keep the number of modified pages in the
buffer pool under the limit wished by the user */
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
+ PCT_IO(100),
ut_dulint_max);
/* If we had to do the flush, it may have taken
@@ -2344,30 +2437,40 @@ loop:
seconds */
mem_validate_all_blocks();
#endif
- /* If there were less than 200 i/os during the 10 second period,
- we assume that there is free disk i/o capacity available, and it
- makes sense to flush 100 pages. */
+ /* If i/os during the 10 second period were less than 200% of
+ capacity, we assume that there is free disk i/o capacity
+ available, and it makes sense to flush srv_io_capacity pages.
+
+ Note that this is done regardless of the fraction of dirty
+ pages relative to the max requested by the user. The one second
+ loop above requests writes for that case. The writes done here
+ are not required, and may be disabled. */
n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
+ buf_pool->n_pages_written;
- if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {
+ if (srv_extra_dirty_writes &&
+ n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) {
srv_main_thread_op_info = "flushing buffer pool pages";
- buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
+ buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max);
srv_main_thread_op_info = "flushing log";
- log_buffer_flush_to_disk();
+ /* No fsync when srv_flush_log_at_trx_commit != 1 */
+ log_buffer_flush_maybe_sync();
+ srv_async_flush++;
}
/* We run a batch of insert buffer merge every 10 seconds,
even if the server were active */
srv_main_thread_op_info = "doing insert buffer merge";
- ibuf_contract_for_n_pages(TRUE, srv_insert_buffer_batch_size / 4);
+ ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4);
srv_main_thread_op_info = "flushing log";
- log_buffer_flush_to_disk();
+ /* No fsync when srv_flush_log_at_trx_commit != 1 */
+ log_buffer_flush_maybe_sync();
+ srv_async_flush++;
/* We run a full purge every 10 seconds, even if the server
were active */
@@ -2393,6 +2496,7 @@ loop:
log_buffer_flush_to_disk();
last_flush_time = current_time;
+ srv_sync_flush++;
}
}
@@ -2406,14 +2510,16 @@ loop:
(> 70 %), we assume we can afford reserving the disk(s) for
the time it requires to flush 100 pages */
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
+ PCT_IO(100),
ut_dulint_max);
} else {
/* Otherwise, we only flush a small number of pages so that
we do not unnecessarily use much disk i/o capacity from
other work */
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10,
+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
+ PCT_IO(10),
ut_dulint_max);
}
@@ -2447,7 +2553,7 @@ background_loop:
/* The server has been quiet for a while: start running background
operations */
-
+ srv_main_background_loops++;
srv_main_thread_op_info = "doing background drop tables";
n_tables_to_drop = row_drop_tables_for_mysql_in_background();
@@ -2485,6 +2591,7 @@ background_loop:
log_buffer_flush_to_disk();
last_flush_time = current_time;
+ srv_sync_flush++;
}
}
@@ -2502,8 +2609,11 @@ background_loop:
if (srv_fast_shutdown && srv_shutdown_state > 0) {
n_bytes_merged = 0;
} else {
- n_bytes_merged = ibuf_contract_for_n_pages(
- TRUE, srv_insert_buffer_batch_size);
+ /* This should do an amount of IO similar to the number of
+ * dirty pages that will be flushed in the call to
+ * buf_flush_batch below. Otherwise, the system favors
+ * clean pages over cleanup throughput. */
+ n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IO(100));
}
srv_main_thread_op_info = "reserving kernel mutex";
@@ -2517,9 +2627,10 @@ background_loop:
flush_loop:
srv_main_thread_op_info = "flushing buffer pool pages";
-
+ srv_main_flush_loops++;
if (srv_fast_shutdown < 2) {
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
+ n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
+ PCT_IO(100),
ut_dulint_max);
} else {
/* In the fastest shutdown we do not flush the buffer pool
@@ -2542,7 +2653,17 @@ flush_loop:
srv_main_thread_op_info = "flushing log";
- log_buffer_flush_to_disk();
+ current_time = time(NULL);
+ if (difftime(current_time, last_flush_time) > 1) {
+ srv_main_thread_op_info = (char*) "flushing log";
+ log_buffer_flush_to_disk();
+ last_flush_time = current_time;
+ srv_sync_flush++;
+ } else {
+ /* No fsync when srv_flush_log_at_trx_commit != 1 */
+ log_buffer_flush_maybe_sync();
+ srv_async_flush++;
+ }
srv_main_thread_op_info = "making checkpoint";