diff options
Diffstat (limited to 'storage/innobase/srv/srv0srv.c')
-rw-r--r-- | storage/innobase/srv/srv0srv.c | 1346 |
1 files changed, 679 insertions, 667 deletions
diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index 26ea7958d0d..639da1ed2f3 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -1,4 +1,56 @@ -/****************************************************** +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2008, 2009 Google Inc. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ +/*********************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2009, Percona Inc. + +Portions of this file contain modifications contributed and copyrighted +by Percona Inc.. Those modifications are +gratefully acknowledged and are described briefly in the InnoDB +documentation. The contributions by Percona Inc. 
are incorporated with +their permission, and subject to the conditions contained in the file +COPYING.Percona. + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +***********************************************************************/ + +/**************************************************//** +@file srv/srv0srv.c The database server main program NOTE: SQL Server 7 uses something which the documentation @@ -20,14 +72,14 @@ Windows 2000 will have something called thread pooling Another possibility could be to use some very fast user space thread library. This might confuse NT though. 
-(c) 1995 Innobase Oy - Created 10/8/1995 Heikki Tuuri *******************************************************/ + /* Dummy comment */ #include "srv0srv.h" #include "ut0mem.h" +#include "ut0ut.h" #include "os0proc.h" #include "mem0mem.h" #include "mem0pool.h" @@ -42,156 +94,133 @@ Created 10/8/1995 Heikki Tuuri #include "trx0purge.h" #include "ibuf0ibuf.h" #include "buf0flu.h" +#include "buf0lru.h" #include "btr0sea.h" #include "dict0load.h" #include "dict0boot.h" #include "srv0start.h" #include "row0mysql.h" #include "ha_prototypes.h" +#include "trx0i_s.h" +#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */ /* This is set to TRUE if the MySQL user has set it in MySQL; currently affects only FOREIGN KEY definition parsing */ -ibool srv_lower_case_table_names = FALSE; +UNIV_INTERN ibool srv_lower_case_table_names = FALSE; /* The following counter is incremented whenever there is some user activity in the server */ -ulint srv_activity_count = 0; +UNIV_INTERN ulint srv_activity_count = 0; /* The following is the maximum allowed duration of a lock wait. */ -ulint srv_fatal_semaphore_wait_threshold = 600; +UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600; /* How much data manipulation language (DML) statements need to be delayed, in microseconds, in order to reduce the lagging of the purge thread. 
*/ -ulint srv_dml_needed_delay = 0; +UNIV_INTERN ulint srv_dml_needed_delay = 0; -ibool srv_lock_timeout_active = FALSE; -ibool srv_monitor_active = FALSE; -ibool srv_error_monitor_active = FALSE; +UNIV_INTERN ibool srv_lock_timeout_and_monitor_active = FALSE; +UNIV_INTERN ibool srv_error_monitor_active = FALSE; -const char* srv_main_thread_op_info = ""; +UNIV_INTERN const char* srv_main_thread_op_info = ""; -/* Prefix used by MySQL to indicate pre-5.1 table name encoding */ -const char srv_mysql50_table_name_prefix[9] = "#mysql50#"; +/** Prefix used by MySQL to indicate pre-5.1 table name encoding */ +UNIV_INTERN const char srv_mysql50_table_name_prefix[9] = "#mysql50#"; /* Server parameters which are read from the initfile */ /* The following three are dir paths which are catenated before file names, where the file name itself may also contain a path */ -char* srv_data_home = NULL; +UNIV_INTERN char* srv_data_home = NULL; #ifdef UNIV_LOG_ARCHIVE -char* srv_arch_dir = NULL; +UNIV_INTERN char* srv_arch_dir = NULL; #endif /* UNIV_LOG_ARCHIVE */ -ibool srv_file_per_table = FALSE; /* store to its own file each table - created by an user; data dictionary - tables are in the system tablespace - 0 */ -ibool srv_locks_unsafe_for_binlog = FALSE; /* Place locks to - records only i.e. 
do - not use next-key - locking except on - duplicate key checking - and foreign key - checking */ -ulint srv_n_data_files = 0; -char** srv_data_file_names = NULL; -ulint* srv_data_file_sizes = NULL; /* size in database pages */ - -ibool srv_auto_extend_last_data_file = FALSE; /* if TRUE, then we - auto-extend the last data - file */ -ulint srv_last_file_size_max = 0; /* if != 0, this tells - the max size auto-extending - may increase the last data - file size */ -ulong srv_auto_extend_increment = 8; /* If the last data file is - auto-extended, we add this - many pages to it at a time */ -ulint* srv_data_file_is_raw_partition = NULL; +/** store to its own file each table created by an user; data +dictionary tables are in the system tablespace 0 */ +UNIV_INTERN my_bool srv_file_per_table; +/** The file format to use on new *.ibd files. */ +UNIV_INTERN ulint srv_file_format = 0; +/** Whether to check file format during startup. A value of +DICT_TF_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to +set it to the highest format we support. */ +UNIV_INTERN ulint srv_check_file_format_at_startup = DICT_TF_FORMAT_MAX; + +#if DICT_TF_FORMAT_51 +# error "DICT_TF_FORMAT_51 must be 0!" +#endif +/** Place locks to records only i.e. 
do not use next-key locking except +on duplicate key checking and foreign key checking */ +UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE; + +UNIV_INTERN ulint srv_n_data_files = 0; +UNIV_INTERN char** srv_data_file_names = NULL; +/* size in database pages */ +UNIV_INTERN ulint* srv_data_file_sizes = NULL; + +/* if TRUE, then we auto-extend the last data file */ +UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE; +/* if != 0, this tells the max size auto-extending may increase the +last data file size */ +UNIV_INTERN ulint srv_last_file_size_max = 0; +/* If the last data file is auto-extended, we add this +many pages to it at a time */ +UNIV_INTERN ulong srv_auto_extend_increment = 8; +UNIV_INTERN ulint* srv_data_file_is_raw_partition = NULL; /* If the following is TRUE we do not allow inserts etc. This protects the user from forgetting the 'newraw' keyword to my.cnf */ -ibool srv_created_new_raw = FALSE; - -char** srv_log_group_home_dirs = NULL; - -ulint srv_n_log_groups = ULINT_MAX; -ulint srv_n_log_files = ULINT_MAX; -ulint srv_log_file_size = ULINT_MAX; /* size in database pages */ -ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */ -ulong srv_flush_log_at_trx_commit = 1; - -/* Maximum number of times allowed to conditionally acquire -mutex before switching to blocking wait on the mutex */ -#define MAX_MUTEX_NOWAIT 20 - -/* Check whether the number of failed nonblocking mutex -acquisition attempts exceeds maximum allowed value. If so, -srv_printf_innodb_monitor() will request mutex acquisition -with mutex_enter(), which will wait until it gets the mutex. */ -#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT) - -byte srv_latin1_ordering[256] /* The sort order table of the latin1 - character set. 
The following table is - the MySQL order as of Feb 10th, 2002 */ -= { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 -, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F -, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 -, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F -, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27 -, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F -, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37 -, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F -, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47 -, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F -, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57 -, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F -, 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47 -, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F -, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57 -, 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F -, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87 -, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F -, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97 -, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F -, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7 -, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF -, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7 -, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF -, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43 -, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49 -, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xD7 -, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xDF -, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43 -, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49 -, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xF7 -, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF -}; - -ulint srv_pool_size = ULINT_MAX; /* size in pages; MySQL inits - this to size in kilobytes but - we normalize this to pages in - srv_boot() */ -ulint srv_awe_window_size = 0; /* size in pages; MySQL inits - this to bytes, but we - normalize it to pages in - srv_boot() */ -ulint srv_mem_pool_size = ULINT_MAX; /* size in 
bytes */ -ulint srv_lock_table_size = ULINT_MAX; - -ulint srv_n_file_io_threads = ULINT_MAX; +UNIV_INTERN ibool srv_created_new_raw = FALSE; + +UNIV_INTERN char** srv_log_group_home_dirs = NULL; + +UNIV_INTERN ulint srv_n_log_groups = ULINT_MAX; +UNIV_INTERN ulint srv_n_log_files = ULINT_MAX; +/* size in database pages */ +UNIV_INTERN ulint srv_log_file_size = ULINT_MAX; +/* size in database pages */ +UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX; +UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1; + +/* Try to flush dirty pages so as to avoid IO bursts at +the checkpoints. */ +UNIV_INTERN char srv_adaptive_flushing = TRUE; + +/* The sort order table of the MySQL latin1_swedish_ci character set +collation */ +UNIV_INTERN const byte* srv_latin1_ordering; + +/* use os/external memory allocator */ +UNIV_INTERN my_bool srv_use_sys_malloc = TRUE; +/* requested size in kilobytes */ +UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX; +/* previously requested size */ +UNIV_INTERN ulint srv_buf_pool_old_size; +/* current size in kilobytes */ +UNIV_INTERN ulint srv_buf_pool_curr_size = 0; +/* size in bytes */ +UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX; +UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX; + +/* This parameter is deprecated. Use srv_n_io_[read|write]_threads +instead. */ +UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX; +UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX; +UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX; + +/* User settable value of the number of pages that must be present +in the buffer cache and accessed sequentially for InnoDB to trigger a +readahead request. 
*/ +UNIV_INTERN ulong srv_read_ahead_threshold = 56; #ifdef UNIV_LOG_ARCHIVE -ibool srv_log_archive_on = FALSE; -ibool srv_archive_recovery = 0; -dulint srv_archive_recovery_limit_lsn; +UNIV_INTERN ibool srv_log_archive_on = FALSE; +UNIV_INTERN ibool srv_archive_recovery = 0; +UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn; #endif /* UNIV_LOG_ARCHIVE */ -ulint srv_lock_wait_timeout = 1024 * 1024 * 1024; - /* This parameter is used to throttle the number of insert buffers that are merged in a batch. By increasing this parameter on a faster disk you can possibly reduce the number of I/O operations performed to complete the @@ -200,92 +229,83 @@ background loop when the system is idle (low load), on a busy system the parameter is scaled down by a factor of 4, this is to avoid putting a heavier load on the I/O sub system. */ -ulong srv_insert_buffer_batch_size = 20; +UNIV_INTERN ulong srv_insert_buffer_batch_size = 20; + +UNIV_INTERN char* srv_file_flush_method_str = NULL; +UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC; +UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; -char* srv_file_flush_method_str = NULL; -ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC; -ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; +UNIV_INTERN ulint srv_max_n_open_files = 300; -ulint srv_max_n_open_files = 300; +/* Number of IO operations per second the server can do */ +UNIV_INTERN ulong srv_io_capacity = 200; /* The InnoDB main thread tries to keep the ratio of modified pages in the buffer pool to all database pages in the buffer pool smaller than the following number. But it is not guaranteed that the value stays below that during a time of heavy update/insert activity. 
*/ -ulong srv_max_buf_pool_modified_pct = 90; +UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75; /* variable counts amount of data read in total (in bytes) */ -ulint srv_data_read = 0; +UNIV_INTERN ulint srv_data_read = 0; /* here we count the amount of data written in total (in bytes) */ -ulint srv_data_written = 0; +UNIV_INTERN ulint srv_data_written = 0; /* the number of the log write requests done */ -ulint srv_log_write_requests = 0; +UNIV_INTERN ulint srv_log_write_requests = 0; /* the number of physical writes to the log performed */ -ulint srv_log_writes = 0; +UNIV_INTERN ulint srv_log_writes = 0; /* amount of data written to the log files in bytes */ -ulint srv_os_log_written = 0; +UNIV_INTERN ulint srv_os_log_written = 0; /* amount of writes being done to the log files */ -ulint srv_os_log_pending_writes = 0; +UNIV_INTERN ulint srv_os_log_pending_writes = 0; /* we increase this counter, when there we don't have enough space in the log buffer and have to flush it */ -ulint srv_log_waits = 0; +UNIV_INTERN ulint srv_log_waits = 0; /* this variable counts the amount of times, when the doublewrite buffer was flushed */ -ulint srv_dblwr_writes = 0; +UNIV_INTERN ulint srv_dblwr_writes = 0; /* here we store the number of pages that have been flushed to the doublewrite buffer */ -ulint srv_dblwr_pages_written = 0; +UNIV_INTERN ulint srv_dblwr_pages_written = 0; /* in this variable we store the number of write requests issued */ -ulint srv_buf_pool_write_requests = 0; +UNIV_INTERN ulint srv_buf_pool_write_requests = 0; /* here we store the number of times when we had to wait for a free page in the buffer pool. It happens when the buffer pool is full and we need to make a flush, in order to be able to read or create a page. 
*/ -ulint srv_buf_pool_wait_free = 0; +UNIV_INTERN ulint srv_buf_pool_wait_free = 0; /* variable to count the number of pages that were written from buffer pool to the disk */ -ulint srv_buf_pool_flushed = 0; +UNIV_INTERN ulint srv_buf_pool_flushed = 0; -/* variable to count the number of buffer pool reads that led to the +/** Number of buffer pool reads that led to the reading of a disk page */ -ulint srv_buf_pool_reads = 0; - -/* variable to count the number of sequential read-aheads */ -ulint srv_read_ahead_seq = 0; - -/* variable to count the number of random read-aheads */ -ulint srv_read_ahead_rnd = 0; - -/* An option to enable the fix for "Bug#43660 SHOW INDEXES/ANALYZE does -NOT update cardinality for indexes of InnoDB table". By default we are -running with the fix disabled because MySQL 5.1 is frozen for such -behavioral changes. */ -char srv_use_legacy_cardinality_algorithm = TRUE; +UNIV_INTERN ulint srv_buf_pool_reads = 0; /* structure to pass status variables to MySQL */ -export_struc export_vars; +UNIV_INTERN export_struc export_vars; /* If the following is != 0 we do not allow inserts etc. This protects the user from forgetting the innodb_force_recovery keyword to my.cnf */ -ulint srv_force_recovery = 0; +UNIV_INTERN ulint srv_force_recovery = 0; /*-----------------------*/ /* We are prepared for a situation that we have this many threads waiting for a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the value. */ -ulint srv_max_n_threads = 0; +UNIV_INTERN ulint srv_max_n_threads = 0; /* The following controls how many threads we let inside InnoDB concurrently: threads waiting for locks are not counted into the number because otherwise @@ -295,43 +315,38 @@ Value 10 should be good if there are less than 4 processors + 4 disks in the computer. Bigger computers need bigger values. Value 0 will disable the concurrency check. 
*/ -ulong srv_thread_concurrency = 0; - -os_fast_mutex_t srv_conc_mutex; /* this mutex protects srv_conc data - structures */ -lint srv_conc_n_threads = 0; /* number of transactions that - have declared_to_be_inside_innodb - set. It used to be a non-error - for this value to drop below - zero temporarily. This is no - longer true. We'll, however, - keep the lint datatype to add - assertions to catch any corner - cases that we may have - missed. */ -ulint srv_conc_n_waiting_threads = 0; /* number of OS threads waiting in the - FIFO for a permission to enter InnoDB - */ +UNIV_INTERN ulong srv_thread_concurrency = 0; + +/* this mutex protects srv_conc data structures */ +UNIV_INTERN os_fast_mutex_t srv_conc_mutex; +/* number of transactions that have declared_to_be_inside_innodb set. +It used to be a non-error for this value to drop below zero temporarily. +This is no longer true. We'll, however, keep the lint datatype to add +assertions to catch any corner cases that we may have missed. */ +UNIV_INTERN lint srv_conc_n_threads = 0; +/* number of OS threads waiting in the FIFO for a permission to enter +InnoDB */ +UNIV_INTERN ulint srv_conc_n_waiting_threads = 0; typedef struct srv_conc_slot_struct srv_conc_slot_t; struct srv_conc_slot_struct{ - os_event_t event; /* event to wait */ - ibool reserved; /* TRUE if slot + os_event_t event; /*!< event to wait */ + ibool reserved; /*!< TRUE if slot reserved */ - ibool wait_ended; /* TRUE when another + ibool wait_ended; /*!< TRUE when another thread has already set the event and the thread in this slot is free to proceed; but reserved may still be TRUE at that point */ - UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /* queue node */ + UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /*!< queue node */ }; -UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue; /* queue of threads - waiting to get in */ -srv_conc_slot_t* srv_conc_slots; /* array of wait - slots */ +/* queue of threads waiting to get in */ +UNIV_INTERN 
UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue; +/* array of wait slots */ +UNIV_INTERN srv_conc_slot_t* srv_conc_slots; /* Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket at srv_conc_enter_innodb */ @@ -342,91 +357,121 @@ SQL query after it has once got the ticket at srv_conc_enter_innodb */ merge to completion before shutdown. If it is set to 2, do not even flush the buffer pool to data files at the shutdown: we effectively 'crash' InnoDB (but lose no committed transactions). */ -ulint srv_fast_shutdown = 0; +UNIV_INTERN ulint srv_fast_shutdown = 0; /* Generate a innodb_status.<pid> file */ -ibool srv_innodb_status = FALSE; +UNIV_INTERN ibool srv_innodb_status = FALSE; + +/* When estimating number of different key values in an index, sample +this many index pages */ +UNIV_INTERN unsigned long long srv_stats_sample_pages = 8; -ibool srv_use_doublewrite_buf = TRUE; -ibool srv_use_checksums = TRUE; +UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE; +UNIV_INTERN ibool srv_use_checksums = TRUE; -ibool srv_set_thread_priorities = TRUE; -int srv_query_thread_priority = 0; +UNIV_INTERN ibool srv_set_thread_priorities = TRUE; +UNIV_INTERN int srv_query_thread_priority = 0; -/* TRUE if the Address Windowing Extensions of Windows are used; then we must -disable adaptive hash indexes */ -ibool srv_use_awe = FALSE; -ibool srv_use_adaptive_hash_indexes = TRUE; +UNIV_INTERN ulong srv_replication_delay = 0; /*-------------------------------------------*/ -ulong srv_n_spin_wait_rounds = 20; -ulong srv_n_free_tickets_to_enter = 500; -ulong srv_thread_sleep_delay = 10000; -ulint srv_spin_wait_delay = 5; -ibool srv_priority_boost = TRUE; - -ibool srv_print_thread_releases = FALSE; -ibool srv_print_lock_waits = FALSE; -ibool srv_print_buf_io = FALSE; -ibool srv_print_log_io = FALSE; -ibool srv_print_latch_waits = FALSE; - -ulint srv_n_rows_inserted = 0; -ulint srv_n_rows_updated = 0; -ulint srv_n_rows_deleted = 0; 
-ulint srv_n_rows_read = 0; -#ifndef UNIV_HOTBACKUP +UNIV_INTERN ulong srv_n_spin_wait_rounds = 30; +UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500; +UNIV_INTERN ulong srv_thread_sleep_delay = 10000; +UNIV_INTERN ulong srv_spin_wait_delay = 6; +UNIV_INTERN ibool srv_priority_boost = TRUE; + +#ifdef UNIV_DEBUG +UNIV_INTERN ibool srv_print_thread_releases = FALSE; +UNIV_INTERN ibool srv_print_lock_waits = FALSE; +UNIV_INTERN ibool srv_print_buf_io = FALSE; +UNIV_INTERN ibool srv_print_log_io = FALSE; +UNIV_INTERN ibool srv_print_latch_waits = FALSE; +#endif /* UNIV_DEBUG */ + +UNIV_INTERN ulint srv_n_rows_inserted = 0; +UNIV_INTERN ulint srv_n_rows_updated = 0; +UNIV_INTERN ulint srv_n_rows_deleted = 0; +UNIV_INTERN ulint srv_n_rows_read = 0; + static ulint srv_n_rows_inserted_old = 0; static ulint srv_n_rows_updated_old = 0; static ulint srv_n_rows_deleted_old = 0; static ulint srv_n_rows_read_old = 0; -#endif /* !UNIV_HOTBACKUP */ -ulint srv_n_lock_wait_count = 0; -ulint srv_n_lock_wait_current_count = 0; -ib_longlong srv_n_lock_wait_time = 0; -ulint srv_n_lock_max_wait_time = 0; +UNIV_INTERN ulint srv_n_lock_wait_count = 0; +UNIV_INTERN ulint srv_n_lock_wait_current_count = 0; +UNIV_INTERN ib_int64_t srv_n_lock_wait_time = 0; +UNIV_INTERN ulint srv_n_lock_max_wait_time = 0; /* Set the following to 0 if you want InnoDB to write messages on stderr on startup/shutdown */ -ibool srv_print_verbose_log = TRUE; -ibool srv_print_innodb_monitor = FALSE; -ibool srv_print_innodb_lock_monitor = FALSE; -ibool srv_print_innodb_tablespace_monitor = FALSE; -ibool srv_print_innodb_table_monitor = FALSE; +UNIV_INTERN ibool srv_print_verbose_log = TRUE; +UNIV_INTERN ibool srv_print_innodb_monitor = FALSE; +UNIV_INTERN ibool srv_print_innodb_lock_monitor = FALSE; +UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE; +UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE; /* Array of English strings describing the current state of an i/o handler thread */ -const 
char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS]; -const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS]; +UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS]; +UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS]; -time_t srv_last_monitor_time; +UNIV_INTERN time_t srv_last_monitor_time; -mutex_t srv_innodb_monitor_mutex; +UNIV_INTERN mutex_t srv_innodb_monitor_mutex; /* Mutex for locking srv_monitor_file */ -mutex_t srv_monitor_file_mutex; +UNIV_INTERN mutex_t srv_monitor_file_mutex; /* Temporary file for innodb monitor output */ -FILE* srv_monitor_file; +UNIV_INTERN FILE* srv_monitor_file; /* Mutex for locking srv_dict_tmpfile. This mutex has a very high rank; threads reserving it should not be holding any InnoDB latches. */ -mutex_t srv_dict_tmpfile_mutex; +UNIV_INTERN mutex_t srv_dict_tmpfile_mutex; /* Temporary file for output from the data dictionary */ -FILE* srv_dict_tmpfile; +UNIV_INTERN FILE* srv_dict_tmpfile; /* Mutex for locking srv_misc_tmpfile. This mutex has a very low rank; threads reserving it should not acquire any further latches or sleep before releasing this one. */ -mutex_t srv_misc_tmpfile_mutex; +UNIV_INTERN mutex_t srv_misc_tmpfile_mutex; /* Temporary file for miscellanous diagnostic output */ -FILE* srv_misc_tmpfile; - -ulint srv_main_thread_process_no = 0; -ulint srv_main_thread_id = 0; +UNIV_INTERN FILE* srv_misc_tmpfile; + +UNIV_INTERN ulint srv_main_thread_process_no = 0; +UNIV_INTERN ulint srv_main_thread_id = 0; + +/* The following count work done by srv_master_thread. */ + +/* Iterations by the 'once per second' loop. */ +static ulint srv_main_1_second_loops = 0; +/* Calls to sleep by the 'once per second' loop. */ +static ulint srv_main_sleeps = 0; +/* Iterations by the 'once per 10 seconds' loop. */ +static ulint srv_main_10_second_loops = 0; +/* Iterations of the loop bounded by the 'background_loop' label. 
*/ +static ulint srv_main_background_loops = 0; +/* Iterations of the loop bounded by the 'flush_loop' label. */ +static ulint srv_main_flush_loops = 0; +/* Log writes involving flush. */ +static ulint srv_log_writes_and_flush = 0; + +/* This is only ever touched by the master thread. It records the +time when the last flush of log file has happened. The master +thread ensures that we flush the log files at least once per +second. */ +static time_t srv_last_log_flush_time; + +/* The master thread performs various tasks based on the current +state of IO activity and the level of IO utilization in past +intervals. The following macros define thresholds for these conditions. */ +#define SRV_PEND_IO_THRESHOLD (PCT_IO(3)) +#define SRV_RECENT_IO_ACTIVITY (PCT_IO(5)) +#define SRV_PAST_IO_ACTIVITY (PCT_IO(200)) /* IMPLEMENTATION OF THE SERVER MAIN PROGRAM @@ -562,7 +607,7 @@ future, but at the moment we plan to implement a more coarse solution, which could be called a global priority inheritance. If a thread has to wait for a long time, say 300 milliseconds, for a resource, we just guess that it may be waiting for a resource owned by a background -thread, and boost the the priority of all runnable background threads +thread, and boost the priority of all runnable background threads to the normal level. The background threads then themselves adjust their fixed priority back to background after releasing all resources they had (or, at some fixed points in their program code). @@ -598,63 +643,82 @@ Unix.*/ /* Thread slot in the thread table */ struct srv_slot_struct{ - os_thread_id_t id; /* thread id */ - os_thread_t handle; /* thread handle */ - ulint type; /* thread type: user, utility etc. */ - ibool in_use; /* TRUE if this slot is in use */ - ibool suspended; /* TRUE if the thread is waiting + os_thread_id_t id; /*!< thread id */ + os_thread_t handle; /*!< thread handle */ + unsigned type:3; /*!< thread type: user, utility etc. 
*/ + unsigned in_use:1; /*!< TRUE if this slot is in use */ + unsigned suspended:1; /*!< TRUE if the thread is waiting for the event of this slot */ - ib_time_t suspend_time; /* time when the thread was + ib_time_t suspend_time; /*!< time when the thread was suspended */ - os_event_t event; /* event used in suspending the + os_event_t event; /*!< event used in suspending the thread when it has nothing to do */ - que_thr_t* thr; /* suspended query thread (only + que_thr_t* thr; /*!< suspended query thread (only used for MySQL threads) */ }; /* Table for MySQL threads where they will be suspended to wait for locks */ -srv_slot_t* srv_mysql_table = NULL; +UNIV_INTERN srv_slot_t* srv_mysql_table = NULL; -os_event_t srv_lock_timeout_thread_event; +UNIV_INTERN os_event_t srv_lock_timeout_thread_event; -srv_sys_t* srv_sys = NULL; +UNIV_INTERN srv_sys_t* srv_sys = NULL; -byte srv_pad1[64]; /* padding to prevent other memory update - hotspots from residing on the same memory - cache line */ -mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs, - query threads, and lock table */ -byte srv_pad2[64]; /* padding to prevent other memory update - hotspots from residing on the same memory - cache line */ +/* padding to prevent other memory update hotspots from residing on +the same memory cache line */ +UNIV_INTERN byte srv_pad1[64]; +/* mutex protecting the server, trx structs, query threads, and lock table */ +UNIV_INTERN mutex_t* kernel_mutex_temp; +/* padding to prevent other memory update hotspots from residing on +the same memory cache line */ +UNIV_INTERN byte srv_pad2[64]; +#if 0 /* The following three values measure the urgency of the jobs of buffer, version, and insert threads. They may vary from 0 - 1000. The server mutex protects all these variables. The low-water values tell that the server can acquiesce the utility when the value drops below this low-water mark. 
*/ -ulint srv_meter[SRV_MASTER + 1]; -ulint srv_meter_low_water[SRV_MASTER + 1]; -ulint srv_meter_high_water[SRV_MASTER + 1]; -ulint srv_meter_high_water2[SRV_MASTER + 1]; -ulint srv_meter_foreground[SRV_MASTER + 1]; +static ulint srv_meter[SRV_MASTER + 1]; +static ulint srv_meter_low_water[SRV_MASTER + 1]; +static ulint srv_meter_high_water[SRV_MASTER + 1]; +static ulint srv_meter_high_water2[SRV_MASTER + 1]; +static ulint srv_meter_foreground[SRV_MASTER + 1]; +#endif /* The following values give info about the activity going on in the database. They are protected by the server mutex. The arrays are indexed by the type of the thread. */ -ulint srv_n_threads_active[SRV_MASTER + 1]; -ulint srv_n_threads[SRV_MASTER + 1]; +UNIV_INTERN ulint srv_n_threads_active[SRV_MASTER + 1]; +UNIV_INTERN ulint srv_n_threads[SRV_MASTER + 1]; -/************************************************************************* -Sets the info describing an i/o thread current state. */ +/*********************************************************************** +Prints the srv_master_thread loop and log flush counters to a stream. */ +static +void +srv_print_master_thread_info( +/*=========================*/ + FILE *file) /* in: output stream; ulint counters are cast to ulong for %lu */ +{ + fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, " + "%lu 10_second, %lu background, %lu flush\n", + (ulong) srv_main_1_second_loops, (ulong) srv_main_sleeps, + (ulong) srv_main_10_second_loops, (ulong) srv_main_background_loops, + (ulong) srv_main_flush_loops); + fprintf(file, "srv_master_thread log flush and writes: %lu\n", + (ulong) srv_log_writes_and_flush); +} +/*********************************************************************//** +Sets the info describing an i/o thread current state. 
*/ +UNIV_INTERN void srv_set_io_thread_op_info( /*======================*/ - ulint i, /* in: the 'segment' of the i/o thread */ - const char* str) /* in: constant char string describing the + ulint i, /*!< in: the 'segment' of the i/o thread */ + const char* str) /*!< in: constant char string describing the state */ { ut_a(i < SRV_MAX_N_IO_THREADS); @@ -662,25 +726,25 @@ srv_set_io_thread_op_info( srv_io_thread_op_info[i] = str; } -/************************************************************************* +/*********************************************************************//** Accessor function to get pointer to n'th slot in the server thread -table. */ +table. +@return pointer to the slot */ static srv_slot_t* srv_table_get_nth_slot( /*===================*/ - /* out: pointer to the slot */ - ulint index) /* in: index of the slot */ + ulint index) /*!< in: index of the slot */ { ut_a(index < OS_THREAD_MAX_N); return(srv_sys->threads + index); } -#ifndef UNIV_HOTBACKUP -/************************************************************************* -Gets the number of threads in the system. */ - +/*********************************************************************//** +Gets the number of threads in the system. +@return sum of srv_n_threads[] */ +UNIV_INTERN ulint srv_get_n_threads(void) /*===================*/ @@ -700,16 +764,16 @@ srv_get_n_threads(void) return(n_threads); } -/************************************************************************* +/*********************************************************************//** Reserves a slot in the thread table for the current thread. Also creates the thread local storage struct for the current thread. NOTE! The server mutex -has to be reserved by the caller! */ +has to be reserved by the caller! +@return reserved slot index */ static ulint srv_table_reserve_slot( /*===================*/ - /* out: reserved slot index */ - ulint type) /* in: type of the thread: one of SRV_COM, ... 
*/ + enum srv_thread_type type) /*!< in: type of the thread */ { srv_slot_t* slot; ulint i; @@ -729,9 +793,9 @@ srv_table_reserve_slot( slot->in_use = TRUE; slot->suspended = FALSE; + slot->type = type; slot->id = os_thread_get_curr_id(); slot->handle = os_thread_get_curr(); - slot->type = type; thr_local_create(); @@ -740,19 +804,19 @@ srv_table_reserve_slot( return(i); } -/************************************************************************* +/*********************************************************************//** Suspends the calling thread to wait for the event in its thread slot. -NOTE! The server mutex has to be reserved by the caller! */ +NOTE! The server mutex has to be reserved by the caller! +@return event for the calling thread to wait */ static os_event_t srv_suspend_thread(void) /*====================*/ - /* out: event for the calling thread to wait */ { - srv_slot_t* slot; - os_event_t event; - ulint slot_no; - ulint type; + srv_slot_t* slot; + os_event_t event; + ulint slot_no; + enum srv_thread_type type; ut_ad(mutex_own(&kernel_mutex)); @@ -760,9 +824,8 @@ srv_suspend_thread(void) if (srv_print_thread_releases) { fprintf(stderr, - "Suspending thread %lu to slot %lu meter %lu\n", - (ulong) os_thread_get_curr_id(), (ulong) slot_no, - (ulong) srv_meter[SRV_RECOVERY]); + "Suspending thread %lu to slot %lu\n", + (ulong) os_thread_get_curr_id(), (ulong) slot_no); } slot = srv_table_get_nth_slot(slot_no); @@ -784,20 +847,18 @@ srv_suspend_thread(void) return(event); } -#endif /* !UNIV_HOTBACKUP */ -/************************************************************************* +/*********************************************************************//** Releases threads of the type given from suspension in the thread table. -NOTE! The server mutex has to be reserved by the caller! */ - +NOTE! The server mutex has to be reserved by the caller! 
+@return number of threads released: this may be less than n if not +enough threads were suspended at the moment */ +UNIV_INTERN ulint srv_release_threads( /*================*/ - /* out: number of threads released: this may be - < n if not enough threads were suspended at the - moment */ - ulint type, /* in: thread type */ - ulint n) /* in: number of threads to release */ + enum srv_thread_type type, /*!< in: thread type */ + ulint n) /*!< in: number of threads to release */ { srv_slot_t* slot; ulint i; @@ -823,10 +884,9 @@ srv_release_threads( if (srv_print_thread_releases) { fprintf(stderr, "Releasing thread %lu type %lu" - " from slot %lu meter %lu\n", + " from slot %lu\n", (ulong) slot->id, (ulong) type, - (ulong) i, - (ulong) srv_meter[SRV_RECOVERY]); + (ulong) i); } count++; @@ -840,17 +900,17 @@ srv_release_threads( return(count); } -/************************************************************************* -Returns the calling thread type. */ - -ulint +/*********************************************************************//** +Returns the calling thread type. +@return SRV_COM, ... */ +UNIV_INTERN +enum srv_thread_type srv_get_thread_type(void) /*=====================*/ - /* out: SRV_COM, ... */ { - ulint slot_no; - srv_slot_t* slot; - ulint type; + ulint slot_no; + srv_slot_t* slot; + enum srv_thread_type type; mutex_enter(&kernel_mutex); @@ -868,16 +928,15 @@ srv_get_thread_type(void) return(type); } -/************************************************************************* +/*********************************************************************//** Initializes the server. 
*/ - +UNIV_INTERN void srv_init(void) /*==========*/ { srv_conc_slot_t* conc_slot; srv_slot_t* slot; - dict_table_t* table; ulint i; srv_sys = mem_alloc(sizeof(srv_sys_t)); @@ -912,39 +971,20 @@ srv_init(void) for (i = 0; i < SRV_MASTER + 1; i++) { srv_n_threads_active[i] = 0; srv_n_threads[i] = 0; +#if 0 srv_meter[i] = 30; srv_meter_low_water[i] = 50; srv_meter_high_water[i] = 100; srv_meter_high_water2[i] = 200; srv_meter_foreground[i] = 250; +#endif } UT_LIST_INIT(srv_sys->tasks); - /* create dummy table and index for old-style infimum and supremum */ - table = dict_mem_table_create("SYS_DUMMY1", - DICT_HDR_SPACE, 1, 0); - dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR, - DATA_ENGLISH | DATA_NOT_NULL, 8); - - srv_sys->dummy_ind1 = dict_mem_index_create( - "SYS_DUMMY1", "SYS_DUMMY1", DICT_HDR_SPACE, 0, 1); - dict_index_add_col(srv_sys->dummy_ind1, table, (dict_col_t*) - dict_table_get_nth_col(table, 0), 0); - srv_sys->dummy_ind1->table = table; - /* create dummy table and index for new-style infimum and supremum */ - table = dict_mem_table_create("SYS_DUMMY2", - DICT_HDR_SPACE, 1, DICT_TF_COMPACT); - dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR, - DATA_ENGLISH | DATA_NOT_NULL, 8); - srv_sys->dummy_ind2 = dict_mem_index_create( - "SYS_DUMMY2", "SYS_DUMMY2", DICT_HDR_SPACE, 0, 1); - dict_index_add_col(srv_sys->dummy_ind2, table, (dict_col_t*) - dict_table_get_nth_col(table, 0), 0); - srv_sys->dummy_ind2->table = table; - - /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ - srv_sys->dummy_ind1->cached = srv_sys->dummy_ind2->cached = TRUE; + /* Create dummy indexes for infimum and supremum records */ + + dict_ind_init(); /* Init the server concurrency restriction data structures */ @@ -960,26 +1000,45 @@ srv_init(void) conc_slot->event = os_event_create(NULL); ut_a(conc_slot->event); } -} -/************************************************************************* -Frees the OS fast mutex created in srv_init(). 
*/ + /* Initialize some INFORMATION SCHEMA internal structures */ + trx_i_s_cache_init(trx_i_s_cache); +} +/*********************************************************************//** +Frees the data structures created in srv_init(). */ +UNIV_INTERN void srv_free(void) /*==========*/ { os_fast_mutex_free(&srv_conc_mutex); + mem_free(srv_conc_slots); + srv_conc_slots = NULL; + + mem_free(srv_sys->threads); + mem_free(srv_sys); + srv_sys = NULL; + + mem_free(kernel_mutex_temp); + kernel_mutex_temp = NULL; + mem_free(srv_mysql_table); + srv_mysql_table = NULL; + + trx_i_s_cache_free(trx_i_s_cache); } -/************************************************************************* +/*********************************************************************//** Initializes the synchronization primitives, memory system, and the thread local storage. */ - +UNIV_INTERN void srv_general_init(void) /*==================*/ { + ut_mem_init(); + /* Reset the system variables in the recovery module. */ + recv_sys_var_init(); os_sync_init(); sync_init(); mem_init(srv_mem_pool_size); @@ -989,16 +1048,16 @@ srv_general_init(void) /*======================= InnoDB Server FIFO queue =======================*/ /* Maximum allowable purge history length. <=0 means 'infinite'. */ -ulong srv_max_purge_lag = 0; +UNIV_INTERN ulong srv_max_purge_lag = 0; -/************************************************************************* +/*********************************************************************//** Puts an OS thread to wait if there are too many concurrent threads (>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. 
*/ - +UNIV_INTERN void srv_conc_enter_innodb( /*==================*/ - trx_t* trx) /* in: transaction object associated with the + trx_t* trx) /*!< in: transaction object associated with the thread */ { ibool has_slept = FALSE; @@ -1008,11 +1067,10 @@ srv_conc_enter_innodb( if (trx->mysql_thd != NULL && thd_is_replication_slave_thread(trx->mysql_thd)) { - /* TODO Do something more interesting (based on a config - parameter). Some users what to give the replication - thread very low priority, see http://bugs.mysql.com/25078 - This can be done by introducing - innodb_replication_delay(ms) config parameter */ + UT_WAIT_FOR(srv_conc_n_threads + < (lint)srv_thread_concurrency, + srv_replication_delay * 1000); + return; } @@ -1152,14 +1210,14 @@ retry: os_fast_mutex_unlock(&srv_conc_mutex); } -/************************************************************************* +/*********************************************************************//** This lets a thread enter InnoDB regardless of the number of threads inside InnoDB. This must be called when a thread ends a lock wait. */ - +UNIV_INTERN void srv_conc_force_enter_innodb( /*========================*/ - trx_t* trx) /* in: transaction object associated with the + trx_t* trx) /*!< in: transaction object associated with the thread */ { if (UNIV_LIKELY(!srv_thread_concurrency)) { @@ -1178,14 +1236,14 @@ srv_conc_force_enter_innodb( os_fast_mutex_unlock(&srv_conc_mutex); } -/************************************************************************* +/*********************************************************************//** This must be called when a thread exits InnoDB in a lock wait or at the end of an SQL statement. 
*/ - +UNIV_INTERN void srv_conc_force_exit_innodb( /*=======================*/ - trx_t* trx) /* in: transaction object associated with the + trx_t* trx) /*!< in: transaction object associated with the thread */ { srv_conc_slot_t* slot = NULL; @@ -1235,13 +1293,13 @@ srv_conc_force_exit_innodb( } } -/************************************************************************* +/*********************************************************************//** This must be called when a thread exits InnoDB. */ - +UNIV_INTERN void srv_conc_exit_innodb( /*=================*/ - trx_t* trx) /* in: transaction object associated with the + trx_t* trx) /*!< in: transaction object associated with the thread */ { if (trx->n_tickets_to_enter_innodb > 0) { @@ -1260,13 +1318,13 @@ srv_conc_exit_innodb( /*========================================================================*/ -/************************************************************************* -Normalizes init parameter values to use units we use inside InnoDB. */ +/*********************************************************************//** +Normalizes init parameter values to use units we use inside InnoDB. +@return DB_SUCCESS or error code */ static ulint srv_normalize_init_values(void) /*===========================*/ - /* out: DB_SUCCESS or error code */ { ulint n; ulint i; @@ -1285,30 +1343,18 @@ srv_normalize_init_values(void) srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE; - srv_pool_size = srv_pool_size / (UNIV_PAGE_SIZE / 1024); - - srv_awe_window_size = srv_awe_window_size / UNIV_PAGE_SIZE; - - if (srv_use_awe) { - /* If we are using AWE we must save memory in the 32-bit - address space of the process, and cannot bind the lock - table size to the real buffer pool size. 
*/ - - srv_lock_table_size = 20 * srv_awe_window_size; - } else { - srv_lock_table_size = 5 * srv_pool_size; - } + srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE); return(DB_SUCCESS); } -/************************************************************************* -Boots the InnoDB server. */ - +/*********************************************************************//** +Boots the InnoDB server. +@return DB_SUCCESS or error code */ +UNIV_INTERN ulint srv_boot(void) /*==========*/ - /* out: DB_SUCCESS or error code */ { ulint err; @@ -1333,15 +1379,14 @@ srv_boot(void) return(DB_SUCCESS); } -#ifndef UNIV_HOTBACKUP -/************************************************************************* +/*********************************************************************//** Reserves a slot in the thread table for the current MySQL OS thread. -NOTE! The kernel mutex has to be reserved by the caller! */ +NOTE! The kernel mutex has to be reserved by the caller! +@return reserved slot */ static srv_slot_t* srv_table_reserve_slot_for_mysql(void) /*==================================*/ - /* out: reserved slot */ { srv_slot_t* slot; ulint i; @@ -1398,33 +1443,32 @@ srv_table_reserve_slot_for_mysql(void) return(slot); } -#endif /* !UNIV_HOTBACKUP */ -/******************************************************************* +/***************************************************************//** Puts a MySQL OS thread to wait for a lock to be released. If an error occurs during the wait trx->error_state associated with thr is != DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK are possible errors. DB_DEADLOCK is returned if selective deadlock resolution chose this transaction as a victim. 
*/ - +UNIV_INTERN void srv_suspend_mysql_thread( /*=====================*/ - que_thr_t* thr) /* in: query thread associated with the MySQL + que_thr_t* thr) /*!< in: query thread associated with the MySQL OS thread */ { -#ifndef UNIV_HOTBACKUP srv_slot_t* slot; os_event_t event; double wait_time; trx_t* trx; - ibool had_dict_lock = FALSE; + ulint had_dict_lock; ibool was_declared_inside_innodb = FALSE; - ib_longlong start_time = 0; - ib_longlong finish_time; + ib_int64_t start_time = 0; + ib_int64_t finish_time; ulint diff_time; ulint sec; ulint ms; + ulong lock_wait_timeout; ut_ad(!mutex_own(&kernel_mutex)); @@ -1473,7 +1517,7 @@ srv_suspend_mysql_thread( if (ut_usectime(&sec, &ms) == -1) { start_time = -1; } else { - start_time = (ib_longlong)sec * 1000000 + ms; + start_time = (ib_int64_t) sec * 1000000 + ms; } } /* Wake the lock timeout monitor thread, if it is suspended */ @@ -1493,23 +1537,35 @@ srv_suspend_mysql_thread( srv_conc_force_exit_innodb(trx); } - /* Release possible foreign key check latch */ - if (trx->dict_operation_lock_mode == RW_S_LATCH) { - - had_dict_lock = TRUE; + had_dict_lock = trx->dict_operation_lock_mode; + switch (had_dict_lock) { + case RW_S_LATCH: + /* Release foreign key check latch */ row_mysql_unfreeze_data_dictionary(trx); + break; + case RW_X_LATCH: + /* Release fast index creation latch */ + row_mysql_unlock_data_dictionary(trx); + break; } ut_a(trx->dict_operation_lock_mode == 0); - /* Wait for the release */ + /* Suspend this thread and wait for the event. */ os_event_wait(event); - if (had_dict_lock) { + /* After resuming, reacquire the data dictionary latch if + necessary. 
*/ + switch (had_dict_lock) { + case RW_S_LATCH: row_mysql_freeze_data_dictionary(trx); + break; + case RW_X_LATCH: + row_mysql_lock_data_dictionary(trx); + break; } if (was_declared_inside_innodb) { @@ -1531,7 +1587,7 @@ srv_suspend_mysql_thread( if (ut_usectime(&sec, &ms) == -1) { finish_time = -1; } else { - finish_time = (ib_longlong)sec * 1000000 + ms; + finish_time = (ib_int64_t) sec * 1000000 + ms; } diff_time = (ulint) (finish_time - start_time); @@ -1554,30 +1610,29 @@ srv_suspend_mysql_thread( mutex_exit(&kernel_mutex); - if (srv_lock_wait_timeout < 100000000 - && wait_time > (double)srv_lock_wait_timeout) { + /* InnoDB system transactions (such as the purge, and + incomplete transactions that are being rolled back after crash + recovery) will use the global value of + innodb_lock_wait_timeout, because trx->mysql_thd == NULL. */ + lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd); + + if (lock_wait_timeout < 100000000 + && wait_time > (double) lock_wait_timeout) { trx->error_state = DB_LOCK_WAIT_TIMEOUT; } -#else /* UNIV_HOTBACKUP */ - /* This function depends on MySQL code that is not included in - InnoDB Hot Backup builds. Besides, this function should never - be called in InnoDB Hot Backup. */ - ut_error; -#endif /* UNIV_HOTBACKUP */ } -/************************************************************************ +/********************************************************************//** Releases a MySQL OS thread waiting for a lock to be released, if the thread is already suspended. 
*/ - +UNIV_INTERN void srv_release_mysql_thread_if_suspended( /*==================================*/ - que_thr_t* thr) /* in: query thread associated with the + que_thr_t* thr) /*!< in: query thread associated with the MySQL OS thread */ { -#ifndef UNIV_HOTBACKUP srv_slot_t* slot; ulint i; @@ -1597,16 +1652,9 @@ srv_release_mysql_thread_if_suspended( } /* not found */ -#else /* UNIV_HOTBACKUP */ - /* This function depends on MySQL code that is not included in - InnoDB Hot Backup builds. Besides, this function should never - be called in InnoDB Hot Backup. */ - ut_error; -#endif /* UNIV_HOTBACKUP */ } -#ifndef UNIV_HOTBACKUP -/********************************************************************** +/******************************************************************//** Refreshes the values used to calculate per-second averages. */ static void @@ -1634,25 +1682,21 @@ srv_refresh_innodb_monitor_stats(void) mutex_exit(&srv_innodb_monitor_mutex); } -/********************************************************************** +/******************************************************************//** Outputs to a file the output of the InnoDB Monitor. */ - -ibool +UNIV_INTERN +void srv_printf_innodb_monitor( /*======================*/ - /* out: FALSE if not all information printed - due to failure to obtain necessary mutex */ - FILE* file, /* in: output stream */ - ibool nowait, /* in: whether to wait for the mutex. 
*/ - ulint* trx_start, /* out: file position of the start of + FILE* file, /*!< in: output stream */ + ulint* trx_start, /*!< out: file position of the start of the list of active transactions */ - ulint* trx_end) /* out: file position of the end of + ulint* trx_end) /*!< out: file position of the end of the list of active transactions */ { double time_elapsed; time_t current_time; ulint n_reserved; - ibool ret; mutex_enter(&srv_innodb_monitor_mutex); @@ -1677,6 +1721,11 @@ srv_printf_innodb_monitor( (ulong)time_elapsed); fputs("----------\n" + "BACKGROUND THREAD\n" + "----------\n", file); + srv_print_master_thread_info(file); + + fputs("----------\n" "SEMAPHORES\n" "----------\n", file); sync_print(file); @@ -1697,31 +1746,24 @@ srv_printf_innodb_monitor( mutex_exit(&dict_foreign_err_mutex); - /* Only if lock_print_info_summary proceeds correctly, - before we call the lock_print_info_all_transactions - to print all the lock information. */ - ret = lock_print_info_summary(file, nowait); - - if (ret) { - if (trx_start) { - long t = ftell(file); - if (t < 0) { - *trx_start = ULINT_UNDEFINED; - } else { - *trx_start = (ulint) t; - } + lock_print_info_summary(file); + if (trx_start) { + long t = ftell(file); + if (t < 0) { + *trx_start = ULINT_UNDEFINED; + } else { + *trx_start = (ulint) t; } - lock_print_info_all_transactions(file); - if (trx_end) { - long t = ftell(file); - if (t < 0) { - *trx_end = ULINT_UNDEFINED; - } else { - *trx_end = (ulint) t; - } + } + lock_print_info_all_transactions(file); + if (trx_end) { + long t = ftell(file); + if (t < 0) { + *trx_end = ULINT_UNDEFINED; + } else { + *trx_end = (ulint) t; } } - fputs("--------\n" "FILE I/O\n" "--------\n", file); @@ -1759,13 +1801,6 @@ srv_printf_innodb_monitor( fprintf(file, "Dictionary memory allocated " ULINTPF "\n", dict_sys->size); - if (srv_use_awe) { - fprintf(file, - "In addition to that %lu MB of AWE memory allocated\n", - (ulong) (srv_pool_size - / ((1024 * 1024) / UNIV_PAGE_SIZE))); - } - 
buf_print_io(file); fputs("--------------\n" @@ -1826,15 +1861,14 @@ srv_printf_innodb_monitor( "============================\n", file); mutex_exit(&srv_innodb_monitor_mutex); fflush(file); - - return(ret); } -/********************************************************************** +/******************************************************************//** Function to pass InnoDB status variables to MySQL */ - +UNIV_INTERN void srv_export_innodb_status(void) +/*==========================*/ { mutex_enter(&srv_innodb_monitor_mutex); @@ -1850,14 +1884,16 @@ srv_export_innodb_status(void) export_vars.innodb_data_reads = os_n_file_reads; export_vars.innodb_data_writes = os_n_file_writes; export_vars.innodb_data_written = srv_data_written; - export_vars.innodb_buffer_pool_read_requests = buf_pool->n_page_gets; + export_vars.innodb_buffer_pool_read_requests = buf_pool->stat.n_page_gets; export_vars.innodb_buffer_pool_write_requests = srv_buf_pool_write_requests; export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free; export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed; export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads; - export_vars.innodb_buffer_pool_read_ahead_rnd = srv_read_ahead_rnd; - export_vars.innodb_buffer_pool_read_ahead_seq = srv_read_ahead_seq; + export_vars.innodb_buffer_pool_read_ahead + = buf_pool->stat.n_ra_pages_read; + export_vars.innodb_buffer_pool_read_ahead_evicted + = buf_pool->stat.n_ra_pages_evicted; export_vars.innodb_buffer_pool_pages_data = UT_LIST_GET_LEN(buf_pool->LRU); export_vars.innodb_buffer_pool_pages_dirty @@ -1870,9 +1906,14 @@ srv_export_innodb_status(void) #endif /* UNIV_DEBUG */ export_vars.innodb_buffer_pool_pages_total = buf_pool->curr_size; - export_vars.innodb_buffer_pool_pages_misc = buf_pool->max_size + export_vars.innodb_buffer_pool_pages_misc = buf_pool->curr_size - UT_LIST_GET_LEN(buf_pool->LRU) - UT_LIST_GET_LEN(buf_pool->free); +#ifdef HAVE_ATOMIC_BUILTINS + 
export_vars.innodb_have_atomic_builtins = 1; +#else + export_vars.innodb_have_atomic_builtins = 0; +#endif export_vars.innodb_page_size = UNIV_PAGE_SIZE; export_vars.innodb_log_waits = srv_log_waits; export_vars.innodb_os_log_written = srv_os_log_written; @@ -1883,9 +1924,9 @@ srv_export_innodb_status(void) export_vars.innodb_log_writes = srv_log_writes; export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written; export_vars.innodb_dblwr_writes = srv_dblwr_writes; - export_vars.innodb_pages_created = buf_pool->n_pages_created; - export_vars.innodb_pages_read = buf_pool->n_pages_read; - export_vars.innodb_pages_written = buf_pool->n_pages_written; + export_vars.innodb_pages_created = buf_pool->stat.n_pages_created; + export_vars.innodb_pages_read = buf_pool->stat.n_pages_read; + export_vars.innodb_pages_written = buf_pool->stat.n_pages_written; export_vars.innodb_row_lock_waits = srv_n_lock_wait_count; export_vars.innodb_row_lock_current_waits = srv_n_lock_wait_current_count; @@ -1906,24 +1947,27 @@ srv_export_innodb_status(void) mutex_exit(&srv_innodb_monitor_mutex); } -/************************************************************************* -A thread prints the info output by various InnoDB monitors. */ - +/*********************************************************************//** +A thread which wakes up threads whose lock wait may have lasted too long. +This also prints the info output by various InnoDB monitors. 
+@return a dummy parameter */ +UNIV_INTERN os_thread_ret_t -srv_monitor_thread( -/*===============*/ - /* out: a dummy parameter */ +srv_lock_timeout_and_monitor_thread( +/*================================*/ void* arg __attribute__((unused))) - /* in: a dummy parameter required by + /*!< in: a dummy parameter required by os_thread_create */ { + srv_slot_t* slot; double time_elapsed; time_t current_time; time_t last_table_monitor_time; time_t last_tablespace_monitor_time; time_t last_monitor_time; - ulint mutex_skipped; - ibool last_srv_print_monitor; + ibool some_waits; + double wait_time; + ulint i; #ifdef UNIV_DEBUG_THREAD_CREATION fprintf(stderr, "Lock timeout thread starts, id %lu\n", @@ -1934,15 +1978,13 @@ srv_monitor_thread( last_table_monitor_time = time(NULL); last_tablespace_monitor_time = time(NULL); last_monitor_time = time(NULL); - mutex_skipped = 0; - last_srv_print_monitor = srv_print_innodb_monitor; loop: - srv_monitor_active = TRUE; + srv_lock_timeout_and_monitor_active = TRUE; - /* Wake up every 5 seconds to see if we need to print - monitor information. */ + /* When someone is waiting for a lock, we wake up every second + and check if a timeout has passed for a lock wait */ - os_thread_sleep(5000000); + os_thread_sleep(1000000); current_time = time(NULL); @@ -1952,40 +1994,14 @@ loop: last_monitor_time = time(NULL); if (srv_print_innodb_monitor) { - /* Reset mutex_skipped counter everytime - srv_print_innodb_monitor changes. 
This is to - ensure we will not be blocked by kernel_mutex - for short duration information printing, - such as requested by sync_array_print_long_waits() */ - if (!last_srv_print_monitor) { - mutex_skipped = 0; - last_srv_print_monitor = TRUE; - } - - if (!srv_printf_innodb_monitor(stderr, - MUTEX_NOWAIT(mutex_skipped), - NULL, NULL)) { - mutex_skipped++; - } else { - /* Reset the counter */ - mutex_skipped = 0; - } - } else { - last_srv_print_monitor = FALSE; + srv_printf_innodb_monitor(stderr, NULL, NULL); } - if (srv_innodb_status) { mutex_enter(&srv_monitor_file_mutex); rewind(srv_monitor_file); - if (!srv_printf_innodb_monitor(srv_monitor_file, - MUTEX_NOWAIT(mutex_skipped), - NULL, NULL)) { - mutex_skipped++; - } else { - mutex_skipped = 0; - } - + srv_printf_innodb_monitor(srv_monitor_file, NULL, + NULL); os_file_set_eof(srv_monitor_file); mutex_exit(&srv_monitor_file_mutex); } @@ -2038,56 +2054,6 @@ loop: } } - if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) { - goto exit_func; - } - - if (srv_print_innodb_monitor - || srv_print_innodb_lock_monitor - || srv_print_innodb_tablespace_monitor - || srv_print_innodb_table_monitor) { - goto loop; - } - - srv_monitor_active = FALSE; - - goto loop; - -exit_func: - srv_monitor_active = FALSE; - - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. */ - - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} - -/************************************************************************* -A thread which wakes up threads whose lock wait may have lasted too long. 
*/ - -os_thread_ret_t -srv_lock_timeout_thread( -/*====================*/ - /* out: a dummy parameter */ - void* arg __attribute__((unused))) - /* in: a dummy parameter required by - os_thread_create */ -{ - srv_slot_t* slot; - ibool some_waits; - double wait_time; - ulint i; - -loop: - /* When someone is waiting for a lock, we wake up every second - and check if a timeout has passed for a lock wait */ - - os_thread_sleep(1000000); - - srv_lock_timeout_active = TRUE; - mutex_enter(&kernel_mutex); some_waits = FALSE; @@ -2100,12 +2066,19 @@ loop: slot = srv_mysql_table + i; if (slot->in_use) { + trx_t* trx; + ulong lock_wait_timeout; + some_waits = TRUE; wait_time = ut_difftime(ut_time(), slot->suspend_time); - if (srv_lock_wait_timeout < 100000000 - && (wait_time > (double) srv_lock_wait_timeout + trx = thr_get_trx(slot->thr); + lock_wait_timeout = thd_lock_wait_timeout( + trx->mysql_thd); + + if (lock_wait_timeout < 100000000 + && (wait_time > (double) lock_wait_timeout || wait_time < 0)) { /* Timeout exceeded or a wrap-around in system @@ -2115,10 +2088,9 @@ loop: possible that the lock has already been granted: in that case do nothing */ - if (thr_get_trx(slot->thr)->wait_lock) { + if (trx->wait_lock) { lock_cancel_waiting_and_release( - thr_get_trx(slot->thr) - ->wait_lock); + trx->wait_lock); } } } @@ -2132,11 +2104,17 @@ loop: goto exit_func; } - if (some_waits) { + if (some_waits || srv_print_innodb_monitor + || srv_print_innodb_lock_monitor + || srv_print_innodb_tablespace_monitor + || srv_print_innodb_table_monitor) { goto loop; } - srv_lock_timeout_active = FALSE; + /* No one was waiting for a lock and no monitor was active: + suspend this thread */ + + srv_lock_timeout_and_monitor_active = FALSE; #if 0 /* The following synchronisation is disabled, since @@ -2146,7 +2124,7 @@ loop: goto loop; exit_func: - srv_lock_timeout_active = FALSE; + srv_lock_timeout_and_monitor_active = FALSE; /* We count the number of threads in os_thread_exit(). 
A created thread should always use that to exit and not use return() to exit. */ @@ -2156,22 +2134,22 @@ exit_func: OS_THREAD_DUMMY_RETURN; } -/************************************************************************* +/*********************************************************************//** A thread which prints warnings about semaphore waits which have lasted -too long. These can be used to track bugs which cause hangs. */ - +too long. These can be used to track bugs which cause hangs. +@return a dummy parameter */ +UNIV_INTERN os_thread_ret_t srv_error_monitor_thread( /*=====================*/ - /* out: a dummy parameter */ void* arg __attribute__((unused))) - /* in: a dummy parameter required by + /*!< in: a dummy parameter required by os_thread_create */ { /* number of successive fatal timeouts observed */ - ulint fatal_cnt = 0; - dulint old_lsn; - dulint new_lsn; + ulint fatal_cnt = 0; + ib_uint64_t old_lsn; + ib_uint64_t new_lsn; old_lsn = srv_start_lsn; @@ -2187,18 +2165,15 @@ loop: new_lsn = log_get_lsn(); - if (ut_dulint_cmp(new_lsn, old_lsn) < 0) { + if (new_lsn < old_lsn) { ut_print_timestamp(stderr); fprintf(stderr, - " InnoDB: Error: old log sequence number %lu %lu" + " InnoDB: Error: old log sequence number %llu" " was greater\n" - "InnoDB: than the new log sequence number %lu %lu!\n" + "InnoDB: than the new log sequence number %llu!\n" "InnoDB: Please submit a bug report" " to http://bugs.mysql.com\n", - (ulong) ut_dulint_get_high(old_lsn), - (ulong) ut_dulint_get_low(old_lsn), - (ulong) ut_dulint_get_high(new_lsn), - (ulong) ut_dulint_get_low(new_lsn)); + old_lsn, new_lsn); } old_lsn = new_lsn; @@ -2210,10 +2185,17 @@ loop: srv_refresh_innodb_monitor_stats(); } + /* Update the statistics collected for deciding LRU + eviction policy. */ + buf_LRU_stat_update(); + + /* Update the statistics collected for flush rate policy. 
*/ + buf_flush_stat_update(); + /* In case mutex_exit is not a memory barrier, it is theoretically possible some threads are left waiting though the semaphore is already released. Wake up those threads: */ - + sync_arr_wake_threads_if_sema_free(); if (sync_array_print_long_waits()) { @@ -2255,13 +2237,13 @@ loop: OS_THREAD_DUMMY_RETURN; } -/*********************************************************************** +/*******************************************************************//** Tells the InnoDB server that there has been activity in the database and wakes up the master thread if it is suspended (not sleeping). Used in the MySQL interface. Note that there is a small chance that the master thread stays suspended (we do not protect our operation with the kernel mutex, for performace reasons). */ - +UNIV_INTERN void srv_active_wake_master_thread(void) /*===============================*/ @@ -2278,9 +2260,9 @@ srv_active_wake_master_thread(void) } } -/*********************************************************************** +/*******************************************************************//** Wakes up the master thread if it is suspended or being suspended. */ - +UNIV_INTERN void srv_wake_master_thread(void) /*========================*/ @@ -2294,22 +2276,40 @@ srv_wake_master_thread(void) mutex_exit(&kernel_mutex); } -/************************************************************************* -The master thread controlling the server. */ +/********************************************************************** +The master thread is tasked to ensure that flush of log file happens +once every second in the background. 
This is to ensure that not more +than one second of trxs are lost in case of crash when +innodb_flush_logs_at_trx_commit != 1 */ +static +void +srv_sync_log_buffer_in_background(void) +/*===================================*/ +{ + time_t current_time = time(NULL); + + srv_main_thread_op_info = "flushing log"; + if (difftime(current_time, srv_last_log_flush_time) >= 1) { + log_buffer_sync_in_background(TRUE); + srv_last_log_flush_time = current_time; + srv_log_writes_and_flush++; + } +} +/*********************************************************************//** +The master thread controlling the server. +@return a dummy parameter */ +UNIV_INTERN os_thread_ret_t srv_master_thread( /*==============*/ - /* out: a dummy parameter */ void* arg __attribute__((unused))) - /* in: a dummy parameter required by + /*!< in: a dummy parameter required by os_thread_create */ { os_event_t event; - time_t last_flush_time; - time_t current_time; ulint old_activity_count; - ulint n_pages_purged; + ulint n_pages_purged = 0; ulint n_bytes_merged; ulint n_pages_flushed; ulint n_bytes_archived; @@ -2343,8 +2343,8 @@ loop: srv_main_thread_op_info = "reserving kernel mutex"; - n_ios_very_old = log_sys->n_log_ios + buf_pool->n_pages_read - + buf_pool->n_pages_written; + n_ios_very_old = log_sys->n_log_ios + buf_pool->stat.n_pages_read + + buf_pool->stat.n_pages_written; mutex_enter(&kernel_mutex); /* Store the user activity counter at the start of this loop */ @@ -2360,16 +2360,19 @@ loop: /* ---- We run the following loop approximately once per second when there is database activity */ + srv_last_log_flush_time = time(NULL); skip_sleep = FALSE; for (i = 0; i < 10; i++) { - n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read - + buf_pool->n_pages_written; + n_ios_old = log_sys->n_log_ios + buf_pool->stat.n_pages_read + + buf_pool->stat.n_pages_written; srv_main_thread_op_info = "sleeping"; + srv_main_1_second_loops++; if (!skip_sleep) { os_thread_sleep(1000000); + srv_main_sleeps++; } 
skip_sleep = FALSE; @@ -2389,33 +2392,27 @@ loop: goto background_loop; } - /* We flush the log once in a second even if no commit - is issued or the we have specified in my.cnf no flush - at transaction commit */ - - srv_main_thread_op_info = "flushing log"; - log_buffer_flush_to_disk(); + /* Flush logs if needed */ + srv_sync_log_buffer_in_background(); srv_main_thread_op_info = "making checkpoint"; log_free_check(); - /* If there were less than 5 i/os during the - one second sleep, we assume that there is free - disk i/o capacity available, and it makes sense to - do an insert buffer merge. */ + /* If i/os during one second sleep were less than 5% of + capacity, we assume that there is free disk i/o capacity + available, and it makes sense to do an insert buffer merge. */ n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes; - n_ios = log_sys->n_log_ios + buf_pool->n_pages_read - + buf_pool->n_pages_written; - if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) { + n_ios = log_sys->n_log_ios + buf_pool->stat.n_pages_read + + buf_pool->stat.n_pages_written; + if (n_pend_ios < SRV_PEND_IO_THRESHOLD + && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) { srv_main_thread_op_info = "doing insert buffer merge"; - ibuf_contract_for_n_pages( - TRUE, srv_insert_buffer_batch_size / 4); + ibuf_contract_for_n_pages(FALSE, PCT_IO(5)); - srv_main_thread_op_info = "flushing log"; - - log_buffer_flush_to_disk(); + /* Flush logs if needed */ + srv_sync_log_buffer_in_background(); } if (UNIV_UNLIKELY(buf_get_modified_ratio_pct() @@ -2424,8 +2421,11 @@ loop: /* Try to keep the number of modified pages in the buffer pool under the limit wished by the user */ - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, - ut_dulint_max); + srv_main_thread_op_info = + "flushing buffer pool pages"; + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, + PCT_IO(100), + IB_ULONGLONG_MAX); /* If we had to do the flush, it may have taken even more than 1 second, and also, there may be more 
@@ -2433,6 +2433,24 @@ loop: iteration of this loop. */ skip_sleep = TRUE; + } else if (srv_adaptive_flushing) { + + /* Try to keep the rate of flushing of dirty + pages such that redo log generation does not + produce bursts of IO at checkpoint time. */ + ulint n_flush = buf_flush_get_desired_flush_rate(); + + if (n_flush) { + srv_main_thread_op_info = + "flushing buffer pool pages"; + n_flush = ut_min(PCT_IO(100), n_flush); + n_pages_flushed = + buf_flush_batch( + BUF_FLUSH_LIST, + n_flush, + IB_ULONGLONG_MAX); + skip_sleep = TRUE; + } } if (srv_activity_count == old_activity_count) { @@ -2452,39 +2470,43 @@ loop: seconds */ mem_validate_all_blocks(); #endif - /* If there were less than 200 i/os during the 10 second period, - we assume that there is free disk i/o capacity available, and it - makes sense to flush 100 pages. */ + /* If i/os during the 10 second period were less than 200% of + capacity, we assume that there is free disk i/o capacity + available, and it makes sense to flush srv_io_capacity pages. + + Note that this is done regardless of the fraction of dirty + pages relative to the max requested by the user. The one second + loop above requests writes for that case. The writes done here + are not required, and may be disabled. 
*/ n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes; - n_ios = log_sys->n_log_ios + buf_pool->n_pages_read - + buf_pool->n_pages_written; - if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) { + n_ios = log_sys->n_log_ios + buf_pool->stat.n_pages_read + + buf_pool->stat.n_pages_written; + + srv_main_10_second_loops++; + if (n_pend_ios < SRV_PEND_IO_THRESHOLD + && (n_ios - n_ios_very_old < SRV_PAST_IO_ACTIVITY)) { srv_main_thread_op_info = "flushing buffer pool pages"; - buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); + buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), + IB_ULONGLONG_MAX); - srv_main_thread_op_info = "flushing log"; - log_buffer_flush_to_disk(); + /* Flush logs if needed */ + srv_sync_log_buffer_in_background(); } /* We run a batch of insert buffer merge every 10 seconds, even if the server were active */ srv_main_thread_op_info = "doing insert buffer merge"; - ibuf_contract_for_n_pages(TRUE, srv_insert_buffer_batch_size / 4); + ibuf_contract_for_n_pages(FALSE, PCT_IO(5)); - srv_main_thread_op_info = "flushing log"; - log_buffer_flush_to_disk(); + /* Flush logs if needed */ + srv_sync_log_buffer_in_background(); /* We run a full purge every 10 seconds, even if the server were active */ - - n_pages_purged = 1; - - last_flush_time = time(NULL); - - while (n_pages_purged) { + do { if (srv_fast_shutdown && srv_shutdown_state > 0) { @@ -2494,15 +2516,10 @@ loop: srv_main_thread_op_info = "purging"; n_pages_purged = trx_purge(); - current_time = time(NULL); + /* Flush logs if needed */ + srv_sync_log_buffer_in_background(); - if (difftime(current_time, last_flush_time) > 1) { - srv_main_thread_op_info = "flushing log"; - - log_buffer_flush_to_disk(); - last_flush_time = current_time; - } - } + } while (n_pages_purged); srv_main_thread_op_info = "flushing buffer pool pages"; @@ -2514,15 +2531,17 @@ loop: (> 70 %), we assume we can afford reserving the disk(s) for the time it requires to flush 100 pages */ - n_pages_flushed = 
buf_flush_batch(BUF_FLUSH_LIST, 100, - ut_dulint_max); + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, + PCT_IO(100), + IB_ULONGLONG_MAX); } else { /* Otherwise, we only flush a small number of pages so that we do not unnecessarily use much disk i/o capacity from other work */ - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10, - ut_dulint_max); + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, + PCT_IO(10), + IB_ULONGLONG_MAX); } srv_main_thread_op_info = "making checkpoint"; @@ -2555,7 +2574,7 @@ background_loop: /* The server has been quiet for a while: start running background operations */ - + srv_main_background_loops++; srv_main_thread_op_info = "doing background drop tables"; n_tables_to_drop = row_drop_tables_for_mysql_in_background(); @@ -2572,12 +2591,7 @@ background_loop: srv_main_thread_op_info = "purging"; /* Run a full purge */ - - n_pages_purged = 1; - - last_flush_time = time(NULL); - - while (n_pages_purged) { + do { if (srv_fast_shutdown && srv_shutdown_state > 0) { break; @@ -2586,15 +2600,10 @@ background_loop: srv_main_thread_op_info = "purging"; n_pages_purged = trx_purge(); - current_time = time(NULL); + /* Flush logs if needed */ + srv_sync_log_buffer_in_background(); - if (difftime(current_time, last_flush_time) > 1) { - srv_main_thread_op_info = "flushing log"; - - log_buffer_flush_to_disk(); - last_flush_time = current_time; - } - } + } while (n_pages_purged); srv_main_thread_op_info = "reserving kernel mutex"; @@ -2610,8 +2619,12 @@ background_loop: if (srv_fast_shutdown && srv_shutdown_state > 0) { n_bytes_merged = 0; } else { - n_bytes_merged = ibuf_contract_for_n_pages( - TRUE, srv_insert_buffer_batch_size); + /* This should do an amount of IO similar to the number of + dirty pages that will be flushed in the call to + buf_flush_batch below. Otherwise, the system favors + clean pages over cleanup throughput. 
*/ + n_bytes_merged = ibuf_contract_for_n_pages(FALSE, + PCT_IO(100)); } srv_main_thread_op_info = "reserving kernel mutex"; @@ -2625,10 +2638,11 @@ background_loop: flush_loop: srv_main_thread_op_info = "flushing buffer pool pages"; - + srv_main_flush_loops++; if (srv_fast_shutdown < 2) { - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, - ut_dulint_max); + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, + PCT_IO(100), + IB_ULONGLONG_MAX); } else { /* In the fastest shutdown we do not flush the buffer pool to data files: we set n_pages_flushed to 0 artificially. */ @@ -2648,9 +2662,8 @@ flush_loop: srv_main_thread_op_info = "waiting for buffer pool flush to end"; buf_flush_wait_batch_end(BUF_FLUSH_LIST); - srv_main_thread_op_info = "flushing log"; - - log_buffer_flush_to_disk(); + /* Flush logs if needed */ + srv_sync_log_buffer_in_background(); srv_main_thread_op_info = "making checkpoint"; @@ -2743,4 +2756,3 @@ suspend_thread: OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */ } -#endif /* !UNIV_HOTBACKUP */ |