diff options
Diffstat (limited to 'storage/innobase/srv/srv0srv.c')
-rw-r--r-- | storage/innobase/srv/srv0srv.c | 2598 |
1 files changed, 2598 insertions, 0 deletions
diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c new file mode 100644 index 00000000000..41b0594d705 --- /dev/null +++ b/storage/innobase/srv/srv0srv.c @@ -0,0 +1,2598 @@ +/****************************************************** +The database server main program + +NOTE: SQL Server 7 uses something which the documentation +calls user mode scheduled threads (UMS threads). One such +thread is usually allocated per processor. Win32 +documentation does not know any UMS threads, which suggests +that the concept is internal to SQL Server 7. It may mean that +SQL Server 7 does all the scheduling of threads itself, even +in i/o waits. We should maybe modify InnoDB to use the same +technique, because thread switches within NT may be too slow. + +SQL Server 7 also mentions fibers, which are cooperatively +scheduled threads. They can boost performance by 5 %, +according to the Delaney and Soukup's book. + +Windows 2000 will have something called thread pooling +(see msdn website), which we could possibly use. + +Another possibility could be to use some very fast user space +thread library. This might confuse NT though. + +(c) 1995 Innobase Oy + +Created 10/8/1995 Heikki Tuuri +*******************************************************/ +/* Dummy comment */ +#include "srv0srv.h" + +#include "ut0mem.h" +#include "os0proc.h" +#include "mem0mem.h" +#include "mem0pool.h" +#include "sync0sync.h" +#include "thr0loc.h" +#include "que0que.h" +#include "srv0que.h" +#include "log0recv.h" +#include "pars0pars.h" +#include "usr0sess.h" +#include "lock0lock.h" +#include "trx0purge.h" +#include "ibuf0ibuf.h" +#include "buf0flu.h" +#include "btr0sea.h" +#include "dict0load.h" +#include "dict0boot.h" +#include "srv0start.h" +#include "row0mysql.h" + +/* This is set to TRUE if the MySQL user has set it in MySQL; currently +affects only FOREIGN KEY definition parsing */ +ibool srv_lower_case_table_names = FALSE; + +/* The following counter is incremented whenever there is some user activity +in the server */ +ulint srv_activity_count = 0; + +/* The following is the maximum allowed duration of a lock wait. */ +ulint srv_fatal_semaphore_wait_threshold = 600; + +/* How much data manipulation language (DML) statements need to be delayed, +in microseconds, in order to reduce the lagging of the purge thread. */ +ulint srv_dml_needed_delay = 0; + +ibool srv_lock_timeout_and_monitor_active = FALSE; +ibool srv_error_monitor_active = FALSE; + +const char* srv_main_thread_op_info = ""; + +/* Server parameters which are read from the initfile */ + +/* The following three are dir paths which are catenated before file +names, where the file name itself may also contain a path */ + +char* srv_data_home = NULL; +#ifdef UNIV_LOG_ARCHIVE +char* srv_arch_dir = NULL; +#endif /* UNIV_LOG_ARCHIVE */ + +ibool srv_file_per_table = FALSE; /* store to its own file each table + created by an user; data dictionary + tables are in the system tablespace + 0 */ +ibool srv_locks_unsafe_for_binlog = FALSE; /* Place locks to records only + i.e. do not use next-key locking + except on duplicate key checking and + foreign key checking */ +ulint srv_n_data_files = 0; +char** srv_data_file_names = NULL; +ulint* srv_data_file_sizes = NULL; /* size in database pages */ + +ibool srv_auto_extend_last_data_file = FALSE; /* if TRUE, then we + auto-extend the last data + file */ +ulint srv_last_file_size_max = 0; /* if != 0, this tells + the max size auto-extending + may increase the last data + file size */ +ulong srv_auto_extend_increment = 8; /* If the last data file is + auto-extended, we add this + many pages to it at a time */ +ulint* srv_data_file_is_raw_partition = NULL; + +/* If the following is TRUE we do not allow inserts etc. This protects +the user from forgetting the 'newraw' keyword to my.cnf */ + +ibool srv_created_new_raw = FALSE; + +char** srv_log_group_home_dirs = NULL; + +ulint srv_n_log_groups = ULINT_MAX; +ulint srv_n_log_files = ULINT_MAX; +ulint srv_log_file_size = ULINT_MAX; /* size in database pages */ +ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */ +ulint srv_flush_log_at_trx_commit = 1; + +byte srv_latin1_ordering[256] /* The sort order table of the latin1 + character set. The following table is + the MySQL order as of Feb 10th, 2002 */ += { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 +, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F +, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 +, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F +, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27 +, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F +, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37 +, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F +, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47 +, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F +, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57 +, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F +, 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47 +, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F +, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57 +, 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F +, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87 +, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F +, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97 +, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F +, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7 +, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF +, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7 +, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF +, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43 +, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49 +, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xD7 +, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xDF +, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43 +, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49 +, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xF7 +, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF +}; + +ulint srv_pool_size = ULINT_MAX; /* size in pages; MySQL inits + this to size in kilobytes but + we normalize this to pages in + srv_boot() */ +ulint srv_awe_window_size = 0; /* size in pages; MySQL inits + this to bytes, but we + normalize it to pages in + srv_boot() */ +ulint srv_mem_pool_size = ULINT_MAX; /* size in bytes */ +ulint srv_lock_table_size = ULINT_MAX; + +ulint srv_n_file_io_threads = ULINT_MAX; + +#ifdef UNIV_LOG_ARCHIVE +ibool srv_log_archive_on = FALSE; +ibool srv_archive_recovery = 0; +dulint srv_archive_recovery_limit_lsn; +#endif /* UNIV_LOG_ARCHIVE */ + +ulint srv_lock_wait_timeout = 1024 * 1024 * 1024; + +char* srv_file_flush_method_str = NULL; +ulint srv_unix_file_flush_method = SRV_UNIX_FDATASYNC; +ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; + +ulint srv_max_n_open_files = 300; + +/* The InnoDB main thread tries to keep the ratio of modified pages +in the buffer pool to all database pages in the buffer pool smaller than +the following number. But it is not guaranteed that the value stays below +that during a time of heavy update/insert activity. */ + +ulong srv_max_buf_pool_modified_pct = 90; + +/* variable counts amount of data read in total (in bytes) */ +ulint srv_data_read = 0; + +/* here we count the amount of data written in total (in bytes) */ +ulint srv_data_written = 0; + +/* the number of the log write requests done */ +ulint srv_log_write_requests = 0; + +/* the number of physical writes to the log performed */ +ulint srv_log_writes = 0; + +/* amount of data written to the log files in bytes */ +ulint srv_os_log_written = 0; + +/* amount of writes being done to the log files */ +ulint srv_os_log_pending_writes = 0; + +/* we increase this counter, when there we don't have enough space in the +log buffer and have to flush it */ +ulint srv_log_waits = 0; + +/* this variable counts the amount of times, when the doublewrite buffer +was flushed */ +ulint srv_dblwr_writes = 0; + +/* here we store the number of pages that have been flushed to the +doublewrite buffer */ +ulint srv_dblwr_pages_written = 0; + +/* in this variable we store the number of write requests issued */ +ulint srv_buf_pool_write_requests = 0; + +/* here we store the number of times when we had to wait for a free page +in the buffer pool. It happens when the buffer pool is full and we need +to make a flush, in order to be able to read or create a page. */ +ulint srv_buf_pool_wait_free = 0; + +/* variable to count the number of pages that were written from buffer +pool to the disk */ +ulint srv_buf_pool_flushed = 0; + +/* variable to count the number of buffer pool reads that led to the +reading of a disk page */ +ulint srv_buf_pool_reads = 0; + +/* variable to count the number of sequential read-aheads */ +ulint srv_read_ahead_seq = 0; + +/* variable to count the number of random read-aheads */ +ulint srv_read_ahead_rnd = 0; + +/* structure to pass status variables to MySQL */ +export_struc export_vars; + +/* If the following is != 0 we do not allow inserts etc. This protects +the user from forgetting the innodb_force_recovery keyword to my.cnf */ + +ulint srv_force_recovery = 0; +/*-----------------------*/ +/* We are prepared for a situation that we have this many threads waiting for +a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the +value. */ + +ulint srv_max_n_threads = 0; + +/* The following controls how many threads we let inside InnoDB concurrently: +threads waiting for locks are not counted into the number because otherwise +we could get a deadlock. MySQL creates a thread for each user session, and +semaphore contention and convoy problems can occur withput this restriction. +Value 10 should be good if there are less than 4 processors + 4 disks in the +computer. Bigger computers need bigger values. */ + +ulong srv_thread_concurrency = 8; + +os_fast_mutex_t srv_conc_mutex; /* this mutex protects srv_conc data + structures */ +lint srv_conc_n_threads = 0; /* number of OS threads currently + inside InnoDB; it is not an error + if this drops temporarily below zero + because we do not demand that every + thread increments this, but a thread + waiting for a lock decrements this + temporarily */ +ulint srv_conc_n_waiting_threads = 0; /* number of OS threads waiting in the + FIFO for a permission to enter InnoDB + */ + +typedef struct srv_conc_slot_struct srv_conc_slot_t; +struct srv_conc_slot_struct{ + os_event_t event; /* event to wait */ + ibool reserved; /* TRUE if slot + reserved */ + ibool wait_ended; /* TRUE when another + thread has already set + the event and the + thread in this slot is + free to proceed; but + reserved may still be + TRUE at that point */ + UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /* queue node */ +}; + +UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue; /* queue of threads + waiting to get in */ +srv_conc_slot_t* srv_conc_slots; /* array of wait + slots */ + +/* Number of times a thread is allowed to enter InnoDB within the same +SQL query after it has once got the ticket at srv_conc_enter_innodb */ +#define SRV_FREE_TICKETS_TO_ENTER srv_n_free_tickets_to_enter +#define SRV_THREAD_SLEEP_DELAY srv_thread_sleep_delay +/*-----------------------*/ +/* If the following is set TRUE then we do not run purge and insert buffer +merge to completion before shutdown */ + +ibool srv_fast_shutdown = FALSE; + +ibool srv_very_fast_shutdown = FALSE; /* if this TRUE, do not flush the + buffer pool to data files at the + shutdown; we effectively 'crash' + InnoDB */ +/* Generate a innodb_status.<pid> file */ +ibool srv_innodb_status = FALSE; + +ibool srv_use_doublewrite_buf = TRUE; +ibool srv_use_checksums = TRUE; + +ibool srv_set_thread_priorities = TRUE; +int srv_query_thread_priority = 0; + +/* TRUE if the Address Windowing Extensions of Windows are used; then we must +disable adaptive hash indexes */ +ibool srv_use_awe = FALSE; +ibool srv_use_adaptive_hash_indexes = TRUE; + +/*-------------------------------------------*/ +ulong srv_n_spin_wait_rounds = 20; +ulong srv_n_free_tickets_to_enter = 500; +ulong srv_thread_sleep_delay = 10000; +ulint srv_spin_wait_delay = 5; +ibool srv_priority_boost = TRUE; + +ibool srv_print_thread_releases = FALSE; +ibool srv_print_lock_waits = FALSE; +ibool srv_print_buf_io = FALSE; +ibool srv_print_log_io = FALSE; +ibool srv_print_latch_waits = FALSE; + +ulint srv_n_rows_inserted = 0; +ulint srv_n_rows_updated = 0; +ulint srv_n_rows_deleted = 0; +ulint srv_n_rows_read = 0; +static ulint srv_n_rows_inserted_old = 0; +static ulint srv_n_rows_updated_old = 0; +static ulint srv_n_rows_deleted_old = 0; +static ulint srv_n_rows_read_old = 0; + +ulint srv_n_lock_wait_count = 0; +ulint srv_n_lock_wait_current_count = 0; +ib_longlong srv_n_lock_wait_time = 0; +ulint srv_n_lock_max_wait_time = 0; + + +/* + Set the following to 0 if you want InnoDB to write messages on + stderr on startup/shutdown +*/ +ibool srv_print_verbose_log = TRUE; +ibool srv_print_innodb_monitor = FALSE; +ibool srv_print_innodb_lock_monitor = FALSE; +ibool srv_print_innodb_tablespace_monitor = FALSE; +ibool srv_print_innodb_table_monitor = FALSE; + +/* The parameters below are obsolete: */ + +ibool srv_print_parsed_sql = FALSE; + +ulint srv_sim_disk_wait_pct = ULINT_MAX; +ulint srv_sim_disk_wait_len = ULINT_MAX; +ibool srv_sim_disk_wait_by_yield = FALSE; +ibool srv_sim_disk_wait_by_wait = FALSE; + +ibool srv_measure_contention = FALSE; +ibool srv_measure_by_spin = FALSE; + +ibool srv_test_extra_mutexes = FALSE; +ibool srv_test_nocache = FALSE; +ibool srv_test_cache_evict = FALSE; + +ibool srv_test_sync = FALSE; +ulint srv_test_n_threads = ULINT_MAX; +ulint srv_test_n_loops = ULINT_MAX; +ulint srv_test_n_free_rnds = ULINT_MAX; +ulint srv_test_n_reserved_rnds = ULINT_MAX; +ulint srv_test_array_size = ULINT_MAX; +ulint srv_test_n_mutexes = ULINT_MAX; + +/* Array of English strings describing the current state of an +i/o handler thread */ + +const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS]; +const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS]; + +time_t srv_last_monitor_time; + +mutex_t srv_innodb_monitor_mutex; + +/* Mutex for locking srv_monitor_file */ +mutex_t srv_monitor_file_mutex; +/* Temporary file for innodb monitor output */ +FILE* srv_monitor_file; + +ulint srv_main_thread_process_no = 0; +ulint srv_main_thread_id = 0; + +/* + IMPLEMENTATION OF THE SERVER MAIN PROGRAM + ========================================= + +There is the following analogue between this database +server and an operating system kernel: + +DB concept equivalent OS concept +---------- --------------------- +transaction -- process; + +query thread -- thread; + +lock -- semaphore; + +transaction set to +the rollback state -- kill signal delivered to a process; + +kernel -- kernel; + +query thread execution: +(a) without kernel mutex +reserved -- process executing in user mode; +(b) with kernel mutex reserved + -- process executing in kernel mode; + +The server is controlled by a master thread which runs at +a priority higher than normal, that is, higher than user threads. +It sleeps most of the time, and wakes up, say, every 300 milliseconds, +to check whether there is anything happening in the server which +requires intervention of the master thread. Such situations may be, +for example, when flushing of dirty blocks is needed in the buffer +pool or old version of database rows have to be cleaned away. + +The threads which we call user threads serve the queries of +the clients and input from the console of the server. +They run at normal priority. The server may have several +communications endpoints. A dedicated set of user threads waits +at each of these endpoints ready to receive a client request. +Each request is taken by a single user thread, which then starts +processing and, when the result is ready, sends it to the client +and returns to wait at the same endpoint the thread started from. + +So, we do not have dedicated communication threads listening at +the endpoints and dealing the jobs to dedicated worker threads. +Our architecture saves one thread swithch per request, compared +to the solution with dedicated communication threads +which amounts to 15 microseconds on 100 MHz Pentium +running NT. If the client +is communicating over a network, this saving is negligible, but +if the client resides in the same machine, maybe in an SMP machine +on a different processor from the server thread, the saving +can be important as the threads can communicate over shared +memory with an overhead of a few microseconds. + +We may later implement a dedicated communication thread solution +for those endpoints which communicate over a network. + +Our solution with user threads has two problems: for each endpoint +there has to be a number of listening threads. If there are many +communication endpoints, it may be difficult to set the right number +of concurrent threads in the system, as many of the threads +may always be waiting at less busy endpoints. Another problem +is queuing of the messages, as the server internally does not +offer any queue for jobs. + +Another group of user threads is intended for splitting the +queries and processing them in parallel. Let us call these +parallel communication threads. These threads are waiting for +parallelized tasks, suspended on event semaphores. + +A single user thread waits for input from the console, +like a command to shut the database. + +Utility threads are a different group of threads which takes +care of the buffer pool flushing and other, mainly background +operations, in the server. +Some of these utility threads always run at a lower than normal +priority, so that they are always in background. Some of them +may dynamically boost their priority by the pri_adjust function, +even to higher than normal priority, if their task becomes urgent. +The running of utilities is controlled by high- and low-water marks +of urgency. The urgency may be measured by the number of dirty blocks +in the buffer pool, in the case of the flush thread, for example. +When the high-water mark is exceeded, an utility starts running, until +the urgency drops under the low-water mark. Then the utility thread +suspend itself to wait for an event. The master thread is +responsible of signaling this event when the utility thread is +again needed. + +For each individual type of utility, some threads always remain +at lower than normal priority. This is because pri_adjust is implemented +so that the threads at normal or higher priority control their +share of running time by calling sleep. Thus, if the load of the +system sudenly drops, these threads cannot necessarily utilize +the system fully. The background priority threads make up for this, +starting to run when the load drops. + +When there is no activity in the system, also the master thread +suspends itself to wait for an event making +the server totally silent. The responsibility to signal this +event is on the user thread which again receives a message +from a client. + +There is still one complication in our server design. If a +background utility thread obtains a resource (e.g., mutex) needed by a user +thread, and there is also some other user activity in the system, +the user thread may have to wait indefinitely long for the +resource, as the OS does not schedule a background thread if +there is some other runnable user thread. This problem is called +priority inversion in real-time programming. + +One solution to the priority inversion problem would be to +keep record of which thread owns which resource and +in the above case boost the priority of the background thread +so that it will be scheduled and it can release the resource. +This solution is called priority inheritance in real-time programming. +A drawback of this solution is that the overhead of acquiring a mutex +increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because +the thread has to call os_thread_get_curr_id. +This may be compared to 0.5 microsecond overhead for a mutex lock-unlock +pair. Note that the thread +cannot store the information in the resource, say mutex, itself, +because competing threads could wipe out the information if it is +stored before acquiring the mutex, and if it stored afterwards, +the information is outdated for the time of one machine instruction, +at least. (To be precise, the information could be stored to +lock_word in mutex if the machine supports atomic swap.) + +The above solution with priority inheritance may become actual in the +future, but at the moment we plan to implement a more coarse solution, +which could be called a global priority inheritance. If a thread +has to wait for a long time, say 300 milliseconds, for a resource, +we just guess that it may be waiting for a resource owned by a background +thread, and boost the the priority of all runnable background threads +to the normal level. The background threads then themselves adjust +their fixed priority back to background after releasing all resources +they had (or, at some fixed points in their program code). + +What is the performance of the global priority inheritance solution? +We may weigh the length of the wait time 300 milliseconds, during +which the system processes some other thread +to the cost of boosting the priority of each runnable background +thread, rescheduling it, and lowering the priority again. +On 100 MHz Pentium + NT this overhead may be of the order 100 +microseconds per thread. So, if the number of runnable background +threads is not very big, say < 100, the cost is tolerable. +Utility threads probably will access resources used by +user threads not very often, so collisions of user threads +to preempted utility threads should not happen very often. + +The thread table contains +information of the current status of each thread existing in the system, +and also the event semaphores used in suspending the master thread +and utility and parallel communication threads when they have nothing to do. +The thread table can be seen as an analogue to the process table +in a traditional Unix implementation. + +The thread table is also used in the global priority inheritance +scheme. This brings in one additional complication: threads accessing +the thread table must have at least normal fixed priority, +because the priority inheritance solution does not work if a background +thread is preempted while possessing the mutex protecting the thread table. +So, if a thread accesses the thread table, its priority has to be +boosted at least to normal. This priority requirement can be seen similar to +the privileged mode used when processing the kernel calls in traditional +Unix.*/ + +/* Thread slot in the thread table */ +struct srv_slot_struct{ + os_thread_id_t id; /* thread id */ + os_thread_t handle; /* thread handle */ + ulint type; /* thread type: user, utility etc. */ + ibool in_use; /* TRUE if this slot is in use */ + ibool suspended; /* TRUE if the thread is waiting + for the event of this slot */ + ib_time_t suspend_time; /* time when the thread was + suspended */ + os_event_t event; /* event used in suspending the + thread when it has nothing to do */ + que_thr_t* thr; /* suspended query thread (only + used for MySQL threads) */ +}; + +/* Table for MySQL threads where they will be suspended to wait for locks */ +srv_slot_t* srv_mysql_table = NULL; + +os_event_t srv_lock_timeout_thread_event; + +srv_sys_t* srv_sys = NULL; + +byte srv_pad1[64]; /* padding to prevent other memory update + hotspots from residing on the same memory + cache line */ +mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs, + query threads, and lock table */ +byte srv_pad2[64]; /* padding to prevent other memory update + hotspots from residing on the same memory + cache line */ + +/* The following three values measure the urgency of the jobs of +buffer, version, and insert threads. They may vary from 0 - 1000. +The server mutex protects all these variables. The low-water values +tell that the server can acquiesce the utility when the value +drops below this low-water mark. */ + +ulint srv_meter[SRV_MASTER + 1]; +ulint srv_meter_low_water[SRV_MASTER + 1]; +ulint srv_meter_high_water[SRV_MASTER + 1]; +ulint srv_meter_high_water2[SRV_MASTER + 1]; +ulint srv_meter_foreground[SRV_MASTER + 1]; + +/* The following values give info about the activity going on in +the database. They are protected by the server mutex. The arrays +are indexed by the type of the thread. */ + +ulint srv_n_threads_active[SRV_MASTER + 1]; +ulint srv_n_threads[SRV_MASTER + 1]; + +/************************************************************************* +Sets the info describing an i/o thread current state. */ + +void +srv_set_io_thread_op_info( +/*======================*/ + ulint i, /* in: the 'segment' of the i/o thread */ + const char* str) /* in: constant char string describing the + state */ +{ + ut_a(i < SRV_MAX_N_IO_THREADS); + + srv_io_thread_op_info[i] = str; +} + +/************************************************************************* +Accessor function to get pointer to n'th slot in the server thread +table. */ +static +srv_slot_t* +srv_table_get_nth_slot( +/*===================*/ + /* out: pointer to the slot */ + ulint index) /* in: index of the slot */ +{ + ut_a(index < OS_THREAD_MAX_N); + + return(srv_sys->threads + index); +} + +/************************************************************************* +Gets the number of threads in the system. */ + +ulint +srv_get_n_threads(void) +/*===================*/ +{ + ulint i; + ulint n_threads = 0; + + mutex_enter(&kernel_mutex); + + for (i = SRV_COM; i < SRV_MASTER + 1; i++) { + + n_threads += srv_n_threads[i]; + } + + mutex_exit(&kernel_mutex); + + return(n_threads); +} + +/************************************************************************* +Reserves a slot in the thread table for the current thread. Also creates the +thread local storage struct for the current thread. NOTE! The server mutex +has to be reserved by the caller! */ +static +ulint +srv_table_reserve_slot( +/*===================*/ + /* out: reserved slot index */ + ulint type) /* in: type of the thread: one of SRV_COM, ... */ +{ + srv_slot_t* slot; + ulint i; + + ut_a(type > 0); + ut_a(type <= SRV_MASTER); + + i = 0; + slot = srv_table_get_nth_slot(i); + + while (slot->in_use) { + i++; + slot = srv_table_get_nth_slot(i); + } + + ut_a(slot->in_use == FALSE); + + slot->in_use = TRUE; + slot->suspended = FALSE; + slot->id = os_thread_get_curr_id(); + slot->handle = os_thread_get_curr(); + slot->type = type; + + thr_local_create(); + + thr_local_set_slot_no(os_thread_get_curr_id(), i); + + return(i); +} + +/************************************************************************* +Suspends the calling thread to wait for the event in its thread slot. +NOTE! The server mutex has to be reserved by the caller! */ +static +os_event_t +srv_suspend_thread(void) +/*====================*/ + /* out: event for the calling thread to wait */ +{ + srv_slot_t* slot; + os_event_t event; + ulint slot_no; + ulint type; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&kernel_mutex)); +#endif /* UNIV_SYNC_DEBUG */ + + slot_no = thr_local_get_slot_no(os_thread_get_curr_id()); + + if (srv_print_thread_releases) { + fprintf(stderr, + "Suspending thread %lu to slot %lu meter %lu\n", + (ulong) os_thread_get_curr_id(), (ulong) slot_no, + (ulong) srv_meter[SRV_RECOVERY]); + } + + slot = srv_table_get_nth_slot(slot_no); + + type = slot->type; + + ut_ad(type >= SRV_WORKER); + ut_ad(type <= SRV_MASTER); + + event = slot->event; + + slot->suspended = TRUE; + + ut_ad(srv_n_threads_active[type] > 0); + + srv_n_threads_active[type]--; + + os_event_reset(event); + + return(event); +} + +/************************************************************************* +Releases threads of the type given from suspension in the thread table. +NOTE! The server mutex has to be reserved by the caller! */ + +ulint +srv_release_threads( +/*================*/ + /* out: number of threads released: this may be + < n if not enough threads were suspended at the + moment */ + ulint type, /* in: thread type */ + ulint n) /* in: number of threads to release */ +{ + srv_slot_t* slot; + ulint i; + ulint count = 0; + + ut_ad(type >= SRV_WORKER); + ut_ad(type <= SRV_MASTER); + ut_ad(n > 0); +#ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&kernel_mutex)); +#endif /* UNIV_SYNC_DEBUG */ + + for (i = 0; i < OS_THREAD_MAX_N; i++) { + + slot = srv_table_get_nth_slot(i); + + if (slot->in_use && slot->type == type && slot->suspended) { + + slot->suspended = FALSE; + + srv_n_threads_active[type]++; + + os_event_set(slot->event); + + if (srv_print_thread_releases) { + fprintf(stderr, + "Releasing thread %lu type %lu from slot %lu meter %lu\n", + (ulong) slot->id, (ulong) type, (ulong) i, + (ulong) srv_meter[SRV_RECOVERY]); + } + + count++; + + if (count == n) { + break; + } + } + } + + return(count); +} + +/************************************************************************* +Returns the calling thread type. */ + +ulint +srv_get_thread_type(void) +/*=====================*/ + /* out: SRV_COM, ... */ +{ + ulint slot_no; + srv_slot_t* slot; + ulint type; + + mutex_enter(&kernel_mutex); + + slot_no = thr_local_get_slot_no(os_thread_get_curr_id()); + + slot = srv_table_get_nth_slot(slot_no); + + type = slot->type; + + ut_ad(type >= SRV_WORKER); + ut_ad(type <= SRV_MASTER); + + mutex_exit(&kernel_mutex); + + return(type); +} + +/************************************************************************* +Initializes the server. */ + +void +srv_init(void) +/*==========*/ +{ + srv_conc_slot_t* conc_slot; + srv_slot_t* slot; + dict_table_t* table; + ulint i; + + srv_sys = mem_alloc(sizeof(srv_sys_t)); + + kernel_mutex_temp = mem_alloc(sizeof(mutex_t)); + mutex_create(&kernel_mutex); + mutex_set_level(&kernel_mutex, SYNC_KERNEL); + + mutex_create(&srv_innodb_monitor_mutex); + mutex_set_level(&srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK); + + srv_sys->threads = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)); + + for (i = 0; i < OS_THREAD_MAX_N; i++) { + slot = srv_table_get_nth_slot(i); + slot->in_use = FALSE; + slot->type=0; /* Avoid purify errors */ + slot->event = os_event_create(NULL); + ut_a(slot->event); + } + + srv_mysql_table = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)); + + for (i = 0; i < OS_THREAD_MAX_N; i++) { + slot = srv_mysql_table + i; + slot->in_use = FALSE; + slot->type = 0; + slot->event = os_event_create(NULL); + ut_a(slot->event); + } + + srv_lock_timeout_thread_event = os_event_create(NULL); + + for (i = 0; i < SRV_MASTER + 1; i++) { + srv_n_threads_active[i] = 0; + srv_n_threads[i] = 0; + srv_meter[i] = 30; + srv_meter_low_water[i] = 50; + srv_meter_high_water[i] = 100; + srv_meter_high_water2[i] = 200; + srv_meter_foreground[i] = 250; + } + + srv_sys->operational = os_event_create(NULL); + + ut_a(srv_sys->operational); + + UT_LIST_INIT(srv_sys->tasks); + + /* create dummy table and index for old-style infimum and supremum */ + table = dict_mem_table_create("SYS_DUMMY1", + DICT_HDR_SPACE, 1, FALSE); + dict_mem_table_add_col(table, "DUMMY", DATA_CHAR, + DATA_ENGLISH | DATA_NOT_NULL, 8, 0); + + srv_sys->dummy_ind1 = dict_mem_index_create("SYS_DUMMY1", + "SYS_DUMMY1", DICT_HDR_SPACE, 0, 1); + dict_index_add_col(srv_sys->dummy_ind1, + dict_table_get_nth_col(table, 0), 0, 0); + srv_sys->dummy_ind1->table = table; + /* create dummy table and index for new-style infimum and supremum */ + table = dict_mem_table_create("SYS_DUMMY2", + DICT_HDR_SPACE, 1, TRUE); + dict_mem_table_add_col(table, "DUMMY", DATA_CHAR, + DATA_ENGLISH | DATA_NOT_NULL, 8, 0); + srv_sys->dummy_ind2 = dict_mem_index_create("SYS_DUMMY2", + "SYS_DUMMY2", DICT_HDR_SPACE, 0, 1); + dict_index_add_col(srv_sys->dummy_ind2, + dict_table_get_nth_col(table, 0), 0, 0); + srv_sys->dummy_ind2->table = table; + + /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ + srv_sys->dummy_ind1->cached = srv_sys->dummy_ind2->cached = TRUE; + + /* Init the server concurrency restriction data structures */ + + os_fast_mutex_init(&srv_conc_mutex); + + UT_LIST_INIT(srv_conc_queue); + + srv_conc_slots = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t)); + + for (i = 0; i < OS_THREAD_MAX_N; i++) { + conc_slot = srv_conc_slots + i; + conc_slot->reserved = FALSE; + conc_slot->event = os_event_create(NULL); + ut_a(conc_slot->event); + } +} + +/************************************************************************* +Frees the OS fast mutex created in srv_init(). */ + +void +srv_free(void) +/*==========*/ +{ + os_fast_mutex_free(&srv_conc_mutex); +} + +/************************************************************************* +Initializes the synchronization primitives, memory system, and the thread +local storage. */ + +void +srv_general_init(void) +/*==================*/ +{ + os_sync_init(); + sync_init(); + mem_init(srv_mem_pool_size); + thr_local_init(); +} + +/*======================= InnoDB Server FIFO queue =======================*/ + +/* Maximum allowable purge history length. <=0 means 'infinite'. */ +ulong srv_max_purge_lag = 0; + +/************************************************************************* +Puts an OS thread to wait if there are too many concurrent threads +(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */ + +void +srv_conc_enter_innodb( +/*==================*/ + trx_t* trx) /* in: transaction object associated with the + thread */ +{ + ibool has_slept = FALSE; + srv_conc_slot_t* slot = NULL; + ulint i; + + if (srv_thread_concurrency >= 500) { + /* Disable the concurrency check */ + + return; + } + + /* If trx has 'free tickets' to enter the engine left, then use one + such ticket */ + + if (trx->n_tickets_to_enter_innodb > 0) { + trx->n_tickets_to_enter_innodb--; + + return; + } + + os_fast_mutex_lock(&srv_conc_mutex); +retry: + if (trx->declared_to_be_inside_innodb) { + ut_print_timestamp(stderr); + fputs( +" InnoDB: Error: trying to declare trx to enter InnoDB, but\n" +"InnoDB: it already is declared.\n", stderr); + trx_print(stderr, trx); + putc('\n', stderr); + os_fast_mutex_unlock(&srv_conc_mutex); + + return; + } + + if (srv_conc_n_threads < (lint)srv_thread_concurrency) { + + srv_conc_n_threads++; + trx->declared_to_be_inside_innodb = TRUE; + trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER; + + os_fast_mutex_unlock(&srv_conc_mutex); + + return; + } + + /* If the transaction is not holding resources, + let it sleep for SRV_THREAD_SLEEP_DELAY microseconds, and try again then */ + + if (!has_slept && !trx->has_search_latch + && NULL == UT_LIST_GET_FIRST(trx->trx_locks)) { + + has_slept = TRUE; /* We let is sleep only once to avoid + starvation */ + + srv_conc_n_waiting_threads++; + + os_fast_mutex_unlock(&srv_conc_mutex); + + trx->op_info = "sleeping before joining InnoDB queue"; + + /* Peter Zaitsev suggested that we take the sleep away + altogether. But the sleep may be good in pathological + situations of lots of thread switches. Simply put some + threads aside for a while to reduce the number of thread + switches. */ + if (SRV_THREAD_SLEEP_DELAY > 0) + { + os_thread_sleep(SRV_THREAD_SLEEP_DELAY); + } + + trx->op_info = ""; + + os_fast_mutex_lock(&srv_conc_mutex); + + srv_conc_n_waiting_threads--; + + goto retry; + } + + /* Too many threads inside: put the current thread to a queue */ + + for (i = 0; i < OS_THREAD_MAX_N; i++) { + slot = srv_conc_slots + i; + + if (!slot->reserved) { + + break; + } + } + + if (i == OS_THREAD_MAX_N) { + /* Could not find a free wait slot, we must let the + thread enter */ + + srv_conc_n_threads++; + trx->declared_to_be_inside_innodb = TRUE; + trx->n_tickets_to_enter_innodb = 0; + + os_fast_mutex_unlock(&srv_conc_mutex); + + return; + } + + /* Release possible search system latch this thread has */ + if (trx->has_search_latch) { + trx_search_latch_release_if_reserved(trx); + } + + /* Add to the queue */ + slot->reserved = TRUE; + slot->wait_ended = FALSE; + + UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot); + + os_event_reset(slot->event); + + srv_conc_n_waiting_threads++; + + os_fast_mutex_unlock(&srv_conc_mutex); + + /* Go to wait for the event; when a thread leaves InnoDB it will + release this thread */ + + trx->op_info = "waiting in InnoDB queue"; + + os_event_wait(slot->event); + + trx->op_info = ""; + + os_fast_mutex_lock(&srv_conc_mutex); + + srv_conc_n_waiting_threads--; + + /* NOTE that the thread which released this thread already + incremented the thread counter on behalf of this thread */ + + slot->reserved = FALSE; + + UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot); + + trx->declared_to_be_inside_innodb = TRUE; + trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER; + + os_fast_mutex_unlock(&srv_conc_mutex); +} + +/************************************************************************* +This lets a thread enter InnoDB regardless of the number of threads inside +InnoDB. This must be called when a thread ends a lock wait. */ + +void +srv_conc_force_enter_innodb( +/*========================*/ + trx_t* trx) /* in: transaction object associated with the + thread */ +{ + if (srv_thread_concurrency >= 500) { + + return; + } + + os_fast_mutex_lock(&srv_conc_mutex); + + srv_conc_n_threads++; + trx->declared_to_be_inside_innodb = TRUE; + trx->n_tickets_to_enter_innodb = 0; + + os_fast_mutex_unlock(&srv_conc_mutex); +} + +/************************************************************************* +This must be called when a thread exits InnoDB in a lock wait or at the +end of an SQL statement. */ + +void +srv_conc_force_exit_innodb( +/*=======================*/ + trx_t* trx) /* in: transaction object associated with the + thread */ +{ + srv_conc_slot_t* slot = NULL; + + if (srv_thread_concurrency >= 500) { + + return; + } + + if (trx->declared_to_be_inside_innodb == FALSE) { + + return; + } + + os_fast_mutex_lock(&srv_conc_mutex); + + srv_conc_n_threads--; + trx->declared_to_be_inside_innodb = FALSE; + trx->n_tickets_to_enter_innodb = 0; + + if (srv_conc_n_threads < (lint)srv_thread_concurrency) { + /* Look for a slot where a thread is waiting and no other + thread has yet released the thread */ + + slot = UT_LIST_GET_FIRST(srv_conc_queue); + + while (slot && slot->wait_ended == TRUE) { + slot = UT_LIST_GET_NEXT(srv_conc_queue, slot); + } + + if (slot != NULL) { + slot->wait_ended = TRUE; + + /* We increment the count on behalf of the released + thread */ + + srv_conc_n_threads++; + } + } + + os_fast_mutex_unlock(&srv_conc_mutex); + + if (slot != NULL) { + os_event_set(slot->event); + } +} + +/************************************************************************* +This must be called when a thread exits InnoDB. */ + +void +srv_conc_exit_innodb( +/*=================*/ + trx_t* trx) /* in: transaction object associated with the + thread */ +{ + if (srv_thread_concurrency >= 500) { + + return; + } + + if (trx->n_tickets_to_enter_innodb > 0) { + /* We will pretend the thread is still inside InnoDB though it + now leaves the InnoDB engine. In this way we save + a lot of semaphore operations. srv_conc_force_exit_innodb is + used to declare the thread definitely outside InnoDB. It + should be called when there is a lock wait or an SQL statement + ends. */ + + return; + } + + srv_conc_force_exit_innodb(trx); +} + +/*========================================================================*/ + +/************************************************************************* +Normalizes init parameter values to use units we use inside InnoDB. */ +static +ulint +srv_normalize_init_values(void) +/*===========================*/ + /* out: DB_SUCCESS or error code */ +{ + ulint n; + ulint i; + + n = srv_n_data_files; + + for (i = 0; i < n; i++) { + srv_data_file_sizes[i] = srv_data_file_sizes[i] + * ((1024 * 1024) / UNIV_PAGE_SIZE); + } + + srv_last_file_size_max = srv_last_file_size_max + * ((1024 * 1024) / UNIV_PAGE_SIZE); + + srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE; + + srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE; + + srv_pool_size = srv_pool_size / (UNIV_PAGE_SIZE / 1024); + + srv_awe_window_size = srv_awe_window_size / UNIV_PAGE_SIZE; + + if (srv_use_awe) { + /* If we are using AWE we must save memory in the 32-bit + address space of the process, and cannot bind the lock + table size to the real buffer pool size. */ + + srv_lock_table_size = 20 * srv_awe_window_size; + } else { + srv_lock_table_size = 5 * srv_pool_size; + } + + return(DB_SUCCESS); +} + +/************************************************************************* +Boots the InnoDB server. */ + +ulint +srv_boot(void) +/*==========*/ + /* out: DB_SUCCESS or error code */ +{ + ulint err; + + /* Transform the init parameter values given by MySQL to + use units we use inside InnoDB: */ + + err = srv_normalize_init_values(); + + if (err != DB_SUCCESS) { + return(err); + } + + /* Initialize synchronization primitives, memory management, and thread + local storage */ + + srv_general_init(); + + /* Initialize this module */ + + srv_init(); + + return(DB_SUCCESS); +} + +#ifndef UNIV_HOTBACKUP +/************************************************************************* +Reserves a slot in the thread table for the current MySQL OS thread. +NOTE! The kernel mutex has to be reserved by the caller! */ +static +srv_slot_t* +srv_table_reserve_slot_for_mysql(void) +/*==================================*/ + /* out: reserved slot */ +{ + srv_slot_t* slot; + ulint i; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&kernel_mutex)); +#endif /* UNIV_SYNC_DEBUG */ + + i = 0; + slot = srv_mysql_table + i; + + while (slot->in_use) { + i++; + + if (i >= OS_THREAD_MAX_N) { + + ut_print_timestamp(stderr); + + fprintf(stderr, +" InnoDB: There appear to be %lu MySQL threads currently waiting\n" +"InnoDB: inside InnoDB, which is the upper limit. Cannot continue operation.\n" +"InnoDB: We intentionally generate a seg fault to print a stack trace\n" +"InnoDB: on Linux. But first we print a list of waiting threads.\n", (ulong) i); + + for (i = 0; i < OS_THREAD_MAX_N; i++) { + + slot = srv_mysql_table + i; + + fprintf(stderr, +"Slot %lu: thread id %lu, type %lu, in use %lu, susp %lu, time %lu\n", + (ulong) i, (ulong) os_thread_pf(slot->id), + (ulong) slot->type, (ulong) slot->in_use, + (ulong) slot->suspended, + (ulong) difftime(ut_time(), slot->suspend_time)); + } + + ut_error; + } + + slot = srv_mysql_table + i; + } + + ut_a(slot->in_use == FALSE); + + slot->in_use = TRUE; + slot->id = os_thread_get_curr_id(); + slot->handle = os_thread_get_curr(); + + return(slot); +} +#endif /* !UNIV_HOTBACKUP */ + +/******************************************************************* +Puts a MySQL OS thread to wait for a lock to be released. If an error +occurs during the wait trx->error_state associated with thr is +!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK +are possible errors. DB_DEADLOCK is returned if selective deadlock +resolution chose this transaction as a victim. */ + +void +srv_suspend_mysql_thread( +/*=====================*/ + que_thr_t* thr) /* in: query thread associated with the MySQL + OS thread */ +{ +#ifndef UNIV_HOTBACKUP + srv_slot_t* slot; + os_event_t event; + double wait_time; + trx_t* trx; + ibool had_dict_lock = FALSE; + ibool was_declared_inside_innodb = FALSE; + ib_longlong start_time = 0; + ib_longlong finish_time; + ulint diff_time; + ulint sec; + ulint ms; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(!mutex_own(&kernel_mutex)); +#endif /* UNIV_SYNC_DEBUG */ + + trx = thr_get_trx(thr); + + os_event_set(srv_lock_timeout_thread_event); + + mutex_enter(&kernel_mutex); + + trx->error_state = DB_SUCCESS; + + if (thr->state == QUE_THR_RUNNING) { + + ut_ad(thr->is_active == TRUE); + + /* The lock has already been released or this transaction + was chosen as a deadlock victim: no need to suspend */ + + if (trx->was_chosen_as_deadlock_victim) { + + trx->error_state = DB_DEADLOCK; + trx->was_chosen_as_deadlock_victim = FALSE; + } + + mutex_exit(&kernel_mutex); + + return; + } + + ut_ad(thr->is_active == FALSE); + + slot = srv_table_reserve_slot_for_mysql(); + + event = slot->event; + + slot->thr = thr; + + os_event_reset(event); + + slot->suspend_time = ut_time(); + + if (thr->lock_state == QUE_THR_LOCK_ROW) { + srv_n_lock_wait_count++; + srv_n_lock_wait_current_count++; + + ut_usectime(&sec, &ms); + start_time = (ib_longlong)sec * 1000000 + ms; + } + /* Wake the lock timeout monitor thread, if it is suspended */ + + os_event_set(srv_lock_timeout_thread_event); + + mutex_exit(&kernel_mutex); + + if (trx->declared_to_be_inside_innodb) { + + was_declared_inside_innodb = TRUE; + + /* We must declare this OS thread to exit InnoDB, since a + possible other thread holding a lock which this thread waits + for must be allowed to enter, sooner or later */ + + srv_conc_force_exit_innodb(trx); + } + + /* Release possible foreign key check latch */ + if (trx->dict_operation_lock_mode == RW_S_LATCH) { + + had_dict_lock = TRUE; + + row_mysql_unfreeze_data_dictionary(trx); + } + + ut_a(trx->dict_operation_lock_mode == 0); + + /* Wait for the release */ + + os_event_wait(event); + + if (had_dict_lock) { + + row_mysql_freeze_data_dictionary(trx); + } + + if (was_declared_inside_innodb) { + + /* Return back inside InnoDB */ + + srv_conc_force_enter_innodb(trx); + } + + mutex_enter(&kernel_mutex); + + /* Release the slot for others to use */ + + slot->in_use = FALSE; + + wait_time = ut_difftime(ut_time(), slot->suspend_time); + + if (thr->lock_state == QUE_THR_LOCK_ROW) { + ut_usectime(&sec, &ms); + finish_time = (ib_longlong)sec * 1000000 + ms; + + diff_time = (ulint) (finish_time - start_time); + + srv_n_lock_wait_current_count--; + srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time; + if (diff_time > srv_n_lock_max_wait_time) { + srv_n_lock_max_wait_time = diff_time; + } + } + + if (trx->was_chosen_as_deadlock_victim) { + + trx->error_state = DB_DEADLOCK; + trx->was_chosen_as_deadlock_victim = FALSE; + } + + mutex_exit(&kernel_mutex); + + if (srv_lock_wait_timeout < 100000000 && + wait_time > (double)srv_lock_wait_timeout) { + + trx->error_state = DB_LOCK_WAIT_TIMEOUT; + } +#else /* UNIV_HOTBACKUP */ + /* This function depends on MySQL code that is not included in + InnoDB Hot Backup builds. Besides, this function should never + be called in InnoDB Hot Backup. */ + ut_error; +#endif /* UNIV_HOTBACKUP */ +} + +/************************************************************************ +Releases a MySQL OS thread waiting for a lock to be released, if the +thread is already suspended. */ + +void +srv_release_mysql_thread_if_suspended( +/*==================================*/ + que_thr_t* thr) /* in: query thread associated with the + MySQL OS thread */ +{ +#ifndef UNIV_HOTBACKUP + srv_slot_t* slot; + ulint i; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(mutex_own(&kernel_mutex)); +#endif /* UNIV_SYNC_DEBUG */ + + for (i = 0; i < OS_THREAD_MAX_N; i++) { + + slot = srv_mysql_table + i; + + if (slot->in_use && slot->thr == thr) { + /* Found */ + + os_event_set(slot->event); + + return; + } + } + + /* not found */ +#else /* UNIV_HOTBACKUP */ + /* This function depends on MySQL code that is not included in + InnoDB Hot Backup builds. Besides, this function should never + be called in InnoDB Hot Backup. */ + ut_error; +#endif /* UNIV_HOTBACKUP */ +} + +#ifndef UNIV_HOTBACKUP +/********************************************************************** +Refreshes the values used to calculate per-second averages. */ +static +void +srv_refresh_innodb_monitor_stats(void) +/*==================================*/ +{ + mutex_enter(&srv_innodb_monitor_mutex); + + srv_last_monitor_time = time(NULL); + + os_aio_refresh_stats(); + + btr_cur_n_sea_old = btr_cur_n_sea; + btr_cur_n_non_sea_old = btr_cur_n_non_sea; + + log_refresh_stats(); + + buf_refresh_io_stats(); + + srv_n_rows_inserted_old = srv_n_rows_inserted; + srv_n_rows_updated_old = srv_n_rows_updated; + srv_n_rows_deleted_old = srv_n_rows_deleted; + srv_n_rows_read_old = srv_n_rows_read; + + mutex_exit(&srv_innodb_monitor_mutex); +} + +/********************************************************************** +Outputs to a file the output of the InnoDB Monitor. */ + +void +srv_printf_innodb_monitor( +/*======================*/ + FILE* file) /* in: output stream */ +{ + double time_elapsed; + time_t current_time; + ulint n_reserved; + + mutex_enter(&srv_innodb_monitor_mutex); + + current_time = time(NULL); + + /* We add 0.001 seconds to time_elapsed to prevent division + by zero if two users happen to call SHOW INNODB STATUS at the same + time */ + + time_elapsed = difftime(current_time, srv_last_monitor_time) + + 0.001; + + srv_last_monitor_time = time(NULL); + + fputs("\n=====================================\n", file); + + ut_print_timestamp(file); + fprintf(file, + " INNODB MONITOR OUTPUT\n" + "=====================================\n" + "Per second averages calculated from the last %lu seconds\n", + (ulong)time_elapsed); + + fputs("----------\n" + "SEMAPHORES\n" + "----------\n", file); + sync_print(file); + + /* Conceptually, srv_innodb_monitor_mutex has a very high latching + order level in sync0sync.h, while dict_foreign_err_mutex has a very + low level 135. Therefore we can reserve the latter mutex here without + a danger of a deadlock of threads. */ + + mutex_enter(&dict_foreign_err_mutex); + + if (ftell(dict_foreign_err_file) != 0L) { + fputs("------------------------\n" + "LATEST FOREIGN KEY ERROR\n" + "------------------------\n", file); + ut_copy_file(file, dict_foreign_err_file); + } + + mutex_exit(&dict_foreign_err_mutex); + + lock_print_info(file); + fputs("--------\n" + "FILE I/O\n" + "--------\n", file); + os_aio_print(file); + + fputs("-------------------------------------\n" + "INSERT BUFFER AND ADAPTIVE HASH INDEX\n" + "-------------------------------------\n", file); + ibuf_print(file); + + ha_print_info(file, btr_search_sys->hash_index); + + fprintf(file, + "%.2f hash searches/s, %.2f non-hash searches/s\n", + (btr_cur_n_sea - btr_cur_n_sea_old) + / time_elapsed, + (btr_cur_n_non_sea - btr_cur_n_non_sea_old) + / time_elapsed); + btr_cur_n_sea_old = btr_cur_n_sea; + btr_cur_n_non_sea_old = btr_cur_n_non_sea; + + fputs("---\n" + "LOG\n" + "---\n", file); + log_print(file); + + fputs("----------------------\n" + "BUFFER POOL AND MEMORY\n" + "----------------------\n", file); + fprintf(file, + "Total memory allocated " ULINTPF + "; in additional pool allocated " ULINTPF "\n", + ut_total_allocated_memory, + mem_pool_get_reserved(mem_comm_pool)); + + if (srv_use_awe) { + fprintf(file, + "In addition to that %lu MB of AWE memory allocated\n", + (ulong) (srv_pool_size / ((1024 * 1024) / UNIV_PAGE_SIZE))); + } + + buf_print_io(file); + + fputs("--------------\n" + "ROW OPERATIONS\n" + "--------------\n", file); + fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n", + (long) srv_conc_n_threads, + (ulong) srv_conc_n_waiting_threads); + n_reserved = fil_space_get_n_reserved_extents(0); + if (n_reserved > 0) { + fprintf(file, + "%lu tablespace extents now reserved for B-tree split operations\n", + (ulong) n_reserved); + } + +#ifdef UNIV_LINUX + fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n", + (ulong) srv_main_thread_process_no, + (ulong) srv_main_thread_id, + srv_main_thread_op_info); +#else + fprintf(file, "Main thread id %lu, state: %s\n", + (ulong) srv_main_thread_id, + srv_main_thread_op_info); +#endif + fprintf(file, + "Number of rows inserted " ULINTPF + ", updated " ULINTPF ", deleted " ULINTPF ", read " ULINTPF "\n", + srv_n_rows_inserted, + srv_n_rows_updated, + srv_n_rows_deleted, + srv_n_rows_read); + fprintf(file, + "%.2f inserts/s, %.2f updates/s, %.2f deletes/s, %.2f reads/s\n", + (srv_n_rows_inserted - srv_n_rows_inserted_old) + / time_elapsed, + (srv_n_rows_updated - srv_n_rows_updated_old) + / time_elapsed, + (srv_n_rows_deleted - srv_n_rows_deleted_old) + / time_elapsed, + (srv_n_rows_read - srv_n_rows_read_old) + / time_elapsed); + + srv_n_rows_inserted_old = srv_n_rows_inserted; + srv_n_rows_updated_old = srv_n_rows_updated; + srv_n_rows_deleted_old = srv_n_rows_deleted; + srv_n_rows_read_old = srv_n_rows_read; + + fputs("----------------------------\n" + "END OF INNODB MONITOR OUTPUT\n" + "============================\n", file); + mutex_exit(&srv_innodb_monitor_mutex); + fflush(file); +} + +/********************************************************************** +Function to pass InnoDB status variables to MySQL */ + +void +srv_export_innodb_status(void) +{ + + mutex_enter(&srv_innodb_monitor_mutex); + export_vars.innodb_data_pending_reads= os_n_pending_reads; + export_vars.innodb_data_pending_writes= os_n_pending_writes; + export_vars.innodb_data_pending_fsyncs= + fil_n_pending_log_flushes + fil_n_pending_tablespace_flushes; + export_vars.innodb_data_fsyncs= os_n_fsyncs; + export_vars.innodb_data_read= srv_data_read; + export_vars.innodb_data_reads= os_n_file_reads; + export_vars.innodb_data_writes= os_n_file_writes; + export_vars.innodb_data_written= srv_data_written; + export_vars.innodb_buffer_pool_read_requests= buf_pool->n_page_gets; + export_vars.innodb_buffer_pool_write_requests= srv_buf_pool_write_requests; + export_vars.innodb_buffer_pool_wait_free= srv_buf_pool_wait_free; + export_vars.innodb_buffer_pool_pages_flushed= srv_buf_pool_flushed; + export_vars.innodb_buffer_pool_reads= srv_buf_pool_reads; + export_vars.innodb_buffer_pool_read_ahead_rnd= srv_read_ahead_rnd; + export_vars.innodb_buffer_pool_read_ahead_seq= srv_read_ahead_seq; + export_vars.innodb_buffer_pool_pages_data= UT_LIST_GET_LEN(buf_pool->LRU); + export_vars.innodb_buffer_pool_pages_dirty= UT_LIST_GET_LEN(buf_pool->flush_list); + export_vars.innodb_buffer_pool_pages_free= UT_LIST_GET_LEN(buf_pool->free); + export_vars.innodb_buffer_pool_pages_latched= buf_get_latched_pages_number(); + export_vars.innodb_buffer_pool_pages_total= buf_pool->curr_size; + export_vars.innodb_buffer_pool_pages_misc= buf_pool->max_size - + UT_LIST_GET_LEN(buf_pool->LRU) - UT_LIST_GET_LEN(buf_pool->free); + export_vars.innodb_page_size= UNIV_PAGE_SIZE; + export_vars.innodb_log_waits= srv_log_waits; + export_vars.innodb_os_log_written= srv_os_log_written; + export_vars.innodb_os_log_fsyncs= fil_n_log_flushes; + export_vars.innodb_os_log_pending_fsyncs= fil_n_pending_log_flushes; + export_vars.innodb_os_log_pending_writes= srv_os_log_pending_writes; + export_vars.innodb_log_write_requests= srv_log_write_requests; + export_vars.innodb_log_writes= srv_log_writes; + export_vars.innodb_dblwr_pages_written= srv_dblwr_pages_written; + export_vars.innodb_dblwr_writes= srv_dblwr_writes; + export_vars.innodb_pages_created= buf_pool->n_pages_created; + export_vars.innodb_pages_read= buf_pool->n_pages_read; + export_vars.innodb_pages_written= buf_pool->n_pages_written; + export_vars.innodb_row_lock_waits= srv_n_lock_wait_count; + export_vars.innodb_row_lock_current_waits= srv_n_lock_wait_current_count; + export_vars.innodb_row_lock_time= srv_n_lock_wait_time / 10000; + if (srv_n_lock_wait_count > 0) { + export_vars.innodb_row_lock_time_avg = (ulint) + (srv_n_lock_wait_time / 10000 / srv_n_lock_wait_count); + } else { + export_vars.innodb_row_lock_time_avg = 0; + } + export_vars.innodb_row_lock_time_max= srv_n_lock_max_wait_time / 10000; + export_vars.innodb_rows_read= srv_n_rows_read; + export_vars.innodb_rows_inserted= srv_n_rows_inserted; + export_vars.innodb_rows_updated= srv_n_rows_updated; + export_vars.innodb_rows_deleted= srv_n_rows_deleted; + mutex_exit(&srv_innodb_monitor_mutex); + +} + +/************************************************************************* +A thread which wakes up threads whose lock wait may have lasted too long. +This also prints the info output by various InnoDB monitors. */ + +#ifndef __WIN__ +void* +#else +ulint +#endif +srv_lock_timeout_and_monitor_thread( +/*================================*/ + /* out: a dummy parameter */ + void* arg __attribute__((unused))) + /* in: a dummy parameter required by + os_thread_create */ +{ + srv_slot_t* slot; + double time_elapsed; + time_t current_time; + time_t last_table_monitor_time; + time_t last_monitor_time; + ibool some_waits; + double wait_time; + ulint i; + +#ifdef UNIV_DEBUG_THREAD_CREATION + fprintf(stderr, "Lock timeout thread starts, id %lu\n", + os_thread_pf(os_thread_get_curr_id())); +#endif + UT_NOT_USED(arg); + srv_last_monitor_time = time(NULL); + last_table_monitor_time = time(NULL); + last_monitor_time = time(NULL); +loop: + srv_lock_timeout_and_monitor_active = TRUE; + + /* When someone is waiting for a lock, we wake up every second + and check if a timeout has passed for a lock wait */ + + os_thread_sleep(1000000); + + /* In case mutex_exit is not a memory barrier, it is + theoretically possible some threads are left waiting though + the semaphore is already released. Wake up those threads: */ + + sync_arr_wake_threads_if_sema_free(); + + current_time = time(NULL); + + time_elapsed = difftime(current_time, last_monitor_time); + + if (time_elapsed > 15) { + last_monitor_time = time(NULL); + + if (srv_print_innodb_monitor) { + srv_printf_innodb_monitor(stderr); + } + + if (srv_innodb_status) { + mutex_enter(&srv_monitor_file_mutex); + rewind(srv_monitor_file); + srv_printf_innodb_monitor(srv_monitor_file); + os_file_set_eof(srv_monitor_file); + mutex_exit(&srv_monitor_file_mutex); + } + + if (srv_print_innodb_tablespace_monitor + && difftime(current_time, last_table_monitor_time) > 60) { + + last_table_monitor_time = time(NULL); + + fputs("================================================\n", + stderr); + + ut_print_timestamp(stderr); + + fputs(" INNODB TABLESPACE MONITOR OUTPUT\n" + "================================================\n", + stderr); + + fsp_print(0); + fputs("Validating tablespace\n", stderr); + fsp_validate(0); + fputs("Validation ok\n" + "---------------------------------------\n" + "END OF INNODB TABLESPACE MONITOR OUTPUT\n" + "=======================================\n", + stderr); + } + + if (srv_print_innodb_table_monitor + && difftime(current_time, last_table_monitor_time) > 60) { + + last_table_monitor_time = time(NULL); + + fputs("===========================================\n", stderr); + + ut_print_timestamp(stderr); + + fputs(" INNODB TABLE MONITOR OUTPUT\n" + "===========================================\n", + stderr); + dict_print(); + + fputs("-----------------------------------\n" + "END OF INNODB TABLE MONITOR OUTPUT\n" + "==================================\n", + stderr); + } + } + + mutex_enter(&kernel_mutex); + + some_waits = FALSE; + + /* Check of all slots if a thread is waiting there, and if it + has exceeded the time limit */ + + for (i = 0; i < OS_THREAD_MAX_N; i++) { + + slot = srv_mysql_table + i; + + if (slot->in_use) { + some_waits = TRUE; + + wait_time = ut_difftime(ut_time(), slot->suspend_time); + + if (srv_lock_wait_timeout < 100000000 && + (wait_time > (double) srv_lock_wait_timeout + || wait_time < 0)) { + + /* Timeout exceeded or a wrap-around in system + time counter: cancel the lock request queued + by the transaction and release possible + other transactions waiting behind; it is + possible that the lock has already been + granted: in that case do nothing */ + + if (thr_get_trx(slot->thr)->wait_lock) { + lock_cancel_waiting_and_release( + thr_get_trx(slot->thr)->wait_lock); + } + } + } + } + + os_event_reset(srv_lock_timeout_thread_event); + + mutex_exit(&kernel_mutex); + + if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) { + goto exit_func; + } + + if (some_waits || srv_print_innodb_monitor + || srv_print_innodb_lock_monitor + || srv_print_innodb_tablespace_monitor + || srv_print_innodb_table_monitor) { + goto loop; + } + + /* No one was waiting for a lock and no monitor was active: + suspend this thread */ + + srv_lock_timeout_and_monitor_active = FALSE; + +#if 0 + /* The following synchronisation is disabled, since + the InnoDB monitor output is to be updated every 15 seconds. */ + os_event_wait(srv_lock_timeout_thread_event); +#endif + goto loop; + +exit_func: + srv_lock_timeout_and_monitor_active = FALSE; + + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. */ + + os_thread_exit(NULL); +#ifndef __WIN__ + return(NULL); +#else + return(0); +#endif +} + +/************************************************************************* +A thread which prints warnings about semaphore waits which have lasted +too long. These can be used to track bugs which cause hangs. */ + +#ifndef __WIN__ +void* +#else +ulint +#endif +srv_error_monitor_thread( +/*=====================*/ + /* out: a dummy parameter */ + void* arg __attribute__((unused))) + /* in: a dummy parameter required by + os_thread_create */ +{ + /* number of successive fatal timeouts observed */ + ulint fatal_cnt = 0; + dulint old_lsn; + dulint new_lsn; + + old_lsn = srv_start_lsn; + +#ifdef UNIV_DEBUG_THREAD_CREATION + fprintf(stderr, "Error monitor thread starts, id %lu\n", + os_thread_pf(os_thread_get_curr_id())); +#endif +loop: + srv_error_monitor_active = TRUE; + + /* Try to track a strange bug reported by Harald Fuchs and others, + where the lsn seems to decrease at times */ + + new_lsn = log_get_lsn(); + + if (ut_dulint_cmp(new_lsn, old_lsn) < 0) { + ut_print_timestamp(stderr); + fprintf(stderr, +" InnoDB: Error: old log sequence number %lu %lu was greater\n" +"InnoDB: than the new log sequence number %lu %lu!\n" +"InnoDB: Please send a bug report to mysql@lists.mysql.com\n", + (ulong) ut_dulint_get_high(old_lsn), + (ulong) ut_dulint_get_low(old_lsn), + (ulong) ut_dulint_get_high(new_lsn), + (ulong) ut_dulint_get_low(new_lsn)); + } + + old_lsn = new_lsn; + + if (difftime(time(NULL), srv_last_monitor_time) > 60) { + /* We referesh InnoDB Monitor values so that averages are + printed from at most 60 last seconds */ + + srv_refresh_innodb_monitor_stats(); + } + + if (sync_array_print_long_waits()) { + fatal_cnt++; + if (fatal_cnt > 5) { + + fprintf(stderr, +"InnoDB: Error: semaphore wait has lasted > %lu seconds\n" +"InnoDB: We intentionally crash the server, because it appears to be hung.\n", + srv_fatal_semaphore_wait_threshold); + + ut_error; + } + } else { + fatal_cnt = 0; + } + + /* Flush stderr so that a database user gets the output + to possible MySQL error file */ + + fflush(stderr); + + os_thread_sleep(2000000); + + if (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) { + + goto loop; + } + + srv_error_monitor_active = FALSE; + + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. */ + + os_thread_exit(NULL); + +#ifndef __WIN__ + return(NULL); +#else + return(0); +#endif +} + +/*********************************************************************** +Tells the InnoDB server that there has been activity in the database +and wakes up the master thread if it is suspended (not sleeping). Used +in the MySQL interface. Note that there is a small chance that the master +thread stays suspended (we do not protect our operation with the kernel +mutex, for performace reasons). */ + +void +srv_active_wake_master_thread(void) +/*===============================*/ +{ + srv_activity_count++; + + if (srv_n_threads_active[SRV_MASTER] == 0) { + + mutex_enter(&kernel_mutex); + + srv_release_threads(SRV_MASTER, 1); + + mutex_exit(&kernel_mutex); + } +} + +/*********************************************************************** +Wakes up the master thread if it is suspended or being suspended. */ + +void +srv_wake_master_thread(void) +/*========================*/ +{ + srv_activity_count++; + + mutex_enter(&kernel_mutex); + + srv_release_threads(SRV_MASTER, 1); + + mutex_exit(&kernel_mutex); +} + +/************************************************************************* +The master thread controlling the server. */ + +#ifndef __WIN__ +void* +#else +ulint +#endif +srv_master_thread( +/*==============*/ + /* out: a dummy parameter */ + void* arg __attribute__((unused))) + /* in: a dummy parameter required by + os_thread_create */ +{ + os_event_t event; + time_t last_flush_time; + time_t current_time; + ulint old_activity_count; + ulint n_pages_purged; + ulint n_bytes_merged; + ulint n_pages_flushed; + ulint n_bytes_archived; + ulint n_tables_to_drop; + ulint n_ios; + ulint n_ios_old; + ulint n_ios_very_old; + ulint n_pend_ios; + ibool skip_sleep = FALSE; + ulint i; + +#ifdef UNIV_DEBUG_THREAD_CREATION + fprintf(stderr, "Master thread starts, id %lu\n", + os_thread_pf(os_thread_get_curr_id())); +#endif + srv_main_thread_process_no = os_proc_get_number(); + srv_main_thread_id = os_thread_pf(os_thread_get_curr_id()); + + srv_table_reserve_slot(SRV_MASTER); + + mutex_enter(&kernel_mutex); + + srv_n_threads_active[SRV_MASTER]++; + + mutex_exit(&kernel_mutex); + + os_event_set(srv_sys->operational); +loop: + /*****************************************************************/ + /* ---- When there is database activity by users, we cycle in this + loop */ + + srv_main_thread_op_info = "reserving kernel mutex"; + + n_ios_very_old = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; + mutex_enter(&kernel_mutex); + + /* Store the user activity counter at the start of this loop */ + old_activity_count = srv_activity_count; + + mutex_exit(&kernel_mutex); + + if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) { + + goto suspend_thread; + } + + /* ---- We run the following loop approximately once per second + when there is database activity */ + + skip_sleep = FALSE; + + for (i = 0; i < 10; i++) { + n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; + srv_main_thread_op_info = "sleeping"; + + if (!skip_sleep) { + + os_thread_sleep(1000000); + } + + skip_sleep = FALSE; + + /* ALTER TABLE in MySQL requires on Unix that the table handler + can drop tables lazily after there no longer are SELECT + queries to them. */ + + srv_main_thread_op_info = "doing background drop tables"; + + row_drop_tables_for_mysql_in_background(); + + srv_main_thread_op_info = ""; + + if (srv_fast_shutdown && srv_shutdown_state > 0) { + + goto background_loop; + } + + /* We flush the log once in a second even if no commit + is issued or the we have specified in my.cnf no flush + at transaction commit */ + + srv_main_thread_op_info = "flushing log"; + log_buffer_flush_to_disk(); + + srv_main_thread_op_info = "making checkpoint"; + log_free_check(); + + /* If there were less than 5 i/os during the + one second sleep, we assume that there is free + disk i/o capacity available, and it makes sense to + do an insert buffer merge. */ + + n_pend_ios = buf_get_n_pending_ios() + + log_sys->n_pending_writes; + n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; + if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) { + srv_main_thread_op_info = "doing insert buffer merge"; + ibuf_contract_for_n_pages(TRUE, 5); + + srv_main_thread_op_info = "flushing log"; + + log_buffer_flush_to_disk(); + } + + if (buf_get_modified_ratio_pct() > + srv_max_buf_pool_modified_pct) { + + /* Try to keep the number of modified pages in the + buffer pool under the limit wished by the user */ + + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, + ut_dulint_max); + + /* If we had to do the flush, it may have taken + even more than 1 second, and also, there may be more + to flush. Do not sleep 1 second during the next + iteration of this loop. */ + + skip_sleep = TRUE; + } + + if (srv_activity_count == old_activity_count) { + + /* There is no user activity at the moment, go to + the background loop */ + + goto background_loop; + } + } + + /* ---- We perform the following code approximately once per + 10 seconds when there is database activity */ + +#ifdef MEM_PERIODIC_CHECK + /* Check magic numbers of every allocated mem block once in 10 + seconds */ + mem_validate_all_blocks(); +#endif + /* If there were less than 200 i/os during the 10 second period, + we assume that there is free disk i/o capacity available, and it + makes sense to flush 100 pages. */ + + n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes; + n_ios = log_sys->n_log_ios + buf_pool->n_pages_read + + buf_pool->n_pages_written; + if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) { + + srv_main_thread_op_info = "flushing buffer pool pages"; + buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); + + srv_main_thread_op_info = "flushing log"; + log_buffer_flush_to_disk(); + } + + /* We run a batch of insert buffer merge every 10 seconds, + even if the server were active */ + + srv_main_thread_op_info = "doing insert buffer merge"; + ibuf_contract_for_n_pages(TRUE, 5); + + srv_main_thread_op_info = "flushing log"; + log_buffer_flush_to_disk(); + + /* We run a full purge every 10 seconds, even if the server + were active */ + + n_pages_purged = 1; + + last_flush_time = time(NULL); + + while (n_pages_purged) { + + if (srv_fast_shutdown && srv_shutdown_state > 0) { + + goto background_loop; + } + + srv_main_thread_op_info = "purging"; + n_pages_purged = trx_purge(); + + current_time = time(NULL); + + if (difftime(current_time, last_flush_time) > 1) { + srv_main_thread_op_info = "flushing log"; + + log_buffer_flush_to_disk(); + last_flush_time = current_time; + } + } + + srv_main_thread_op_info = "flushing buffer pool pages"; + + /* Flush a few oldest pages to make a new checkpoint younger */ + + if (buf_get_modified_ratio_pct() > 70) { + + /* If there are lots of modified pages in the buffer pool + (> 70 %), we assume we can afford reserving the disk(s) for + the time it requires to flush 100 pages */ + + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, + ut_dulint_max); + } else { + /* Otherwise, we only flush a small number of pages so that + we do not unnecessarily use much disk i/o capacity from + other work */ + + n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10, + ut_dulint_max); + } + + srv_main_thread_op_info = "making checkpoint"; + + /* Make a new checkpoint about once in 10 seconds */ + + log_checkpoint(TRUE, FALSE); + + srv_main_thread_op_info = "reserving kernel mutex"; + + mutex_enter(&kernel_mutex); + + /* ---- When there is database activity, we jump from here back to + the start of loop */ + + if (srv_activity_count != old_activity_count) { + mutex_exit(&kernel_mutex); + goto loop; + } + + mutex_exit(&kernel_mutex); + + /* If the database is quiet, we enter the background loop */ + + /*****************************************************************/ +background_loop: + /* ---- In this loop we run background operations when the server + is quiet from user activity. Also in the case of a shutdown, we + loop here, flushing the buffer pool to the data files. */ + + /* The server has been quiet for a while: start running background + operations */ + + srv_main_thread_op_info = "doing background drop tables"; + + n_tables_to_drop = row_drop_tables_for_mysql_in_background(); + + if (n_tables_to_drop > 0) { + /* Do not monopolize the CPU even if there are tables waiting + in the background drop queue. (It is essentially a bug if + MySQL tries to drop a table while there are still open handles + to it and we had to put it to the background drop queue.) */ + + os_thread_sleep(100000); + } + + srv_main_thread_op_info = "purging"; + + /* Run a full purge */ + + n_pages_purged = 1; + + last_flush_time = time(NULL); + + while (n_pages_purged) { + if (srv_fast_shutdown && srv_shutdown_state > 0) { + + break; + } + + srv_main_thread_op_info = "purging"; + n_pages_purged = trx_purge(); + + current_time = time(NULL); + + if (difftime(current_time, last_flush_time) > 1) { + srv_main_thread_op_info = "flushing log"; + + log_buffer_flush_to_disk(); + last_flush_time = current_time; + } + } + + srv_main_thread_op_info = "reserving kernel mutex"; + + mutex_enter(&kernel_mutex); + if (srv_activity_count != old_activity_count) { + mutex_exit(&kernel_mutex); + goto loop; + } + mutex_exit(&kernel_mutex); + + srv_main_thread_op_info = "doing insert buffer merge"; + + if (srv_fast_shutdown && srv_shutdown_state > 0) { + n_bytes_merged = 0; + } else { + n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20); + } + + srv_main_thread_op_info = "reserving kernel mutex"; + + mutex_enter(&kernel_mutex); + if (srv_activity_count != old_activity_count) { + mutex_exit(&kernel_mutex); + goto loop; + } + mutex_exit(&kernel_mutex); + +flush_loop: + srv_main_thread_op_info = "flushing buffer pool pages"; + + if (!srv_very_fast_shutdown) { + n_pages_flushed = + buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max); + } else { + /* In a 'very fast' shutdown we do not flush the buffer pool + to data files: we set n_pages_flushed to 0 artificially. */ + + n_pages_flushed = 0; + } + + srv_main_thread_op_info = "reserving kernel mutex"; + + mutex_enter(&kernel_mutex); + if (srv_activity_count != old_activity_count) { + mutex_exit(&kernel_mutex); + goto loop; + } + mutex_exit(&kernel_mutex); + + srv_main_thread_op_info = "waiting for buffer pool flush to end"; + buf_flush_wait_batch_end(BUF_FLUSH_LIST); + + srv_main_thread_op_info = "flushing log"; + + log_buffer_flush_to_disk(); + + srv_main_thread_op_info = "making checkpoint"; + + log_checkpoint(TRUE, FALSE); + + if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) { + + /* Try to keep the number of modified pages in the + buffer pool under the limit wished by the user */ + + goto flush_loop; + } + + srv_main_thread_op_info = "reserving kernel mutex"; + + mutex_enter(&kernel_mutex); + if (srv_activity_count != old_activity_count) { + mutex_exit(&kernel_mutex); + goto loop; + } + mutex_exit(&kernel_mutex); +/* + srv_main_thread_op_info = "archiving log (if log archive is on)"; + + log_archive_do(FALSE, &n_bytes_archived); +*/ + n_bytes_archived = 0; + + /* Keep looping in the background loop if still work to do */ + + if (srv_fast_shutdown && srv_shutdown_state > 0) { + if (n_tables_to_drop + n_pages_flushed + + n_bytes_archived != 0) { + + /* If we are doing a fast shutdown (= the default) + we do not do purge or insert buffer merge. But we + flush the buffer pool completely to disk. + In a 'very fast' shutdown we do not flush the buffer + pool to data files: we have set n_pages_flushed to + 0 artificially. */ + + goto background_loop; + } + } else if (n_tables_to_drop + + n_pages_purged + n_bytes_merged + n_pages_flushed + + n_bytes_archived != 0) { + /* In a 'slow' shutdown we run purge and the insert buffer + merge to completion */ + + goto background_loop; + } + + /* There is no work for background operations either: suspend + master thread to wait for more server activity */ + +suspend_thread: + srv_main_thread_op_info = "suspending"; + + mutex_enter(&kernel_mutex); + + if (row_get_background_drop_list_len_low() > 0) { + mutex_exit(&kernel_mutex); + + goto loop; + } + + event = srv_suspend_thread(); + + mutex_exit(&kernel_mutex); + + srv_main_thread_op_info = "waiting for server activity"; + + os_event_wait(event); + + if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { + /* This is only extra safety, the thread should exit + already when the event wait ends */ + + os_thread_exit(NULL); + } + + /* When there is user activity, InnoDB will set the event and the main + thread goes back to loop: */ + + goto loop; + + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. + The thread actually never comes here because it is exited in an + os_event_wait(). */ + + os_thread_exit(NULL); + +#ifndef __WIN__ + return(NULL); /* Not reached */ +#else + return(0); +#endif +} +#endif /* !UNIV_HOTBACKUP */ |