summaryrefslogtreecommitdiff
path: root/storage/innobase/srv/srv0srv.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/srv/srv0srv.c')
-rw-r--r--storage/innobase/srv/srv0srv.c2885
1 files changed, 0 insertions, 2885 deletions
diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
deleted file mode 100644
index 36c3d450aae..00000000000
--- a/storage/innobase/srv/srv0srv.c
+++ /dev/null
@@ -1,2885 +0,0 @@
-/******************************************************
-The database server main program
-
-NOTE: SQL Server 7 uses something which the documentation
-calls user mode scheduled threads (UMS threads). One such
-thread is usually allocated per processor. Win32
-documentation does not know any UMS threads, which suggests
-that the concept is internal to SQL Server 7. It may mean that
-SQL Server 7 does all the scheduling of threads itself, even
-in i/o waits. We should maybe modify InnoDB to use the same
-technique, because thread switches within NT may be too slow.
-
-SQL Server 7 also mentions fibers, which are cooperatively
-scheduled threads. They can boost performance by 5 %,
-according to the Delaney and Soukup's book.
-
-Windows 2000 will have something called thread pooling
-(see msdn website), which we could possibly use.
-
-Another possibility could be to use some very fast user space
-thread library. This might confuse NT though.
-
-(c) 1995 Innobase Oy
-
-Created 10/8/1995 Heikki Tuuri
-*******************************************************/
-/* Dummy comment */
-#include "srv0srv.h"
-
-#include "ut0mem.h"
-#include "os0proc.h"
-#include "mem0mem.h"
-#include "mem0pool.h"
-#include "sync0sync.h"
-#include "thr0loc.h"
-#include "que0que.h"
-#include "srv0que.h"
-#include "log0recv.h"
-#include "pars0pars.h"
-#include "usr0sess.h"
-#include "lock0lock.h"
-#include "trx0purge.h"
-#include "ibuf0ibuf.h"
-#include "buf0flu.h"
-#include "btr0sea.h"
-#include "dict0load.h"
-#include "dict0boot.h"
-#include "srv0start.h"
-#include "row0mysql.h"
-#include "ha_prototypes.h"
-
-/* This is set to TRUE if the MySQL user has set it in MySQL; currently
-affects only FOREIGN KEY definition parsing */
-ibool srv_lower_case_table_names = FALSE;
-
-/* The following counter is incremented whenever there is some user activity
-in the server */
-ulint srv_activity_count = 0;
-
-/* The following is the maximum allowed duration of a lock wait. */
-ulint srv_fatal_semaphore_wait_threshold = 600;
-
-/* How much data manipulation language (DML) statements need to be delayed,
-in microseconds, in order to reduce the lagging of the purge thread. */
-ulint srv_dml_needed_delay = 0;
-
-ibool srv_lock_timeout_and_monitor_active = FALSE;
-ibool srv_error_monitor_active = FALSE;
-
-const char* srv_main_thread_op_info = "";
-
-/* Prefix used by MySQL to indicate pre-5.1 table name encoding */
-const char srv_mysql50_table_name_prefix[9] = "#mysql50#";
-
-/* Server parameters which are read from the initfile */
-
-/* The following three are dir paths which are catenated before file
-names, where the file name itself may also contain a path */
-
-char* srv_data_home = NULL;
-#ifdef UNIV_LOG_ARCHIVE
-char* srv_arch_dir = NULL;
-#endif /* UNIV_LOG_ARCHIVE */
-
-ibool srv_file_per_table = FALSE; /* store to its own file each table
- created by an user; data dictionary
- tables are in the system tablespace
- 0 */
-ibool srv_locks_unsafe_for_binlog = FALSE; /* Place locks to
- records only i.e. do
- not use next-key
- locking except on
- duplicate key checking
- and foreign key
- checking */
-ulint srv_n_data_files = 0;
-char** srv_data_file_names = NULL;
-ulint* srv_data_file_sizes = NULL; /* size in database pages */
-
-ibool srv_auto_extend_last_data_file = FALSE; /* if TRUE, then we
- auto-extend the last data
- file */
-ulint srv_last_file_size_max = 0; /* if != 0, this tells
- the max size auto-extending
- may increase the last data
- file size */
-ulong srv_auto_extend_increment = 8; /* If the last data file is
- auto-extended, we add this
- many pages to it at a time */
-ulint* srv_data_file_is_raw_partition = NULL;
-
-/* If the following is TRUE we do not allow inserts etc. This protects
-the user from forgetting the 'newraw' keyword to my.cnf */
-
-ibool srv_created_new_raw = FALSE;
-
-char** srv_log_group_home_dirs = NULL;
-
-ulint srv_n_log_groups = ULINT_MAX;
-ulint srv_n_log_files = ULINT_MAX;
-ulint srv_log_file_size = ULINT_MAX; /* size in database pages */
-ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */
-ulong srv_flush_log_at_trx_commit = 1;
-
-byte srv_latin1_ordering[256] /* The sort order table of the latin1
- character set. The following table is
- the MySQL order as of Feb 10th, 2002 */
-= {
- 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
-, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F
-, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
-, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F
-, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27
-, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F
-, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37
-, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F
-, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
-, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
-, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
-, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F
-, 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47
-, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F
-, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57
-, 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
-, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
-, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F
-, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97
-, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F
-, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7
-, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF
-, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7
-, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF
-, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
-, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
-, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xD7
-, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xDF
-, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43
-, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49
-, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xF7
-, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF
-};
-
-ulint srv_pool_size = ULINT_MAX; /* size in pages; MySQL inits
- this to size in kilobytes but
- we normalize this to pages in
- srv_boot() */
-ulint srv_awe_window_size = 0; /* size in pages; MySQL inits
- this to bytes, but we
- normalize it to pages in
- srv_boot() */
-ulint srv_mem_pool_size = ULINT_MAX; /* size in bytes */
-ulint srv_lock_table_size = ULINT_MAX;
-
-
-ulint srv_io_capacity = ULINT_MAX; /* Number of IO operations per
- second the server can do */
-
-ibool srv_extra_dirty_writes = TRUE; /* Write dirty pages to disk when pct
- dirty < max dirty pct */
-
-ulint srv_n_read_io_threads = ULINT_MAX;
-ulint srv_n_write_io_threads = ULINT_MAX;
-ulint srv_max_merged_io = 64;
-
-#ifdef UNIV_LOG_ARCHIVE
-ibool srv_log_archive_on = FALSE;
-ibool srv_archive_recovery = 0;
-dulint srv_archive_recovery_limit_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
-ulint srv_lock_wait_timeout = 1024 * 1024 * 1024;
-
-/* This parameter is used to throttle the number of insert buffers that are
-merged in a batch. By increasing this parameter on a faster disk you can
-possibly reduce the number of I/O operations performed to complete the
-merge operation. The value of this parameter is used as is by the
-background loop when the system is idle (low load), on a busy system
-the parameter is scaled down by a factor of 4, this is to avoid putting
-a heavier load on the I/O sub system. */
-
-ulong srv_insert_buffer_batch_size = 20;
-
-char* srv_file_flush_method_str = NULL;
-ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
-ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
-
-ulint srv_max_n_open_files = 300;
-
-/* The InnoDB main thread tries to keep the ratio of modified pages
-in the buffer pool to all database pages in the buffer pool smaller than
-the following number. But it is not guaranteed that the value stays below
-that during a time of heavy update/insert activity. */
-
-ulong srv_max_buf_pool_modified_pct = 90;
-
-/* variable counts amount of data read in total (in bytes) */
-ulint srv_data_read = 0;
-
-/* here we count the amount of data written in total (in bytes) */
-ulint srv_data_written = 0;
-
-/* the number of the log write requests done */
-ulint srv_log_write_requests = 0;
-
-/* the number of physical writes to the log performed */
-ulint srv_log_writes = 0;
-
-/* amount of data written to the log files in bytes */
-ulint srv_os_log_written = 0;
-
-/* amount of writes being done to the log files */
-ulint srv_os_log_pending_writes = 0;
-
-/* we increase this counter, when there we don't have enough space in the
-log buffer and have to flush it */
-ulint srv_log_waits = 0;
-
-/* this variable counts the amount of times, when the doublewrite buffer
-was flushed */
-ulint srv_dblwr_writes = 0;
-
-/* here we store the number of pages that have been flushed to the
-doublewrite buffer */
-ulint srv_dblwr_pages_written = 0;
-
-/* in this variable we store the number of write requests issued */
-ulint srv_buf_pool_write_requests = 0;
-
-/* here we store the number of times when we had to wait for a free page
-in the buffer pool. It happens when the buffer pool is full and we need
-to make a flush, in order to be able to read or create a page. */
-ulint srv_buf_pool_wait_free = 0;
-
-/* variable to count the number of pages that were written from buffer
-pool to the disk */
-ulint srv_buf_pool_flushed = 0;
-
-/* variable to count the number of buffer pool reads that led to the
-reading of a disk page */
-ulint srv_buf_pool_reads = 0;
-
-/* variable to count the number of sequential read-aheads */
-ulint srv_read_ahead_seq = 0;
-
-/* variable to count the number of random read-aheads */
-ulint srv_read_ahead_rnd = 0;
-
-/* An option to enable the fix for "Bug#43660 SHOW INDEXES/ANALYZE does
-NOT update cardinality for indexes of InnoDB table". By default we are
-running with the fix disabled because MySQL 5.1 is frozen for such
-behavioral changes. */
-char srv_use_legacy_cardinality_algorithm = TRUE;
-
-/* structure to pass status variables to MySQL */
-export_struc export_vars;
-
-/* If the following is != 0 we do not allow inserts etc. This protects
-the user from forgetting the innodb_force_recovery keyword to my.cnf */
-
-ulint srv_force_recovery = 0;
-/*-----------------------*/
-/* We are prepared for a situation that we have this many threads waiting for
-a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
-value. */
-
-ulint srv_max_n_threads = 0;
-
-/* The following controls how many threads we let inside InnoDB concurrently:
-threads waiting for locks are not counted into the number because otherwise
-we could get a deadlock. MySQL creates a thread for each user session, and
-semaphore contention and convoy problems can occur withput this restriction.
-Value 10 should be good if there are less than 4 processors + 4 disks in the
-computer. Bigger computers need bigger values. Value 0 will disable the
-concurrency check. */
-
-ibool srv_thread_concurrency_timer_based = TRUE;
-ulong srv_thread_concurrency = 0;
-ulong srv_commit_concurrency = 0;
-
-os_fast_mutex_t srv_conc_mutex; /* this mutex protects srv_conc data
- structures */
-lint srv_conc_n_threads = 0; /* number of transactions that
- have declared_to_be_inside_innodb
- set. It used to be a non-error
- for this value to drop below
- zero temporarily. This is no
- longer true. We'll, however,
- keep the lint datatype to add
- assertions to catch any corner
- cases that we may have
- missed. */
-ulint srv_conc_n_waiting_threads = 0; /* number of OS threads waiting in the
- FIFO for a permission to enter InnoDB
- */
-
-typedef struct srv_conc_slot_struct srv_conc_slot_t;
-struct srv_conc_slot_struct{
- os_event_t event; /* event to wait */
- ibool reserved; /* TRUE if slot
- reserved */
- ibool wait_ended; /* TRUE when another
- thread has already set
- the event and the
- thread in this slot is
- free to proceed; but
- reserved may still be
- TRUE at that point */
- UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /* queue node */
-};
-
-UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue; /* queue of threads
- waiting to get in */
-srv_conc_slot_t* srv_conc_slots; /* array of wait
- slots */
-
-/* Number of times a thread is allowed to enter InnoDB within the same
-SQL query after it has once got the ticket at srv_conc_enter_innodb */
-#define SRV_FREE_TICKETS_TO_ENTER srv_n_free_tickets_to_enter
-#define SRV_THREAD_SLEEP_DELAY srv_thread_sleep_delay
-/*-----------------------*/
-/* If the following is set to 1 then we do not run purge and insert buffer
-merge to completion before shutdown. If it is set to 2, do not even flush the
-buffer pool to data files at the shutdown: we effectively 'crash'
-InnoDB (but lose no committed transactions). */
-ulint srv_fast_shutdown = 0;
-
-/* Generate a innodb_status.<pid> file */
-ibool srv_innodb_status = FALSE;
-
-ibool srv_use_doublewrite_buf = TRUE;
-ibool srv_use_checksums = TRUE;
-
-ibool srv_set_thread_priorities = TRUE;
-int srv_query_thread_priority = 0;
-
-/* TRUE if the Address Windowing Extensions of Windows are used; then we must
-disable adaptive hash indexes */
-ibool srv_use_awe = FALSE;
-ibool srv_use_adaptive_hash_indexes = TRUE;
-
-/*-------------------------------------------*/
-ulong srv_n_spin_wait_rounds = 30;
-ulong srv_n_free_tickets_to_enter = 500;
-ulong srv_thread_sleep_delay = 10000;
-ulint srv_spin_wait_delay = 6;
-ibool srv_priority_boost = TRUE;
-
-ibool srv_print_thread_releases = FALSE;
-ibool srv_print_lock_waits = FALSE;
-ibool srv_print_buf_io = FALSE;
-ibool srv_print_log_io = FALSE;
-ibool srv_print_latch_waits = FALSE;
-
-ulint srv_n_rows_inserted = 0;
-ulint srv_n_rows_updated = 0;
-ulint srv_n_rows_deleted = 0;
-ulint srv_n_rows_read = 0;
-#ifndef UNIV_HOTBACKUP
-static ulint srv_n_rows_inserted_old = 0;
-static ulint srv_n_rows_updated_old = 0;
-static ulint srv_n_rows_deleted_old = 0;
-static ulint srv_n_rows_read_old = 0;
-#endif /* !UNIV_HOTBACKUP */
-
-ulint srv_n_lock_wait_count = 0;
-ulint srv_n_lock_wait_current_count = 0;
-ib_longlong srv_n_lock_wait_time = 0;
-ulint srv_n_lock_max_wait_time = 0;
-
-
-/*
- Set the following to 0 if you want InnoDB to write messages on
- stderr on startup/shutdown
-*/
-ibool srv_print_verbose_log = TRUE;
-ibool srv_print_innodb_monitor = FALSE;
-ibool srv_print_innodb_lock_monitor = FALSE;
-ibool srv_print_innodb_tablespace_monitor = FALSE;
-ibool srv_print_innodb_table_monitor = FALSE;
-
-/* Array of English strings describing the current state of an
-i/o handler thread */
-
-const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
-const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
-
-time_t srv_last_monitor_time;
-
-mutex_t srv_innodb_monitor_mutex;
-
-/* Mutex for locking srv_monitor_file */
-mutex_t srv_monitor_file_mutex;
-/* Temporary file for innodb monitor output */
-FILE* srv_monitor_file;
-/* Mutex for locking srv_dict_tmpfile.
-This mutex has a very high rank; threads reserving it should not
-be holding any InnoDB latches. */
-mutex_t srv_dict_tmpfile_mutex;
-/* Temporary file for output from the data dictionary */
-FILE* srv_dict_tmpfile;
-/* Mutex for locking srv_misc_tmpfile.
-This mutex has a very low rank; threads reserving it should not
-acquire any further latches or sleep before releasing this one. */
-mutex_t srv_misc_tmpfile_mutex;
-/* Temporary file for miscellanous diagnostic output */
-FILE* srv_misc_tmpfile;
-
-ulint srv_main_thread_process_no = 0;
-ulint srv_main_thread_id = 0;
-
-/* The following count work done by srv_master_thread. */
-
-/* Iterations by the 'once per second' loop */
-ulint srv_main_1_second_loops = 0;
-/* Calls to sleep by the 'once per second' loop */
-ulint srv_main_sleeps = 0;
-/* Iterations by the 'once per 10 seconds' loop */
-ulint srv_main_10_second_loops = 0;
-/* Iterations of the loop bounded by the 'background_loop' label */
-ulint srv_main_background_loops = 0;
-/* Iterations of the loop bounded by the 'flush_loop' label */
-ulint srv_main_flush_loops = 0;
-/* Calls to log_buffer_flush_to_disk */
-ulint srv_sync_flush = 0;
-/* Calls to log_buffer_flush_maybe_sync */
-ulint srv_async_flush = 0;
-
-/* Number of microseconds threads wait because of
-innodb_thread_concurrency */
-static ib_longlong srv_thread_wait_mics = 0;
-
-/* Number of microseconds for spinlock delay */
-static ib_longlong srv_timed_spin_delay = 0;
-
-/*
- IMPLEMENTATION OF THE SERVER MAIN PROGRAM
- =========================================
-
-There is the following analogue between this database
-server and an operating system kernel:
-
-DB concept equivalent OS concept
----------- ---------------------
-transaction -- process;
-
-query thread -- thread;
-
-lock -- semaphore;
-
-transaction set to
-the rollback state -- kill signal delivered to a process;
-
-kernel -- kernel;
-
-query thread execution:
-(a) without kernel mutex
-reserved -- process executing in user mode;
-(b) with kernel mutex reserved
- -- process executing in kernel mode;
-
-The server is controlled by a master thread which runs at
-a priority higher than normal, that is, higher than user threads.
-It sleeps most of the time, and wakes up, say, every 300 milliseconds,
-to check whether there is anything happening in the server which
-requires intervention of the master thread. Such situations may be,
-for example, when flushing of dirty blocks is needed in the buffer
-pool or old version of database rows have to be cleaned away.
-
-The threads which we call user threads serve the queries of
-the clients and input from the console of the server.
-They run at normal priority. The server may have several
-communications endpoints. A dedicated set of user threads waits
-at each of these endpoints ready to receive a client request.
-Each request is taken by a single user thread, which then starts
-processing and, when the result is ready, sends it to the client
-and returns to wait at the same endpoint the thread started from.
-
-So, we do not have dedicated communication threads listening at
-the endpoints and dealing the jobs to dedicated worker threads.
-Our architecture saves one thread swithch per request, compared
-to the solution with dedicated communication threads
-which amounts to 15 microseconds on 100 MHz Pentium
-running NT. If the client
-is communicating over a network, this saving is negligible, but
-if the client resides in the same machine, maybe in an SMP machine
-on a different processor from the server thread, the saving
-can be important as the threads can communicate over shared
-memory with an overhead of a few microseconds.
-
-We may later implement a dedicated communication thread solution
-for those endpoints which communicate over a network.
-
-Our solution with user threads has two problems: for each endpoint
-there has to be a number of listening threads. If there are many
-communication endpoints, it may be difficult to set the right number
-of concurrent threads in the system, as many of the threads
-may always be waiting at less busy endpoints. Another problem
-is queuing of the messages, as the server internally does not
-offer any queue for jobs.
-
-Another group of user threads is intended for splitting the
-queries and processing them in parallel. Let us call these
-parallel communication threads. These threads are waiting for
-parallelized tasks, suspended on event semaphores.
-
-A single user thread waits for input from the console,
-like a command to shut the database.
-
-Utility threads are a different group of threads which takes
-care of the buffer pool flushing and other, mainly background
-operations, in the server.
-Some of these utility threads always run at a lower than normal
-priority, so that they are always in background. Some of them
-may dynamically boost their priority by the pri_adjust function,
-even to higher than normal priority, if their task becomes urgent.
-The running of utilities is controlled by high- and low-water marks
-of urgency. The urgency may be measured by the number of dirty blocks
-in the buffer pool, in the case of the flush thread, for example.
-When the high-water mark is exceeded, an utility starts running, until
-the urgency drops under the low-water mark. Then the utility thread
-suspend itself to wait for an event. The master thread is
-responsible of signaling this event when the utility thread is
-again needed.
-
-For each individual type of utility, some threads always remain
-at lower than normal priority. This is because pri_adjust is implemented
-so that the threads at normal or higher priority control their
-share of running time by calling sleep. Thus, if the load of the
-system sudenly drops, these threads cannot necessarily utilize
-the system fully. The background priority threads make up for this,
-starting to run when the load drops.
-
-When there is no activity in the system, also the master thread
-suspends itself to wait for an event making
-the server totally silent. The responsibility to signal this
-event is on the user thread which again receives a message
-from a client.
-
-There is still one complication in our server design. If a
-background utility thread obtains a resource (e.g., mutex) needed by a user
-thread, and there is also some other user activity in the system,
-the user thread may have to wait indefinitely long for the
-resource, as the OS does not schedule a background thread if
-there is some other runnable user thread. This problem is called
-priority inversion in real-time programming.
-
-One solution to the priority inversion problem would be to
-keep record of which thread owns which resource and
-in the above case boost the priority of the background thread
-so that it will be scheduled and it can release the resource.
-This solution is called priority inheritance in real-time programming.
-A drawback of this solution is that the overhead of acquiring a mutex
-increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because
-the thread has to call os_thread_get_curr_id.
-This may be compared to 0.5 microsecond overhead for a mutex lock-unlock
-pair. Note that the thread
-cannot store the information in the resource, say mutex, itself,
-because competing threads could wipe out the information if it is
-stored before acquiring the mutex, and if it stored afterwards,
-the information is outdated for the time of one machine instruction,
-at least. (To be precise, the information could be stored to
-lock_word in mutex if the machine supports atomic swap.)
-
-The above solution with priority inheritance may become actual in the
-future, but at the moment we plan to implement a more coarse solution,
-which could be called a global priority inheritance. If a thread
-has to wait for a long time, say 300 milliseconds, for a resource,
-we just guess that it may be waiting for a resource owned by a background
-thread, and boost the the priority of all runnable background threads
-to the normal level. The background threads then themselves adjust
-their fixed priority back to background after releasing all resources
-they had (or, at some fixed points in their program code).
-
-What is the performance of the global priority inheritance solution?
-We may weigh the length of the wait time 300 milliseconds, during
-which the system processes some other thread
-to the cost of boosting the priority of each runnable background
-thread, rescheduling it, and lowering the priority again.
-On 100 MHz Pentium + NT this overhead may be of the order 100
-microseconds per thread. So, if the number of runnable background
-threads is not very big, say < 100, the cost is tolerable.
-Utility threads probably will access resources used by
-user threads not very often, so collisions of user threads
-to preempted utility threads should not happen very often.
-
-The thread table contains
-information of the current status of each thread existing in the system,
-and also the event semaphores used in suspending the master thread
-and utility and parallel communication threads when they have nothing to do.
-The thread table can be seen as an analogue to the process table
-in a traditional Unix implementation.
-
-The thread table is also used in the global priority inheritance
-scheme. This brings in one additional complication: threads accessing
-the thread table must have at least normal fixed priority,
-because the priority inheritance solution does not work if a background
-thread is preempted while possessing the mutex protecting the thread table.
-So, if a thread accesses the thread table, its priority has to be
-boosted at least to normal. This priority requirement can be seen similar to
-the privileged mode used when processing the kernel calls in traditional
-Unix.*/
-
-/* Thread slot in the thread table */
-struct srv_slot_struct{
- os_thread_id_t id; /* thread id */
- os_thread_t handle; /* thread handle */
- ulint type; /* thread type: user, utility etc. */
- ibool in_use; /* TRUE if this slot is in use */
- ibool suspended; /* TRUE if the thread is waiting
- for the event of this slot */
- ib_time_t suspend_time; /* time when the thread was
- suspended */
- os_event_t event; /* event used in suspending the
- thread when it has nothing to do */
- que_thr_t* thr; /* suspended query thread (only
- used for MySQL threads) */
-};
-
-/* Table for MySQL threads where they will be suspended to wait for locks */
-srv_slot_t* srv_mysql_table = NULL;
-
-os_event_t srv_lock_timeout_thread_event;
-
-srv_sys_t* srv_sys = NULL;
-
-byte srv_pad1[64]; /* padding to prevent other memory update
- hotspots from residing on the same memory
- cache line */
-mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs,
- query threads, and lock table */
-byte srv_pad2[64]; /* padding to prevent other memory update
- hotspots from residing on the same memory
- cache line */
-
-/* The following three values measure the urgency of the jobs of
-buffer, version, and insert threads. They may vary from 0 - 1000.
-The server mutex protects all these variables. The low-water values
-tell that the server can acquiesce the utility when the value
-drops below this low-water mark. */
-
-ulint srv_meter[SRV_MASTER + 1];
-ulint srv_meter_low_water[SRV_MASTER + 1];
-ulint srv_meter_high_water[SRV_MASTER + 1];
-ulint srv_meter_high_water2[SRV_MASTER + 1];
-ulint srv_meter_foreground[SRV_MASTER + 1];
-
-/* The following values give info about the activity going on in
-the database. They are protected by the server mutex. The arrays
-are indexed by the type of the thread. */
-
-ulint srv_n_threads_active[SRV_MASTER + 1];
-ulint srv_n_threads[SRV_MASTER + 1];
-
-static void time_spin_delay()
-{
- ulint start_sec, end_sec;
- ulint start_usec, end_usec;
- int i;
-
- srv_timed_spin_delay = 0;
-
- if (ut_usectime(&start_sec, &start_usec))
- return;
-
- for (i = 0; i < (int)SYNC_SPIN_ROUNDS; ++i)
- ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
-
- if (ut_usectime(&end_sec, &end_usec))
- return;
-
- srv_timed_spin_delay =ut_usecdiff(end_sec, end_usec,
- start_sec, start_usec);
-}
-
-/*************************************************************************
-Prints counters for work done by srv_master_thread. */
-
-static
-void
-srv_print_extra(
-/*===================*/
- FILE *file) /* in: output stream */
-{
- fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, "
- "%lu 10_second, %lu background, %lu flush\n",
- srv_main_1_second_loops, srv_main_sleeps,
- srv_main_10_second_loops, srv_main_background_loops,
- srv_main_flush_loops);
- fprintf(file, "srv_master_thread log flush: %lu sync, %lu async\n",
- srv_sync_flush, srv_async_flush);
- fprintf(file, "srv_wait_thread_mics %lld microseconds, %.1f seconds\n",
- srv_thread_wait_mics,
- (double) srv_thread_wait_mics / 1000000.0);
- fprintf(file,
- "spinlock delay for %d delay %d rounds is %lld mics\n",
- (int)srv_spin_wait_delay,
- (int)SYNC_SPIN_ROUNDS,
- srv_timed_spin_delay);
-}
-
-/*************************************************************************
-Sets the info describing an i/o thread current state. */
-
-void
-srv_set_io_thread_op_info(
-/*======================*/
- ulint i, /* in: the 'segment' of the i/o thread */
- const char* str) /* in: constant char string describing the
- state */
-{
- ut_a(i < SRV_MAX_N_IO_THREADS);
-
- srv_io_thread_op_info[i] = str;
-}
-
-/*************************************************************************
-Accessor function to get pointer to n'th slot in the server thread
-table. */
-static
-srv_slot_t*
-srv_table_get_nth_slot(
-/*===================*/
- /* out: pointer to the slot */
- ulint index) /* in: index of the slot */
-{
- ut_a(index < OS_THREAD_MAX_N);
-
- return(srv_sys->threads + index);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
-Gets the number of threads in the system. */
-
-ulint
-srv_get_n_threads(void)
-/*===================*/
-{
- ulint i;
- ulint n_threads = 0;
-
- mutex_enter(&kernel_mutex);
-
- for (i = SRV_COM; i < SRV_MASTER + 1; i++) {
-
- n_threads += srv_n_threads[i];
- }
-
- mutex_exit(&kernel_mutex);
-
- return(n_threads);
-}
-
-/*************************************************************************
-Reserves a slot in the thread table for the current thread. Also creates the
-thread local storage struct for the current thread. NOTE! The server mutex
-has to be reserved by the caller! */
-static
-ulint
-srv_table_reserve_slot(
-/*===================*/
- /* out: reserved slot index */
- ulint type) /* in: type of the thread: one of SRV_COM, ... */
-{
- srv_slot_t* slot;
- ulint i;
-
- ut_a(type > 0);
- ut_a(type <= SRV_MASTER);
-
- i = 0;
- slot = srv_table_get_nth_slot(i);
-
- while (slot->in_use) {
- i++;
- slot = srv_table_get_nth_slot(i);
- }
-
- ut_a(slot->in_use == FALSE);
-
- slot->in_use = TRUE;
- slot->suspended = FALSE;
- slot->id = os_thread_get_curr_id();
- slot->handle = os_thread_get_curr();
- slot->type = type;
-
- thr_local_create();
-
- thr_local_set_slot_no(os_thread_get_curr_id(), i);
-
- return(i);
-}
-
-/*************************************************************************
-Suspends the calling thread to wait for the event in its thread slot.
-NOTE! The server mutex has to be reserved by the caller! */
-static
-os_event_t
-srv_suspend_thread(void)
-/*====================*/
- /* out: event for the calling thread to wait */
-{
- srv_slot_t* slot;
- os_event_t event;
- ulint slot_no;
- ulint type;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- slot_no = thr_local_get_slot_no(os_thread_get_curr_id());
-
- if (srv_print_thread_releases) {
- fprintf(stderr,
- "Suspending thread %lu to slot %lu meter %lu\n",
- (ulong) os_thread_get_curr_id(), (ulong) slot_no,
- (ulong) srv_meter[SRV_RECOVERY]);
- }
-
- slot = srv_table_get_nth_slot(slot_no);
-
- type = slot->type;
-
- ut_ad(type >= SRV_WORKER);
- ut_ad(type <= SRV_MASTER);
-
- event = slot->event;
-
- slot->suspended = TRUE;
-
- ut_ad(srv_n_threads_active[type] > 0);
-
- srv_n_threads_active[type]--;
-
- os_event_reset(event);
-
- return(event);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*************************************************************************
-Releases threads of the type given from suspension in the thread table.
-NOTE! The server mutex has to be reserved by the caller! */
-
-ulint
-srv_release_threads(
-/*================*/
- /* out: number of threads released: this may be
- < n if not enough threads were suspended at the
- moment */
- ulint type, /* in: thread type */
- ulint n) /* in: number of threads to release */
-{
- srv_slot_t* slot;
- ulint i;
- ulint count = 0;
-
- ut_ad(type >= SRV_WORKER);
- ut_ad(type <= SRV_MASTER);
- ut_ad(n > 0);
- ut_ad(mutex_own(&kernel_mutex));
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
- slot = srv_table_get_nth_slot(i);
-
- if (slot->in_use && slot->type == type && slot->suspended) {
-
- slot->suspended = FALSE;
-
- srv_n_threads_active[type]++;
-
- os_event_set(slot->event);
-
- if (srv_print_thread_releases) {
- fprintf(stderr,
- "Releasing thread %lu type %lu"
- " from slot %lu meter %lu\n",
- (ulong) slot->id, (ulong) type,
- (ulong) i,
- (ulong) srv_meter[SRV_RECOVERY]);
- }
-
- count++;
-
- if (count == n) {
- break;
- }
- }
- }
-
- return(count);
-}
-
-/*************************************************************************
-Returns the calling thread type. */
-
-ulint
-srv_get_thread_type(void)
-/*=====================*/
- /* out: SRV_COM, ... */
-{
- ulint slot_no;
- srv_slot_t* slot;
- ulint type;
-
- mutex_enter(&kernel_mutex);
-
- slot_no = thr_local_get_slot_no(os_thread_get_curr_id());
-
- slot = srv_table_get_nth_slot(slot_no);
-
- type = slot->type;
-
- ut_ad(type >= SRV_WORKER);
- ut_ad(type <= SRV_MASTER);
-
- mutex_exit(&kernel_mutex);
-
- return(type);
-}
-
-/*************************************************************************
-Initializes the server. */
-
-void
-srv_init(void)
-/*==========*/
-{
- srv_conc_slot_t* conc_slot;
- srv_slot_t* slot;
- dict_table_t* table;
- ulint i;
-
- time_spin_delay();
-
- srv_sys = mem_alloc(sizeof(srv_sys_t));
-
- kernel_mutex_temp = mem_alloc(sizeof(mutex_t));
- mutex_create(&kernel_mutex, SYNC_KERNEL);
-
- mutex_create(&srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
-
- srv_sys->threads = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- slot = srv_table_get_nth_slot(i);
- slot->in_use = FALSE;
- slot->type=0; /* Avoid purify errors */
- slot->event = os_event_create(NULL);
- ut_a(slot->event);
- }
-
- srv_mysql_table = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t));
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- slot = srv_mysql_table + i;
- slot->in_use = FALSE;
- slot->type = 0;
- slot->event = os_event_create(NULL);
- ut_a(slot->event);
- }
-
- srv_lock_timeout_thread_event = os_event_create(NULL);
-
- for (i = 0; i < SRV_MASTER + 1; i++) {
- srv_n_threads_active[i] = 0;
- srv_n_threads[i] = 0;
- srv_meter[i] = 30;
- srv_meter_low_water[i] = 50;
- srv_meter_high_water[i] = 100;
- srv_meter_high_water2[i] = 200;
- srv_meter_foreground[i] = 250;
- }
-
- UT_LIST_INIT(srv_sys->tasks);
-
- /* create dummy table and index for old-style infimum and supremum */
- table = dict_mem_table_create("SYS_DUMMY1",
- DICT_HDR_SPACE, 1, 0);
- dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
- DATA_ENGLISH | DATA_NOT_NULL, 8);
-
- srv_sys->dummy_ind1 = dict_mem_index_create(
- "SYS_DUMMY1", "SYS_DUMMY1", DICT_HDR_SPACE, 0, 1);
- dict_index_add_col(srv_sys->dummy_ind1, table, (dict_col_t*)
- dict_table_get_nth_col(table, 0), 0);
- srv_sys->dummy_ind1->table = table;
- /* create dummy table and index for new-style infimum and supremum */
- table = dict_mem_table_create("SYS_DUMMY2",
- DICT_HDR_SPACE, 1, DICT_TF_COMPACT);
- dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR,
- DATA_ENGLISH | DATA_NOT_NULL, 8);
- srv_sys->dummy_ind2 = dict_mem_index_create(
- "SYS_DUMMY2", "SYS_DUMMY2", DICT_HDR_SPACE, 0, 1);
- dict_index_add_col(srv_sys->dummy_ind2, table, (dict_col_t*)
- dict_table_get_nth_col(table, 0), 0);
- srv_sys->dummy_ind2->table = table;
-
- /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
- srv_sys->dummy_ind1->cached = srv_sys->dummy_ind2->cached = TRUE;
-
- /* Init the server concurrency restriction data structures */
-
- os_fast_mutex_init(&srv_conc_mutex);
-
- UT_LIST_INIT(srv_conc_queue);
-
- srv_conc_slots = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t));
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- conc_slot = srv_conc_slots + i;
- conc_slot->reserved = FALSE;
- conc_slot->event = os_event_create(NULL);
- ut_a(conc_slot->event);
- }
-}
-
-/*************************************************************************
-Frees the OS fast mutex created in srv_init(). */
-
-void
-srv_free(void)
-/*==========*/
-{
- os_fast_mutex_free(&srv_conc_mutex);
-}
-
-/*************************************************************************
-Initializes the synchronization primitives, memory system, and the thread
-local storage. */
-
-void
-srv_general_init(void)
-/*==================*/
-{
- os_sync_init();
- sync_init();
- mem_init(srv_mem_pool_size);
- thr_local_init();
-}
-
-/*======================= InnoDB Server FIFO queue =======================*/
-
-/* Maximum allowable purge history length. <=0 means 'infinite'. */
-ulong srv_max_purge_lag = 0;
-
-/*************************************************************************
-Puts an OS thread to wait if there are too many concurrent threads
-(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
-
-#ifdef UNIV_SYNC_ATOMIC
-static void
-inc_srv_conc_n_threads(lint *n_threads)
-{
- *n_threads = os_atomic_increment(&srv_conc_n_threads, 1);
-}
-
-static void
-dec_srv_conc_n_threads()
-{
- os_atomic_increment(&srv_conc_n_threads, -1);
-}
-#endif
-
-static void
-print_already_in_error(trx_t* trx)
-{
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: trying to declare trx"
- " to enter InnoDB, but\n"
- "InnoDB: it already is declared.\n", stderr);
- trx_print(stderr, trx, 0);
- putc('\n', stderr);
- return;
-}
-
-#ifdef UNIV_SYNC_ATOMIC
-static void
-enter_innodb_with_tickets(trx_t* trx)
-{
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
- return;
-}
-
-static void
-srv_conc_enter_innodb_timer_based(trx_t* trx)
-{
- lint conc_n_threads;
- ibool has_yielded = FALSE;
- ulint has_slept = 0;
-
- if (trx->declared_to_be_inside_innodb) {
- print_already_in_error(trx);
- }
-retry:
- if (srv_conc_n_threads < (lint) srv_thread_concurrency) {
- inc_srv_conc_n_threads(&conc_n_threads);
- if (conc_n_threads <= (lint) srv_thread_concurrency) {
- enter_innodb_with_tickets(trx);
- return;
- }
- dec_srv_conc_n_threads(&conc_n_threads);
- }
- if (!has_yielded)
- {
- has_yielded = TRUE;
- os_thread_yield();
- goto retry;
- }
- if (trx->has_search_latch
- || NULL != UT_LIST_GET_FIRST(trx->trx_locks)) {
-
- inc_srv_conc_n_threads(&conc_n_threads);
- enter_innodb_with_tickets(trx);
- return;
- }
- if (has_slept < 2)
- {
- trx->op_info = "sleeping before entering InnoDB";
- os_thread_sleep(10000);
- trx->op_info = "";
- has_slept++;
- }
- inc_srv_conc_n_threads(&conc_n_threads);
- enter_innodb_with_tickets(trx);
- return;
-}
-
-static void
-srv_conc_exit_innodb_timer_based(trx_t* trx)
-{
- dec_srv_conc_n_threads();
- trx->declared_to_be_inside_innodb = FALSE;
- trx->n_tickets_to_enter_innodb = 0;
- return;
-}
-#endif
-
-void
-srv_conc_enter_innodb(
-/*==================*/
- trx_t* trx) /* in: transaction object associated with the
- thread */
-{
- ibool has_slept = FALSE;
- srv_conc_slot_t* slot = NULL;
- ulint i;
-
- if (trx->mysql_thd != NULL
- && thd_is_replication_slave_thread(trx->mysql_thd)) {
-
- /* TODO Do something more interesting (based on a config
- parameter). Some users what to give the replication
- thread very low priority, see http://bugs.mysql.com/25078
- This can be done by introducing
- innodb_replication_delay(ms) config parameter */
- return;
- }
-
- /* If trx has 'free tickets' to enter the engine left, then use one
- such ticket */
-
- if (trx->n_tickets_to_enter_innodb > 0) {
- trx->n_tickets_to_enter_innodb--;
-
- return;
- }
-
-#ifdef UNIV_SYNC_ATOMIC
- if (srv_thread_concurrency_timer_based) {
- srv_conc_enter_innodb_timer_based(trx);
- return;
- }
-#endif
-
- os_fast_mutex_lock(&srv_conc_mutex);
-retry:
- if (trx->declared_to_be_inside_innodb) {
- print_already_in_error(trx);
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- return;
- }
-
- ut_ad(srv_conc_n_threads >= 0);
-
- if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
-
- srv_conc_n_threads++;
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- return;
- }
-
- /* If the transaction is not holding resources, let it sleep
- for SRV_THREAD_SLEEP_DELAY microseconds, and try again then */
-
- if (!has_slept && !trx->has_search_latch
- && NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {
-
- has_slept = TRUE; /* We let it sleep only once to avoid
- starvation */
-
- srv_conc_n_waiting_threads++;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- trx->op_info = "sleeping before joining InnoDB queue";
-
- /* Peter Zaitsev suggested that we take the sleep away
- altogether. But the sleep may be good in pathological
- situations of lots of thread switches. Simply put some
- threads aside for a while to reduce the number of thread
- switches. */
- if (SRV_THREAD_SLEEP_DELAY > 0) {
- os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
- }
-
- trx->op_info = "";
-
- os_fast_mutex_lock(&srv_conc_mutex);
-
- srv_conc_n_waiting_threads--;
-
- goto retry;
- }
-
- /* Too many threads inside: put the current thread to a queue */
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- slot = srv_conc_slots + i;
-
- if (!slot->reserved) {
-
- break;
- }
- }
-
- if (i == OS_THREAD_MAX_N) {
- /* Could not find a free wait slot, we must let the
- thread enter */
-
- srv_conc_n_threads++;
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = 0;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- return;
- }
-
- /* Release possible search system latch this thread has */
- if (trx->has_search_latch) {
- trx_search_latch_release_if_reserved(trx);
- }
-
- /* Add to the queue */
- slot->reserved = TRUE;
- slot->wait_ended = FALSE;
-
- UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);
-
- os_event_reset(slot->event);
-
- srv_conc_n_waiting_threads++;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- /* Go to wait for the event; when a thread leaves InnoDB it will
- release this thread */
-
- trx->op_info = "waiting in InnoDB queue";
-
- os_event_wait(slot->event);
-
- trx->op_info = "";
-
- os_fast_mutex_lock(&srv_conc_mutex);
-
- srv_conc_n_waiting_threads--;
-
- /* NOTE that the thread which released this thread already
- incremented the thread counter on behalf of this thread */
-
- slot->reserved = FALSE;
-
- UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);
-
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-}
-
-/*************************************************************************
-This lets a thread enter InnoDB regardless of the number of threads inside
-InnoDB. This must be called when a thread ends a lock wait. */
-
-void
-srv_conc_force_enter_innodb(
-/*========================*/
- trx_t* trx) /* in: transaction object associated with the
- thread */
-{
-
- if (UNIV_LIKELY(!srv_thread_concurrency)) {
-
- return;
- }
-
- ut_ad(srv_conc_n_threads >= 0);
-#ifdef UNIV_SYNC_ATOMIC
- if (srv_thread_concurrency_timer_based) {
- lint conc_n_threads;
-
- inc_srv_conc_n_threads(&conc_n_threads);
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = 1;
- return;
- }
-#endif
- os_fast_mutex_lock(&srv_conc_mutex);
- srv_conc_n_threads++;
- trx->declared_to_be_inside_innodb = TRUE;
- trx->n_tickets_to_enter_innodb = 1;
- os_fast_mutex_unlock(&srv_conc_mutex);
-}
-
-/*************************************************************************
-This must be called when a thread exits InnoDB in a lock wait or at the
-end of an SQL statement. */
-
-void
-srv_conc_force_exit_innodb(
-/*=======================*/
- trx_t* trx) /* in: transaction object associated with the
- thread */
-{
- srv_conc_slot_t* slot = NULL;
-
- if (trx->mysql_thd != NULL
- && thd_is_replication_slave_thread(trx->mysql_thd)) {
-
- return;
- }
-
- if (trx->declared_to_be_inside_innodb == FALSE) {
-
- return;
- }
-
-#ifdef UNIV_SYNC_ATOMIC
- if (srv_thread_concurrency_timer_based)
- {
- srv_conc_exit_innodb_timer_based(trx);
- return;
- }
-#endif
-
- os_fast_mutex_lock(&srv_conc_mutex);
-
- ut_ad(srv_conc_n_threads > 0);
- srv_conc_n_threads--;
- trx->declared_to_be_inside_innodb = FALSE;
- trx->n_tickets_to_enter_innodb = 0;
-
- if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
- /* Look for a slot where a thread is waiting and no other
- thread has yet released the thread */
-
- slot = UT_LIST_GET_FIRST(srv_conc_queue);
-
- while (slot && slot->wait_ended == TRUE) {
- slot = UT_LIST_GET_NEXT(srv_conc_queue, slot);
- }
-
- if (slot != NULL) {
- slot->wait_ended = TRUE;
-
- /* We increment the count on behalf of the released
- thread */
-
- srv_conc_n_threads++;
- }
- }
-
- os_fast_mutex_unlock(&srv_conc_mutex);
-
- if (slot != NULL) {
- os_event_set(slot->event);
- }
-}
-
-/*************************************************************************
-This must be called when a thread exits InnoDB. */
-
-void
-srv_conc_exit_innodb(
-/*=================*/
- trx_t* trx) /* in: transaction object associated with the
- thread */
-{
- if (trx->n_tickets_to_enter_innodb > 0) {
- /* We will pretend the thread is still inside InnoDB though it
- now leaves the InnoDB engine. In this way we save
- a lot of semaphore operations. srv_conc_force_exit_innodb is
- used to declare the thread definitely outside InnoDB. It
- should be called when there is a lock wait or an SQL statement
- ends. */
-
- return;
- }
-
- srv_conc_force_exit_innodb(trx);
-}
-
-/*========================================================================*/
-
-/*************************************************************************
-Normalizes init parameter values to use units we use inside InnoDB. */
-static
-ulint
-srv_normalize_init_values(void)
-/*===========================*/
- /* out: DB_SUCCESS or error code */
-{
- ulint n;
- ulint i;
-
- n = srv_n_data_files;
-
- for (i = 0; i < n; i++) {
- srv_data_file_sizes[i] = srv_data_file_sizes[i]
- * ((1024 * 1024) / UNIV_PAGE_SIZE);
- }
-
- srv_last_file_size_max = srv_last_file_size_max
- * ((1024 * 1024) / UNIV_PAGE_SIZE);
-
- srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;
-
- srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
-
- srv_pool_size = srv_pool_size / (UNIV_PAGE_SIZE / 1024);
-
- srv_awe_window_size = srv_awe_window_size / UNIV_PAGE_SIZE;
-
- if (srv_use_awe) {
- /* If we are using AWE we must save memory in the 32-bit
- address space of the process, and cannot bind the lock
- table size to the real buffer pool size. */
-
- srv_lock_table_size = 20 * srv_awe_window_size;
- } else {
- srv_lock_table_size = 5 * srv_pool_size;
- }
-
- return(DB_SUCCESS);
-}
-
-/*************************************************************************
-Boots the InnoDB server. */
-
-ulint
-srv_boot(void)
-/*==========*/
- /* out: DB_SUCCESS or error code */
-{
- ulint err;
-
- /* Transform the init parameter values given by MySQL to
- use units we use inside InnoDB: */
-
- err = srv_normalize_init_values();
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- /* Initialize synchronization primitives, memory management, and thread
- local storage */
-
- srv_general_init();
-
- /* Initialize this module */
-
- srv_init();
-
- return(DB_SUCCESS);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
-Reserves a slot in the thread table for the current MySQL OS thread.
-NOTE! The kernel mutex has to be reserved by the caller! */
-static
-srv_slot_t*
-srv_table_reserve_slot_for_mysql(void)
-/*==================================*/
- /* out: reserved slot */
-{
- srv_slot_t* slot;
- ulint i;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- i = 0;
- slot = srv_mysql_table + i;
-
- while (slot->in_use) {
- i++;
-
- if (i >= OS_THREAD_MAX_N) {
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: There appear to be %lu MySQL"
- " threads currently waiting\n"
- "InnoDB: inside InnoDB, which is the"
- " upper limit. Cannot continue operation.\n"
- "InnoDB: We intentionally generate"
- " a seg fault to print a stack trace\n"
- "InnoDB: on Linux. But first we print"
- " a list of waiting threads.\n", (ulong) i);
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
- slot = srv_mysql_table + i;
-
- fprintf(stderr,
- "Slot %lu: thread id %lu, type %lu,"
- " in use %lu, susp %lu, time %lu\n",
- (ulong) i,
- (ulong) os_thread_pf(slot->id),
- (ulong) slot->type,
- (ulong) slot->in_use,
- (ulong) slot->suspended,
- (ulong) difftime(ut_time(),
- slot->suspend_time));
- }
-
- ut_error;
- }
-
- slot = srv_mysql_table + i;
- }
-
- ut_a(slot->in_use == FALSE);
-
- slot->in_use = TRUE;
- slot->id = os_thread_get_curr_id();
- slot->handle = os_thread_get_curr();
-
- return(slot);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************
-Puts a MySQL OS thread to wait for a lock to be released. If an error
-occurs during the wait trx->error_state associated with thr is
-!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
-are possible errors. DB_DEADLOCK is returned if selective deadlock
-resolution chose this transaction as a victim. */
-
-void
-srv_suspend_mysql_thread(
-/*=====================*/
- que_thr_t* thr) /* in: query thread associated with the MySQL
- OS thread */
-{
-#ifndef UNIV_HOTBACKUP
- srv_slot_t* slot;
- os_event_t event;
- double wait_time;
- trx_t* trx;
- ibool had_dict_lock = FALSE;
- ibool was_declared_inside_innodb = FALSE;
- ib_longlong start_time = 0;
- ib_longlong finish_time;
- ulint diff_time;
- ulint sec;
- ulint ms;
-
- ut_ad(!mutex_own(&kernel_mutex));
-
- trx = thr_get_trx(thr);
-
- os_event_set(srv_lock_timeout_thread_event);
-
- mutex_enter(&kernel_mutex);
-
- trx->error_state = DB_SUCCESS;
-
- if (thr->state == QUE_THR_RUNNING) {
-
- ut_ad(thr->is_active == TRUE);
-
- /* The lock has already been released or this transaction
- was chosen as a deadlock victim: no need to suspend */
-
- if (trx->was_chosen_as_deadlock_victim) {
-
- trx->error_state = DB_DEADLOCK;
- trx->was_chosen_as_deadlock_victim = FALSE;
- }
-
- mutex_exit(&kernel_mutex);
-
- return;
- }
-
- ut_ad(thr->is_active == FALSE);
-
- slot = srv_table_reserve_slot_for_mysql();
-
- event = slot->event;
-
- slot->thr = thr;
-
- os_event_reset(event);
-
- slot->suspend_time = ut_time();
-
- if (thr->lock_state == QUE_THR_LOCK_ROW) {
- srv_n_lock_wait_count++;
- srv_n_lock_wait_current_count++;
-
- if (ut_usectime(&sec, &ms) == -1) {
- start_time = -1;
- } else {
- start_time = (ib_longlong)sec * 1000000 + ms;
- }
- }
- /* Wake the lock timeout monitor thread, if it is suspended */
-
- os_event_set(srv_lock_timeout_thread_event);
-
- mutex_exit(&kernel_mutex);
-
- if (trx->declared_to_be_inside_innodb) {
-
- was_declared_inside_innodb = TRUE;
-
- /* We must declare this OS thread to exit InnoDB, since a
- possible other thread holding a lock which this thread waits
- for must be allowed to enter, sooner or later */
-
- srv_conc_force_exit_innodb(trx);
- }
-
- /* Release possible foreign key check latch */
- if (trx->dict_operation_lock_mode == RW_S_LATCH) {
-
- had_dict_lock = TRUE;
-
- row_mysql_unfreeze_data_dictionary(trx);
- }
-
- ut_a(trx->dict_operation_lock_mode == 0);
-
- /* Wait for the release */
-
- os_event_wait(event);
-
- if (had_dict_lock) {
-
- row_mysql_freeze_data_dictionary(trx);
- }
-
- if (was_declared_inside_innodb) {
-
- /* Return back inside InnoDB */
-
- srv_conc_force_enter_innodb(trx);
- }
-
- mutex_enter(&kernel_mutex);
-
- /* Release the slot for others to use */
-
- slot->in_use = FALSE;
-
- wait_time = ut_difftime(ut_time(), slot->suspend_time);
-
- if (thr->lock_state == QUE_THR_LOCK_ROW) {
- if (ut_usectime(&sec, &ms) == -1) {
- finish_time = -1;
- } else {
- finish_time = (ib_longlong)sec * 1000000 + ms;
- }
-
- diff_time = (ulint) (finish_time - start_time);
-
- srv_n_lock_wait_current_count--;
- srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time;
- if (diff_time > srv_n_lock_max_wait_time &&
- /* only update the variable if we successfully
- retrieved the start and finish times. See Bug#36819. */
- start_time != -1 && finish_time != -1) {
- srv_n_lock_max_wait_time = diff_time;
- }
- }
-
- if (trx->was_chosen_as_deadlock_victim) {
-
- trx->error_state = DB_DEADLOCK;
- trx->was_chosen_as_deadlock_victim = FALSE;
- }
-
- mutex_exit(&kernel_mutex);
-
- if (srv_lock_wait_timeout < 100000000
- && wait_time > (double)srv_lock_wait_timeout) {
-
- trx->error_state = DB_LOCK_WAIT_TIMEOUT;
- }
-#else /* UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
-#endif /* UNIV_HOTBACKUP */
-}
-
-/************************************************************************
-Releases a MySQL OS thread waiting for a lock to be released, if the
-thread is already suspended. */
-
-void
-srv_release_mysql_thread_if_suspended(
-/*==================================*/
- que_thr_t* thr) /* in: query thread associated with the
- MySQL OS thread */
-{
-#ifndef UNIV_HOTBACKUP
- srv_slot_t* slot;
- ulint i;
-
- ut_ad(mutex_own(&kernel_mutex));
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
- slot = srv_mysql_table + i;
-
- if (slot->in_use && slot->thr == thr) {
- /* Found */
-
- os_event_set(slot->event);
-
- return;
- }
- }
-
- /* not found */
-#else /* UNIV_HOTBACKUP */
- /* This function depends on MySQL code that is not included in
- InnoDB Hot Backup builds. Besides, this function should never
- be called in InnoDB Hot Backup. */
- ut_error;
-#endif /* UNIV_HOTBACKUP */
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************
-Refreshes the values used to calculate per-second averages. */
-static
-ibool
-srv_refresh_innodb_monitor_stats(void)
-/*==================================*/
-{
- /* Sometimes we will skip stats update to avoid deadlock, since
- since this function is called by the background wake-up thread */
- if (mutex_enter_nowait(&srv_innodb_monitor_mutex)) {
- /* mutex_enter_nowait returns 1 on failure */
- return FALSE;
- }
-
- srv_last_monitor_time = time(NULL);
-
- os_aio_refresh_stats();
-
- btr_cur_n_sea_old = btr_cur_n_sea;
- btr_cur_n_non_sea_old = btr_cur_n_non_sea;
-
- log_refresh_stats();
-
- buf_refresh_io_stats();
-
- srv_n_rows_inserted_old = srv_n_rows_inserted;
- srv_n_rows_updated_old = srv_n_rows_updated;
- srv_n_rows_deleted_old = srv_n_rows_deleted;
- srv_n_rows_read_old = srv_n_rows_read;
-
- mutex_exit(&srv_innodb_monitor_mutex);
- return TRUE;
-}
-
-/**********************************************************************
-Outputs to a file the output of the InnoDB Monitor. */
-
-void
-srv_printf_innodb_monitor(
-/*======================*/
- FILE* file) /* in: output stream */
-{
- double time_elapsed;
- time_t current_time;
- ulint n_reserved;
-
- mutex_enter(&srv_innodb_monitor_mutex);
-
- current_time = time(NULL);
-
- /* We add 0.001 seconds to time_elapsed to prevent division
- by zero if two users happen to call SHOW INNODB STATUS at the same
- time */
-
- time_elapsed = difftime(current_time, srv_last_monitor_time)
- + 0.001;
-
- srv_last_monitor_time = time(NULL);
-
- fputs("\n=====================================\n", file);
-
- ut_print_timestamp(file);
- fprintf(file,
- " INNODB MONITOR OUTPUT\n"
- "=====================================\n"
- "Per second averages calculated from the last %lu seconds\n",
- (ulong)time_elapsed);
-
- fputs("----------\n"
- "BACKGROUND THREAD\n"
- "----------\n", file);
- srv_print_extra(file);
-
- fputs("----------\n"
- "SEMAPHORES\n"
- "----------\n", file);
- sync_print(file);
-
- /* Conceptually, srv_innodb_monitor_mutex has a very high latching
- order level in sync0sync.h, while dict_foreign_err_mutex has a very
- low level 135. Therefore we can reserve the latter mutex here without
- a danger of a deadlock of threads. */
-
- mutex_enter(&dict_foreign_err_mutex);
-
- if (ftell(dict_foreign_err_file) != 0L) {
- fputs("------------------------\n"
- "LATEST FOREIGN KEY ERROR\n"
- "------------------------\n", file);
- ut_copy_file(file, dict_foreign_err_file);
- }
-
- mutex_exit(&dict_foreign_err_mutex);
-
- /* Print open transaction details */
- lock_print_info_summary(file);
-
- if (trx_start) {
- long t = ftell(file);
- if (t < 0) {
- *trx_start = ULINT_UNDEFINED;
- } else {
- *trx_start = (ulint) t;
- }
- }
- lock_print_info_all_transactions(file);
- if (trx_end) {
- long t = ftell(file);
- if (t < 0) {
- *trx_end = ULINT_UNDEFINED;
- } else {
- *trx_end = (ulint) t;
- }
- }
-
- fputs("--------\n"
- "FILE I/O\n"
- "--------\n", file);
- os_aio_print(file);
-
- fputs("-------------------------------------\n"
- "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
- "-------------------------------------\n", file);
- ibuf_print(file);
-
- ha_print_info(file, btr_search_sys->hash_index);
-
- fprintf(file,
- "%.2f hash searches/s, %.2f non-hash searches/s\n",
- (btr_cur_n_sea - btr_cur_n_sea_old)
- / time_elapsed,
- (btr_cur_n_non_sea - btr_cur_n_non_sea_old)
- / time_elapsed);
- btr_cur_n_sea_old = btr_cur_n_sea;
- btr_cur_n_non_sea_old = btr_cur_n_non_sea;
-
- fputs("---\n"
- "LOG\n"
- "---\n", file);
- log_print(file);
-
- fputs("----------------------\n"
- "BUFFER POOL AND MEMORY\n"
- "----------------------\n", file);
- fprintf(file,
- "Total memory allocated " ULINTPF
- "; in additional pool allocated " ULINTPF "\n",
- ut_total_allocated_memory,
- mem_pool_get_reserved(mem_comm_pool));
- fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
- dict_sys->size);
-
- if (srv_use_awe) {
- fprintf(file,
- "In addition to that %lu MB of AWE memory allocated\n",
- (ulong) (srv_pool_size
- / ((1024 * 1024) / UNIV_PAGE_SIZE)));
- }
-
- buf_print_io(file);
-
- fputs("--------------\n"
- "ROW OPERATIONS\n"
- "--------------\n", file);
- fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
- (long) srv_conc_n_threads,
- (ulong) srv_conc_n_waiting_threads);
-
- fprintf(file, "%lu read views open inside InnoDB\n",
- UT_LIST_GET_LEN(trx_sys->view_list));
-
- n_reserved = fil_space_get_n_reserved_extents(0);
- if (n_reserved > 0) {
- fprintf(file,
- "%lu tablespace extents now reserved for"
- " B-tree split operations\n",
- (ulong) n_reserved);
- }
-
-#ifdef UNIV_LINUX
- fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
- (ulong) srv_main_thread_process_no,
- (ulong) srv_main_thread_id,
- srv_main_thread_op_info);
-#else
- fprintf(file, "Main thread id %lu, state: %s\n",
- (ulong) srv_main_thread_id,
- srv_main_thread_op_info);
-#endif
- fprintf(file,
- "Number of rows inserted " ULINTPF
- ", updated " ULINTPF ", deleted " ULINTPF
- ", read " ULINTPF "\n",
- srv_n_rows_inserted,
- srv_n_rows_updated,
- srv_n_rows_deleted,
- srv_n_rows_read);
- fprintf(file,
- "%.2f inserts/s, %.2f updates/s,"
- " %.2f deletes/s, %.2f reads/s\n",
- (srv_n_rows_inserted - srv_n_rows_inserted_old)
- / time_elapsed,
- (srv_n_rows_updated - srv_n_rows_updated_old)
- / time_elapsed,
- (srv_n_rows_deleted - srv_n_rows_deleted_old)
- / time_elapsed,
- (srv_n_rows_read - srv_n_rows_read_old)
- / time_elapsed);
-
- srv_n_rows_inserted_old = srv_n_rows_inserted;
- srv_n_rows_updated_old = srv_n_rows_updated;
- srv_n_rows_deleted_old = srv_n_rows_deleted;
- srv_n_rows_read_old = srv_n_rows_read;
-
- fputs("----------------------------\n"
- "END OF INNODB MONITOR OUTPUT\n"
- "============================\n", file);
- mutex_exit(&srv_innodb_monitor_mutex);
- fflush(file);
-}
-
-/**********************************************************************
-Function to pass InnoDB status variables to MySQL */
-
-void
-srv_export_innodb_status(void)
-{
- mutex_enter(&srv_innodb_monitor_mutex);
-
- export_vars.innodb_data_pending_reads
- = os_n_pending_reads;
- export_vars.innodb_data_pending_writes
- = os_n_pending_writes;
- export_vars.innodb_data_pending_fsyncs
- = fil_n_pending_log_flushes
- + fil_n_pending_tablespace_flushes;
- export_vars.innodb_data_fsyncs = os_n_fsyncs;
- export_vars.innodb_data_read = srv_data_read;
- export_vars.innodb_data_reads = os_n_file_reads;
- export_vars.innodb_data_writes = os_n_file_writes;
- export_vars.innodb_data_written = srv_data_written;
- export_vars.innodb_buffer_pool_read_requests = buf_pool->n_page_gets;
- export_vars.innodb_buffer_pool_write_requests
- = srv_buf_pool_write_requests;
- export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
- export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
- export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
- export_vars.innodb_buffer_pool_read_ahead_rnd = srv_read_ahead_rnd;
- export_vars.innodb_buffer_pool_read_ahead_seq = srv_read_ahead_seq;
- export_vars.innodb_buffer_pool_pages_data
- = UT_LIST_GET_LEN(buf_pool->LRU);
- export_vars.innodb_buffer_pool_pages_dirty
- = UT_LIST_GET_LEN(buf_pool->flush_list);
- export_vars.innodb_buffer_pool_pages_free
- = UT_LIST_GET_LEN(buf_pool->free);
-#ifdef UNIV_DEBUG
- export_vars.innodb_buffer_pool_pages_latched
- = buf_get_latched_pages_number();
-#endif /* UNIV_DEBUG */
- export_vars.innodb_buffer_pool_pages_total = buf_pool->curr_size;
-
- export_vars.innodb_buffer_pool_pages_misc = buf_pool->max_size
- - UT_LIST_GET_LEN(buf_pool->LRU)
- - UT_LIST_GET_LEN(buf_pool->free);
-#ifdef UNIV_SYNC_ATOMIC
- export_vars.innodb_have_sync_atomic = 1;
-#else
- export_vars.innodb_have_sync_atomic = 0;
-#endif
-#ifdef UNIV_DISABLE_MEM_POOL
- export_vars.innodb_heap_enabled = 0;
-#else
- export_vars.innodb_heap_enabled = 1;
-#endif
- export_vars.innodb_page_size = UNIV_PAGE_SIZE;
- export_vars.innodb_log_waits = srv_log_waits;
- export_vars.innodb_os_log_written = srv_os_log_written;
- export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
- export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
- export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes;
- export_vars.innodb_log_write_requests = srv_log_write_requests;
- export_vars.innodb_log_writes = srv_log_writes;
- export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
- export_vars.innodb_dblwr_writes = srv_dblwr_writes;
- export_vars.innodb_pages_created = buf_pool->n_pages_created;
- export_vars.innodb_pages_read = buf_pool->n_pages_read;
- export_vars.innodb_pages_written = buf_pool->n_pages_written;
- export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
- export_vars.innodb_row_lock_current_waits
- = srv_n_lock_wait_current_count;
- export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000;
- if (srv_n_lock_wait_count > 0) {
- export_vars.innodb_row_lock_time_avg = (ulint)
- (srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count);
- } else {
- export_vars.innodb_row_lock_time_avg = 0;
- }
- export_vars.innodb_row_lock_time_max
- = srv_n_lock_max_wait_time / 1000;
- export_vars.innodb_rows_read = srv_n_rows_read;
- export_vars.innodb_rows_inserted = srv_n_rows_inserted;
- export_vars.innodb_rows_updated = srv_n_rows_updated;
- export_vars.innodb_rows_deleted = srv_n_rows_deleted;
- export_vars.innodb_wake_ups = sync_wake_ups;
-
- mutex_exit(&srv_innodb_monitor_mutex);
-}
-
-/*************************************************************************
-A thread which wakes up threads whose lock wait may have lasted too long.
-This also prints the info output by various InnoDB monitors. */
-
-os_thread_ret_t
-srv_lock_timeout_and_monitor_thread(
-/*================================*/
- /* out: a dummy parameter */
- void* arg __attribute__((unused)))
- /* in: a dummy parameter required by
- os_thread_create */
-{
- srv_slot_t* slot;
- double time_elapsed;
- time_t current_time;
- time_t last_table_monitor_time;
- time_t last_tablespace_monitor_time;
- time_t last_monitor_time;
- ibool some_waits;
- double wait_time;
- ulint i;
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Lock timeout thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif
- UT_NOT_USED(arg);
- srv_last_monitor_time = time(NULL);
- last_table_monitor_time = time(NULL);
- last_tablespace_monitor_time = time(NULL);
- last_monitor_time = time(NULL);
-loop:
- srv_lock_timeout_and_monitor_active = TRUE;
-
- /* When someone is waiting for a lock, we wake up every second
- and check if a timeout has passed for a lock wait */
-
- os_thread_sleep(1000000);
-
- current_time = time(NULL);
-
- time_elapsed = difftime(current_time, last_monitor_time);
-
- if (time_elapsed > 15) {
- last_monitor_time = time(NULL);
-
- if (srv_print_innodb_monitor) {
- srv_printf_innodb_monitor(stderr);
- }
-
- if (srv_innodb_status) {
- mutex_enter(&srv_monitor_file_mutex);
- rewind(srv_monitor_file);
- srv_printf_innodb_monitor(srv_monitor_file);
- os_file_set_eof(srv_monitor_file);
- mutex_exit(&srv_monitor_file_mutex);
- }
-
- if (srv_print_innodb_tablespace_monitor
- && difftime(current_time,
- last_tablespace_monitor_time) > 60) {
- last_tablespace_monitor_time = time(NULL);
-
- fputs("========================"
- "========================\n",
- stderr);
-
- ut_print_timestamp(stderr);
-
- fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
- "========================"
- "========================\n",
- stderr);
-
- fsp_print(0);
- fputs("Validating tablespace\n", stderr);
- fsp_validate(0);
- fputs("Validation ok\n"
- "---------------------------------------\n"
- "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
- "=======================================\n",
- stderr);
- }
-
- if (srv_print_innodb_table_monitor
- && difftime(current_time, last_table_monitor_time) > 60) {
-
- last_table_monitor_time = time(NULL);
-
- fputs("===========================================\n",
- stderr);
-
- ut_print_timestamp(stderr);
-
- fputs(" INNODB TABLE MONITOR OUTPUT\n"
- "===========================================\n",
- stderr);
- dict_print();
-
- fputs("-----------------------------------\n"
- "END OF INNODB TABLE MONITOR OUTPUT\n"
- "==================================\n",
- stderr);
- }
- }
-
- mutex_enter(&kernel_mutex);
-
- some_waits = FALSE;
-
- /* Check of all slots if a thread is waiting there, and if it
- has exceeded the time limit */
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
- slot = srv_mysql_table + i;
-
- if (slot->in_use) {
- some_waits = TRUE;
-
- wait_time = ut_difftime(ut_time(), slot->suspend_time);
-
- if (srv_lock_wait_timeout < 100000000
- && (wait_time > (double) srv_lock_wait_timeout
- || wait_time < 0)) {
-
- /* Timeout exceeded or a wrap-around in system
- time counter: cancel the lock request queued
- by the transaction and release possible
- other transactions waiting behind; it is
- possible that the lock has already been
- granted: in that case do nothing */
-
- if (thr_get_trx(slot->thr)->wait_lock) {
- lock_cancel_waiting_and_release(
- thr_get_trx(slot->thr)
- ->wait_lock);
- }
- }
- }
- }
-
- os_event_reset(srv_lock_timeout_thread_event);
-
- mutex_exit(&kernel_mutex);
-
- if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
- goto exit_func;
- }
-
- if (some_waits || srv_print_innodb_monitor
- || srv_print_innodb_lock_monitor
- || srv_print_innodb_tablespace_monitor
- || srv_print_innodb_table_monitor) {
- goto loop;
- }
-
- /* No one was waiting for a lock and no monitor was active:
- suspend this thread */
-
- srv_lock_timeout_and_monitor_active = FALSE;
-
-#if 0
- /* The following synchronisation is disabled, since
- the InnoDB monitor output is to be updated every 15 seconds. */
- os_event_wait(srv_lock_timeout_thread_event);
-#endif
- goto loop;
-
-exit_func:
- srv_lock_timeout_and_monitor_active = FALSE;
-
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
-
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/*************************************************************************
-A thread which prints warnings about semaphore waits which have lasted
-too long. These can be used to track bugs which cause hangs.
-NOTE: This thread should not wait for any innodb mutexes or rw_locks.
-A deadlock could arise where the thread holding that lock requires waking
-by this background thread while this thread is blocked on that lock. */
-
-os_thread_ret_t
-srv_error_monitor_thread(
-/*=====================*/
- /* out: a dummy parameter */
- void* arg __attribute__((unused)))
- /* in: a dummy parameter required by
- os_thread_create */
-{
- /* number of successive fatal timeouts observed */
- ulint fatal_cnt = 0;
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Error monitor thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif
-loop:
- srv_error_monitor_active = TRUE;
-
- if (difftime(time(NULL), srv_last_monitor_time) > 60) {
- /* We refresh InnoDB Monitor values so that averages are
- printed from at most 60 last seconds */
-
- srv_refresh_innodb_monitor_stats();
- }
-
- /* In case mutex_exit is not a memory barrier, it is
- theoretically possible some threads are left waiting though
- the semaphore is already released. Wake up those threads: */
-
- sync_arr_wake_threads_if_sema_free();
-
- if (sync_array_print_long_waits()) {
- fatal_cnt++;
- if (fatal_cnt > 10) {
-
- fprintf(stderr,
- "InnoDB: Error: semaphore wait has lasted"
- " > %lu seconds\n"
- "InnoDB: We intentionally crash the server,"
- " because it appears to be hung.\n",
- (ulong) srv_fatal_semaphore_wait_threshold);
-
- ut_error;
- }
- } else {
- fatal_cnt = 0;
- }
-
- /* Flush stderr so that a database user gets the output
- to possible MySQL error file */
-
- fflush(stderr);
-
- os_thread_sleep(1000000);
-
- if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
-
- goto loop;
- }
-
- srv_error_monitor_active = FALSE;
-
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
-
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/***********************************************************************
-Tells the InnoDB server that there has been activity in the database
-and wakes up the master thread if it is suspended (not sleeping). Used
-in the MySQL interface. Note that there is a small chance that the master
-thread stays suspended (we do not protect our operation with the kernel
-mutex, for performace reasons). */
-
-void
-srv_active_wake_master_thread(void)
-/*===============================*/
-{
- srv_activity_count++;
-
- if (srv_n_threads_active[SRV_MASTER] == 0) {
-
- mutex_enter(&kernel_mutex);
-
- srv_release_threads(SRV_MASTER, 1);
-
- mutex_exit(&kernel_mutex);
- }
-}
-
-/***********************************************************************
-Wakes up the master thread if it is suspended or being suspended. */
-
-void
-srv_wake_master_thread(void)
-/*========================*/
-{
- srv_activity_count++;
-
- mutex_enter(&kernel_mutex);
-
- srv_release_threads(SRV_MASTER, 1);
-
- mutex_exit(&kernel_mutex);
-}
-
-/*************************************************************************
-Returns the number of IO operations that is X percent of the capacity.
-
-PCT_IO(5) -> returns the number of IO operations that is 5% of the max
-where max is srv_io_capacity.
-*/
-#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) pct / 100.0)))
-
-/*************************************************************************
-The master thread controlling the server. */
-
-os_thread_ret_t
-srv_master_thread(
-/*==============*/
- /* out: a dummy parameter */
- void* arg __attribute__((unused)))
- /* in: a dummy parameter required by
- os_thread_create */
-{
- os_event_t event;
- time_t last_flush_time;
- time_t current_time;
- ulint old_activity_count;
- ulint n_pages_purged;
- ulint n_bytes_merged;
- ulint n_pages_flushed;
- ulint n_bytes_archived;
- ulint n_tables_to_drop;
- ulint n_ios;
- ulint n_ios_old;
- ulint n_ios_very_old;
- ulint n_pend_ios;
- ibool skip_sleep = FALSE;
- ulint i;
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Master thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif
- fprintf(stderr, "InnoDB master thread running with io_capacity %lu\n",
- srv_io_capacity);
-
- srv_main_thread_process_no = os_proc_get_number();
- srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
-
- srv_table_reserve_slot(SRV_MASTER);
-
- mutex_enter(&kernel_mutex);
-
- srv_n_threads_active[SRV_MASTER]++;
-
- mutex_exit(&kernel_mutex);
-
-loop:
- /*****************************************************************/
- /* ---- When there is database activity by users, we cycle in this
- loop */
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- n_ios_very_old = log_sys->n_log_ios + buf_pool->n_pages_read
- + buf_pool->n_pages_written;
- mutex_enter(&kernel_mutex);
-
- /* Store the user activity counter at the start of this loop */
- old_activity_count = srv_activity_count;
-
- mutex_exit(&kernel_mutex);
-
- if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
-
- goto suspend_thread;
- }
-
- /* ---- We run the following loop approximately once per second
- when there is database activity */
-
- skip_sleep = FALSE;
-
- for (i = 0; i < 10; i++) {
- n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read
- + buf_pool->n_pages_written;
- srv_main_thread_op_info = "sleeping";
- srv_main_1_second_loops++;
-
- if (!skip_sleep) {
-
- os_thread_sleep(1000000);
- srv_main_sleeps++;
- }
-
- skip_sleep = FALSE;
-
- /* ALTER TABLE in MySQL requires on Unix that the table handler
- can drop tables lazily after there no longer are SELECT
- queries to them. */
-
- srv_main_thread_op_info = "doing background drop tables";
-
- row_drop_tables_for_mysql_in_background();
-
- srv_main_thread_op_info = "";
-
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
-
- goto background_loop;
- }
-
- /* We flush the log once in a second even if no commit
- is issued or the we have specified in my.cnf no flush
- at transaction commit */
-
- srv_main_thread_op_info = "flushing log";
- log_buffer_flush_to_disk();
- srv_sync_flush++;
-
- srv_main_thread_op_info = "making checkpoint";
- log_free_check();
-
- /* If i/os during one second sleep were less than 5% of
- capacity, we assume that there is free disk i/o capacity
- available, and it makes sense to do an insert buffer merge. */
-
- n_pend_ios = buf_get_n_pending_ios()
- + log_sys->n_pending_writes;
- n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
- + buf_pool->n_pages_written;
- if (n_pend_ios < PCT_IO(3) && (n_ios - n_ios_old < PCT_IO(5))) {
- srv_main_thread_op_info = "doing insert buffer merge";
- ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4);
-
- srv_main_thread_op_info = "flushing log";
-
- /* No fsync when srv_flush_log_at_trx_commit != 1 */
- log_buffer_flush_maybe_sync();
- srv_async_flush++;
- }
-
- if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
- > srv_max_buf_pool_modified_pct)) {
-
- /* Try to keep the number of modified pages in the
- buffer pool under the limit wished by the user */
-
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
- PCT_IO(100),
- ut_dulint_max);
-
- /* If we had to do the flush, it may have taken
- even more than 1 second, and also, there may be more
- to flush. Do not sleep 1 second during the next
- iteration of this loop. */
-
- skip_sleep = TRUE;
- }
-
- if (srv_activity_count == old_activity_count) {
-
- /* There is no user activity at the moment, go to
- the background loop */
-
- goto background_loop;
- }
- }
-
- /* ---- We perform the following code approximately once per
- 10 seconds when there is database activity */
-
-#ifdef MEM_PERIODIC_CHECK
- /* Check magic numbers of every allocated mem block once in 10
- seconds */
- mem_validate_all_blocks();
-#endif
- /* If i/os during the 10 second period were less than 200% of
- capacity, we assume that there is free disk i/o capacity
- available, and it makes sense to flush srv_io_capacity pages.
-
- Note that this is done regardless of the fraction of dirty
- pages relative to the max requested by the user. The one second
- loop above requests writes for that case. The writes done here
- are not required, and may be disabled. */
-
- n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
- n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
- + buf_pool->n_pages_written;
- if (srv_extra_dirty_writes &&
- n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) {
-
- srv_main_thread_op_info = "flushing buffer pool pages";
- buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max);
-
- srv_main_thread_op_info = "flushing log";
- /* No fsync when srv_flush_log_at_trx_commit != 1 */
- log_buffer_flush_maybe_sync();
- srv_async_flush++;
- }
-
- /* We run a batch of insert buffer merge every 10 seconds,
- even if the server were active */
-
- srv_main_thread_op_info = "doing insert buffer merge";
- ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4);
-
- srv_main_thread_op_info = "flushing log";
- /* No fsync when srv_flush_log_at_trx_commit != 1 */
- log_buffer_flush_maybe_sync();
- srv_async_flush++;
-
- /* We run a full purge every 10 seconds, even if the server
- were active */
-
- n_pages_purged = 1;
-
- last_flush_time = time(NULL);
-
- while (n_pages_purged) {
-
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
-
- goto background_loop;
- }
-
- srv_main_thread_op_info = "purging";
- n_pages_purged = trx_purge();
-
- current_time = time(NULL);
-
- if (difftime(current_time, last_flush_time) > 1) {
- srv_main_thread_op_info = "flushing log";
-
- log_buffer_flush_to_disk();
- last_flush_time = current_time;
- srv_sync_flush++;
- }
- }
-
- srv_main_thread_op_info = "flushing buffer pool pages";
-
- /* Flush a few oldest pages to make a new checkpoint younger */
-
- if (buf_get_modified_ratio_pct() > 70) {
-
- /* If there are lots of modified pages in the buffer pool
- (> 70 %), we assume we can afford reserving the disk(s) for
- the time it requires to flush 100 pages */
-
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
- PCT_IO(100),
- ut_dulint_max);
- } else {
- /* Otherwise, we only flush a small number of pages so that
- we do not unnecessarily use much disk i/o capacity from
- other work */
-
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
- PCT_IO(10),
- ut_dulint_max);
- }
-
- srv_main_thread_op_info = "making checkpoint";
-
- /* Make a new checkpoint about once in 10 seconds */
-
- log_checkpoint(TRUE, FALSE);
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
-
- /* ---- When there is database activity, we jump from here back to
- the start of loop */
-
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
-
- mutex_exit(&kernel_mutex);
-
- /* If the database is quiet, we enter the background loop */
-
- /*****************************************************************/
-background_loop:
- /* ---- In this loop we run background operations when the server
- is quiet from user activity. Also in the case of a shutdown, we
- loop here, flushing the buffer pool to the data files. */
-
- /* The server has been quiet for a while: start running background
- operations */
- srv_main_background_loops++;
- srv_main_thread_op_info = "doing background drop tables";
-
- n_tables_to_drop = row_drop_tables_for_mysql_in_background();
-
- if (n_tables_to_drop > 0) {
- /* Do not monopolize the CPU even if there are tables waiting
- in the background drop queue. (It is essentially a bug if
- MySQL tries to drop a table while there are still open handles
- to it and we had to put it to the background drop queue.) */
-
- os_thread_sleep(100000);
- }
-
- srv_main_thread_op_info = "purging";
-
- /* Run a full purge */
-
- n_pages_purged = 1;
-
- last_flush_time = time(NULL);
-
- while (n_pages_purged) {
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
-
- break;
- }
-
- srv_main_thread_op_info = "purging";
- n_pages_purged = trx_purge();
-
- current_time = time(NULL);
-
- if (difftime(current_time, last_flush_time) > 1) {
- srv_main_thread_op_info = "flushing log";
-
- log_buffer_flush_to_disk();
- last_flush_time = current_time;
- srv_sync_flush++;
- }
- }
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
- mutex_exit(&kernel_mutex);
-
- srv_main_thread_op_info = "doing insert buffer merge";
-
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
- n_bytes_merged = 0;
- } else {
- /* This should do an amount of IO similar to the number of
- * dirty pages that will be flushed in the call to
- * buf_flush_batch below. Otherwise, the system favors
- * clean pages over cleanup throughput. */
- n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IO(100));
- }
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
- mutex_exit(&kernel_mutex);
-
-flush_loop:
- srv_main_thread_op_info = "flushing buffer pool pages";
- srv_main_flush_loops++;
- if (srv_fast_shutdown < 2) {
- n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
- PCT_IO(100),
- ut_dulint_max);
- } else {
- /* In the fastest shutdown we do not flush the buffer pool
- to data files: we set n_pages_flushed to 0 artificially. */
-
- n_pages_flushed = 0;
- }
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
- mutex_exit(&kernel_mutex);
-
- srv_main_thread_op_info = "waiting for buffer pool flush to end";
- buf_flush_wait_batch_end(BUF_FLUSH_LIST);
-
- srv_main_thread_op_info = "flushing log";
-
- current_time = time(NULL);
- if (difftime(current_time, last_flush_time) > 1) {
- srv_main_thread_op_info = (char*) "flushing log";
- log_buffer_flush_to_disk();
- last_flush_time = current_time;
- srv_sync_flush++;
- } else {
- /* No fsync when srv_flush_log_at_trx_commit != 1 */
- log_buffer_flush_maybe_sync();
- srv_async_flush++;
- }
-
- srv_main_thread_op_info = "making checkpoint";
-
- log_checkpoint(TRUE, FALSE);
-
- if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) {
-
- /* Try to keep the number of modified pages in the
- buffer pool under the limit wished by the user */
-
- goto flush_loop;
- }
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
- mutex_exit(&kernel_mutex);
- /*
- srv_main_thread_op_info = "archiving log (if log archive is on)";
-
- log_archive_do(FALSE, &n_bytes_archived);
- */
- n_bytes_archived = 0;
-
- /* Keep looping in the background loop if still work to do */
-
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
- if (n_tables_to_drop + n_pages_flushed
- + n_bytes_archived != 0) {
-
- /* If we are doing a fast shutdown (= the default)
- we do not do purge or insert buffer merge. But we
- flush the buffer pool completely to disk.
- In a 'very fast' shutdown we do not flush the buffer
- pool to data files: we have set n_pages_flushed to
- 0 artificially. */
-
- goto background_loop;
- }
- } else if (n_tables_to_drop
- + n_pages_purged + n_bytes_merged + n_pages_flushed
- + n_bytes_archived != 0) {
- /* In a 'slow' shutdown we run purge and the insert buffer
- merge to completion */
-
- goto background_loop;
- }
-
- /* There is no work for background operations either: suspend
- master thread to wait for more server activity */
-
-suspend_thread:
- srv_main_thread_op_info = "suspending";
-
- mutex_enter(&kernel_mutex);
-
- if (row_get_background_drop_list_len_low() > 0) {
- mutex_exit(&kernel_mutex);
-
- goto loop;
- }
-
- event = srv_suspend_thread();
-
- mutex_exit(&kernel_mutex);
-
- /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
- waits for database activity to die down when converting < 4.1.x
- databases, and relies on this string being exactly as it is. InnoDB
- manual also mentions this string in several places. */
- srv_main_thread_op_info = "waiting for server activity";
-
- os_event_wait(event);
-
- if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
- /* This is only extra safety, the thread should exit
- already when the event wait ends */
-
- os_thread_exit(NULL);
- }
-
- /* When there is user activity, InnoDB will set the event and the
- main thread goes back to loop. */
-
- goto loop;
-
- OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
-}
-#endif /* !UNIV_HOTBACKUP */