diff options
Diffstat (limited to 'storage/xtradb/srv')
-rw-r--r-- | storage/xtradb/srv/srv0conc.cc | 618 | ||||
-rw-r--r-- | storage/xtradb/srv/srv0mon.cc | 1930 | ||||
-rw-r--r-- | storage/xtradb/srv/srv0srv.cc | 3511 | ||||
-rw-r--r-- | storage/xtradb/srv/srv0start.cc | 3284 |
4 files changed, 9343 insertions, 0 deletions
diff --git a/storage/xtradb/srv/srv0conc.cc b/storage/xtradb/srv/srv0conc.cc new file mode 100644 index 00000000000..6c15753246a --- /dev/null +++ b/storage/xtradb/srv/srv0conc.cc @@ -0,0 +1,618 @@ +/***************************************************************************** + +Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +Portions of this file contain modifications contributed and copyrighted +by Percona Inc.. Those modifications are +gratefully acknowledged and are described briefly in the InnoDB +documentation. The contributions by Percona Inc. are incorporated with +their permission, and subject to the conditions contained in the file +COPYING.Percona. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file srv/srv0conc.cc + +InnoDB concurrency manager + +Created 2011/04/18 Sunny Bains +*******************************************************/ + +#include "srv0srv.h" +#include "sync0sync.h" +#include "btr0types.h" +#include "trx0trx.h" + +#include "mysql/plugin.h" + +/** Number of times a thread is allowed to enter InnoDB within the same +SQL query after it has once got the ticket. */ +UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500; + +#ifdef HAVE_ATOMIC_BUILTINS +/** Maximum sleep delay (in micro-seconds), value of 0 disables it. */ +UNIV_INTERN ulong srv_adaptive_max_sleep_delay = 150000; +#endif /* HAVE_ATOMIC_BUILTINS */ + +UNIV_INTERN ulong srv_thread_sleep_delay = 10000; + + +/** We are prepared for a situation that we have this many threads waiting for +a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the +value. */ + +UNIV_INTERN ulint srv_max_n_threads = 0; + +/** The following controls how many threads we let inside InnoDB concurrently: +threads waiting for locks are not counted into the number because otherwise +we could get a deadlock. Value of 0 will disable the concurrency check. */ + +UNIV_INTERN ulong srv_thread_concurrency = 0; + +#ifndef HAVE_ATOMIC_BUILTINS + +/** This mutex protects srv_conc data structures */ +static os_fast_mutex_t srv_conc_mutex; + +/** Concurrency list node */ +typedef UT_LIST_NODE_T(struct srv_conc_slot_t) srv_conc_node_t; + +/** Slot for a thread waiting in the concurrency control queue. 
*/ +struct srv_conc_slot_t{ + os_event_t event; /*!< event to wait */ + ibool reserved; /*!< TRUE if slot + reserved */ + ibool wait_ended; /*!< TRUE when another thread has + already set the event and the thread + in this slot is free to proceed; but + reserved may still be TRUE at that + point */ + srv_conc_node_t srv_conc_queue; /*!< queue node */ +}; + +/** Queue of threads waiting to get in */ +typedef UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue_t; + +static srv_conc_queue_t srv_conc_queue; + +/** Array of wait slots */ +static srv_conc_slot_t* srv_conc_slots; + +#if defined(UNIV_PFS_MUTEX) +/* Key to register srv_conc_mutex_key with performance schema */ +UNIV_INTERN mysql_pfs_key_t srv_conc_mutex_key; +#endif /* UNIV_PFS_MUTEX */ + +#endif /* !HAVE_ATOMIC_BUILTINS */ + +/** Variables tracking the active and waiting threads. */ +struct srv_conc_t { + char pad[64 - (sizeof(ulint) + sizeof(lint))]; + + /** Number of transactions that have declared_to_be_inside_innodb set. + It used to be a non-error for this value to drop below zero temporarily. + This is no longer true. We'll, however, keep the lint datatype to add + assertions to catch any corner cases that we may have missed. */ + + volatile lint n_active; + + /** Number of OS threads waiting in the FIFO for permission to + enter InnoDB */ + volatile lint n_waiting; +}; + +/* Control variables for tracking concurrency. 
*/ +static srv_conc_t srv_conc; + +/*********************************************************************//** +Initialise the concurrency management data structures */ +void +srv_conc_init(void) +/*===============*/ +{ +#ifndef HAVE_ATOMIC_BUILTINS + ulint i; + + /* Init the server concurrency restriction data structures */ + + os_fast_mutex_init(srv_conc_mutex_key, &srv_conc_mutex); + + UT_LIST_INIT(srv_conc_queue); + + srv_conc_slots = static_cast<srv_conc_slot_t*>( + mem_zalloc(OS_THREAD_MAX_N * sizeof(*srv_conc_slots))); + + for (i = 0; i < OS_THREAD_MAX_N; i++) { + srv_conc_slot_t* conc_slot = &srv_conc_slots[i]; + + conc_slot->event = os_event_create(); + ut_a(conc_slot->event); + } +#endif /* !HAVE_ATOMIC_BUILTINS */ +} + +/*********************************************************************//** +Free the concurrency management data structures */ +void +srv_conc_free(void) +/*===============*/ +{ +#ifndef HAVE_ATOMIC_BUILTINS + os_fast_mutex_free(&srv_conc_mutex); + mem_free(srv_conc_slots); + srv_conc_slots = NULL; +#endif /* !HAVE_ATOMIC_BUILTINS */ +} + +#ifdef HAVE_ATOMIC_BUILTINS +/*********************************************************************//** +Note that a user thread is entering InnoDB. */ +static +void +srv_enter_innodb_with_tickets( +/*==========================*/ + trx_t* trx) /*!< in/out: transaction that wants + to enter InnoDB */ +{ + trx->declared_to_be_inside_innodb = TRUE; + trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter; +} + +/*********************************************************************//** +Handle the scheduling of a user thread that wants to enter InnoDB. Setting +srv_adaptive_max_sleep_delay > 0 switches the adaptive sleep calibration to +ON. When set, we want to wait in the queue for as little time as possible. +However, very short waits will result in a lot of context switches and that +is also not desirable. When threads need to sleep multiple times we increment +os_thread_sleep_delay by one. 
When we see threads getting a slot without +waiting and there are no other threads waiting in the queue, we try and reduce +the wait as much as we can. Currently we reduce it by half each time. If the +thread only had to wait for one turn before it was able to enter InnoDB we +decrement it by one. This is to try and keep the sleep time stable around the +"optimum" sleep time. */ +static +void +srv_conc_enter_innodb_with_atomics( +/*===============================*/ + trx_t* trx) /*!< in/out: transaction that wants + to enter InnoDB */ +{ + ulint n_sleeps = 0; + ibool notified_mysql = FALSE; + + ut_a(!trx->declared_to_be_inside_innodb); + + for (;;) { + ulint sleep_in_us; + + if (srv_conc.n_active < (lint) srv_thread_concurrency) { + ulint n_active; + + /* Check if there are any free tickets. */ + n_active = os_atomic_increment_lint( + &srv_conc.n_active, 1); + + if (n_active <= srv_thread_concurrency) { + + srv_enter_innodb_with_tickets(trx); + + if (notified_mysql) { + + (void) os_atomic_decrement_lint( + &srv_conc.n_waiting, 1); + + thd_wait_end(trx->mysql_thd); + } + + if (srv_adaptive_max_sleep_delay > 0) { + if (srv_thread_sleep_delay > 20 + && n_sleeps == 1) { + + --srv_thread_sleep_delay; + } + + if (srv_conc.n_waiting == 0) { + srv_thread_sleep_delay >>= 1; + } + } + + return; + } + + /* Since there were no free seats, we relinquish + the overbooked ticket. */ + + (void) os_atomic_decrement_lint( + &srv_conc.n_active, 1); + } + + if (!notified_mysql) { + (void) os_atomic_increment_lint( + &srv_conc.n_waiting, 1); + + /* Release possible search system latch this + thread has */ + + if (trx->has_search_latch) { + trx_search_latch_release_if_reserved(trx); + } + + thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK); + + notified_mysql = TRUE; + } + + trx->op_info = "sleeping before entering InnoDB"; + + sleep_in_us = srv_thread_sleep_delay; + + /* Guard against overflow when adaptive sleep delay is on. 
*/ + + if (srv_adaptive_max_sleep_delay > 0 + && sleep_in_us > srv_adaptive_max_sleep_delay) { + + sleep_in_us = srv_adaptive_max_sleep_delay; + srv_thread_sleep_delay = static_cast<ulong>(sleep_in_us); + } + + os_thread_sleep(sleep_in_us); + trx->innodb_que_wait_timer += sleep_in_us; + + trx->op_info = ""; + + ++n_sleeps; + + if (srv_adaptive_max_sleep_delay > 0 && n_sleeps > 1) { + ++srv_thread_sleep_delay; + } + } +} + +/*********************************************************************//** +Note that a user thread is leaving InnoDB code. */ +static +void +srv_conc_exit_innodb_with_atomics( +/*==============================*/ + trx_t* trx) /*!< in/out: transaction */ +{ + trx->n_tickets_to_enter_innodb = 0; + trx->declared_to_be_inside_innodb = FALSE; + + (void) os_atomic_decrement_lint(&srv_conc.n_active, 1); +} +#else +/*********************************************************************//** +Note that a user thread is leaving InnoDB code. */ +static +void +srv_conc_exit_innodb_without_atomics( +/*=================================*/ + trx_t* trx) /*!< in/out: transaction */ +{ + srv_conc_slot_t* slot; + + os_fast_mutex_lock(&srv_conc_mutex); + + ut_ad(srv_conc.n_active > 0); + srv_conc.n_active--; + trx->declared_to_be_inside_innodb = FALSE; + trx->n_tickets_to_enter_innodb = 0; + + slot = NULL; + + if (srv_conc.n_active < (lint) srv_thread_concurrency) { + /* Look for a slot where a thread is waiting and no other + thread has yet released the thread */ + + for (slot = UT_LIST_GET_FIRST(srv_conc_queue); + slot != NULL && slot->wait_ended == TRUE; + slot = UT_LIST_GET_NEXT(srv_conc_queue, slot)) { + + /* No op */ + } + + if (slot != NULL) { + slot->wait_ended = TRUE; + + /* We increment the count on behalf of the released + thread */ + + srv_conc.n_active++; + } + } + + os_fast_mutex_unlock(&srv_conc_mutex); + + if (slot != NULL) { + os_event_set(slot->event); + } +} + +/*********************************************************************//** +Handle the 
scheduling of a user thread that wants to enter InnoDB. */ +static +void +srv_conc_enter_innodb_without_atomics( +/*==================================*/ + trx_t* trx) /*!< in/out: transaction that wants + to enter InnoDB */ +{ + ulint i; + srv_conc_slot_t* slot = NULL; + ibool has_slept = FALSE; + ib_uint64_t start_time = 0L; + ib_uint64_t finish_time = 0L; + ulint sec; + ulint ms; + + os_fast_mutex_lock(&srv_conc_mutex); +retry: + if (UNIV_UNLIKELY(trx->declared_to_be_inside_innodb)) { + os_fast_mutex_unlock(&srv_conc_mutex); + ut_print_timestamp(stderr); + fputs(" InnoDB: Error: trying to declare trx" + " to enter InnoDB, but\n" + "InnoDB: it already is declared.\n", stderr); + trx_print(stderr, trx, 0); + putc('\n', stderr); + return; + } + + ut_ad(srv_conc.n_active >= 0); + + if (srv_conc.n_active < (lint) srv_thread_concurrency) { + + srv_conc.n_active++; + trx->declared_to_be_inside_innodb = TRUE; + trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter; + + os_fast_mutex_unlock(&srv_conc_mutex); + + return; + } + + /* If the transaction is not holding resources, let it sleep + for srv_thread_sleep_delay microseconds, and try again then */ + + if (!has_slept && !trx->has_search_latch + && NULL == UT_LIST_GET_FIRST(trx->lock.trx_locks)) { + + has_slept = TRUE; /* We let it sleep only once to avoid + starvation */ + + srv_conc.n_waiting++; + + os_fast_mutex_unlock(&srv_conc_mutex); + + trx->op_info = "sleeping before joining InnoDB queue"; + + /* Peter Zaitsev suggested that we take the sleep away + altogether. But the sleep may be good in pathological + situations of lots of thread switches. Simply put some + threads aside for a while to reduce the number of thread + switches. 
*/ + if (srv_thread_sleep_delay > 0) { + os_thread_sleep(srv_thread_sleep_delay); + trx->innodb_que_wait_timer += srv_thread_sleep_delay; /* account the delay just slept; no local sleep_in_us exists in this function */ + } + + trx->op_info = ""; + + os_fast_mutex_lock(&srv_conc_mutex); + + srv_conc.n_waiting--; + + goto retry; + } + + /* Too many threads inside: put the current thread to a queue */ + + for (i = 0; i < OS_THREAD_MAX_N; i++) { + slot = srv_conc_slots + i; + + if (!slot->reserved) { + + break; + } + } + + if (i == OS_THREAD_MAX_N) { + /* Could not find a free wait slot, we must let the + thread enter */ + + srv_conc.n_active++; + trx->declared_to_be_inside_innodb = TRUE; + trx->n_tickets_to_enter_innodb = 0; + + os_fast_mutex_unlock(&srv_conc_mutex); + + return; + } + + /* Release possible search system latch this thread has */ + if (trx->has_search_latch) { + trx_search_latch_release_if_reserved(trx); + } + + /* Add to the queue */ + slot->reserved = TRUE; + slot->wait_ended = FALSE; + + UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot); + + os_event_reset(slot->event); + + srv_conc.n_waiting++; + + os_fast_mutex_unlock(&srv_conc_mutex); + + /* Go to wait for the event; when a thread leaves InnoDB it will + release this thread */ + + ut_ad(!trx->has_search_latch); +#ifdef UNIV_SYNC_DEBUG + ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); +#endif /* UNIV_SYNC_DEBUG */ + + if (UNIV_UNLIKELY(trx->take_stats)) { + ut_usectime(&sec, &ms); + start_time = (ib_uint64_t)sec * 1000000 + ms; + } else { + start_time = 0; + } + + trx->op_info = "waiting in InnoDB queue"; + + thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK); + + os_event_wait(slot->event); + thd_wait_end(trx->mysql_thd); + + trx->op_info = ""; + + if (UNIV_UNLIKELY(start_time != 0)) { + ut_usectime(&sec, &ms); + finish_time = (ib_uint64_t)sec * 1000000 + ms; + trx->innodb_que_wait_timer += (ulint)(finish_time - start_time); + } + + os_fast_mutex_lock(&srv_conc_mutex); + + srv_conc.n_waiting--; + + /* NOTE that the thread which released this thread already +
incremented the thread counter on behalf of this thread */ + + slot->reserved = FALSE; + + UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot); + + trx->declared_to_be_inside_innodb = TRUE; + trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter; + + os_fast_mutex_unlock(&srv_conc_mutex); +} +#endif /* HAVE_ATOMIC_BUILTINS */ + +/*********************************************************************//** +Puts an OS thread to wait if there are too many concurrent threads +(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */ +UNIV_INTERN +void +srv_conc_enter_innodb( +/*==================*/ + trx_t* trx) /*!< in: transaction object associated with the + thread */ +{ +#ifdef UNIV_SYNC_DEBUG + ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); +#endif /* UNIV_SYNC_DEBUG */ + +#ifdef HAVE_ATOMIC_BUILTINS + srv_conc_enter_innodb_with_atomics(trx); +#else + srv_conc_enter_innodb_without_atomics(trx); +#endif /* HAVE_ATOMIC_BUILTINS */ +} + +/*********************************************************************//** +This lets a thread enter InnoDB regardless of the number of threads inside +InnoDB. This must be called when a thread ends a lock wait. 
*/ +UNIV_INTERN +void +srv_conc_force_enter_innodb( +/*========================*/ + trx_t* trx) /*!< in: transaction object associated with the + thread */ +{ +#ifdef UNIV_SYNC_DEBUG + ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); +#endif /* UNIV_SYNC_DEBUG */ + + if (!srv_thread_concurrency) { + + return; + } + + ut_ad(srv_conc.n_active >= 0); + +#ifdef HAVE_ATOMIC_BUILTINS + (void) os_atomic_increment_lint(&srv_conc.n_active, 1); +#else + os_fast_mutex_lock(&srv_conc_mutex); + ++srv_conc.n_active; + os_fast_mutex_unlock(&srv_conc_mutex); +#endif /* HAVE_ATOMIC_BUILTINS */ + + trx->n_tickets_to_enter_innodb = 1; + trx->declared_to_be_inside_innodb = TRUE; +} + +/*********************************************************************//** +This must be called when a thread exits InnoDB in a lock wait or at the +end of an SQL statement. */ +UNIV_INTERN +void +srv_conc_force_exit_innodb( +/*=======================*/ + trx_t* trx) /*!< in: transaction object associated with the + thread */ +{ + if ((trx->mysql_thd != NULL + && thd_is_replication_slave_thread(trx->mysql_thd)) + || trx->declared_to_be_inside_innodb == FALSE) { + + return; + } + +#ifdef HAVE_ATOMIC_BUILTINS + srv_conc_exit_innodb_with_atomics(trx); +#else + srv_conc_exit_innodb_without_atomics(trx); +#endif /* HAVE_ATOMIC_BUILTINS */ + +#ifdef UNIV_SYNC_DEBUG + ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch)); +#endif /* UNIV_SYNC_DEBUG */ +} + +/*********************************************************************//** +Get the count of threads waiting inside InnoDB. */ +UNIV_INTERN +ulint +srv_conc_get_waiting_threads(void) +/*==============================*/ +{ + return(srv_conc.n_waiting); +} + +/*********************************************************************//** +Get the count of threads active inside InnoDB. 
*/ +UNIV_INTERN +ulint +srv_conc_get_active_threads(void) +/*==============================*/ +{ + return(srv_conc.n_active); + } + diff --git a/storage/xtradb/srv/srv0mon.cc b/storage/xtradb/srv/srv0mon.cc new file mode 100644 index 00000000000..64417b1e5fb --- /dev/null +++ b/storage/xtradb/srv/srv0mon.cc @@ -0,0 +1,1930 @@ +/***************************************************************************** + +Copyright (c) 2010, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2012, Facebook Inc. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file srv/srv0mon.cc +Database monitor counter interfaces + +Created 12/9/2009 Jimmy Yang +*******************************************************/ + +#ifndef UNIV_HOTBACKUP +#include "os0file.h" +#include "mach0data.h" +#include "srv0mon.h" +#include "srv0srv.h" +#include "buf0buf.h" +#include "trx0sys.h" +#include "trx0rseg.h" +#include "lock0lock.h" +#include "ibuf0ibuf.h" +#ifdef UNIV_NONINL +#include "srv0mon.ic" +#endif + +/* Macro to standardize the counter names for counters in the +"monitor_buf_page" module as they have very structured defines */ +#define MONITOR_BUF_PAGE(name, description, code, op, op_code) \ + {"buffer_page_" op "_" name, "buffer_page_io", \ + "Number of 
" description " Pages " op, \ + MONITOR_GROUP_MODULE, MONITOR_DEFAULT_START, \ + MONITOR_##code##_##op_code} + +#define MONITOR_BUF_PAGE_READ(name, description, code) \ + MONITOR_BUF_PAGE(name, description, code, "read", PAGE_READ) + +#define MONITOR_BUF_PAGE_WRITTEN(name, description, code) \ + MONITOR_BUF_PAGE(name, description, code, "written", PAGE_WRITTEN) + + +/** This array defines basic static information of monitor counters, +including each monitor's name, module it belongs to, a short +description and its property/type and corresponding monitor_id. +Please note: If you add a monitor here, please add its corresponding +monitor_id to "enum monitor_id_value" structure in srv0mon.h file. */ + +static monitor_info_t innodb_counter_info[] = +{ + /* A dummy item to mark the module start, this is + to accomodate the default value (0) set for the + global variables with the control system. */ + {"module_start", "module_start", "module_start", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_DEFAULT_START}, + + /* ========== Counters for Server Metadata ========== */ + {"module_metadata", "metadata", "Server Metadata", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_METADATA}, + + {"metadata_table_handles_opened", "metadata", + "Number of table handles opened", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TABLE_OPEN}, + + {"metadata_table_handles_closed", "metadata", + "Number of table handles closed", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TABLE_CLOSE}, + + {"metadata_table_reference_count", "metadata", + "Table reference counter", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TABLE_REFERENCE}, + + {"metadata_mem_pool_size", "metadata", + "Size of a memory pool InnoDB uses to store data dictionary" + " and internal data structures in bytes", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_OVLD_META_MEM_POOL}, + + /* ========== Counters for Lock Module 
========== */ + {"module_lock", "lock", "Lock Module", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_LOCK}, + + {"lock_deadlocks", "lock", "Number of deadlocks", + MONITOR_DEFAULT_ON, + MONITOR_DEFAULT_START, MONITOR_DEADLOCK}, + + {"lock_timeouts", "lock", "Number of lock timeouts", + MONITOR_DEFAULT_ON, + MONITOR_DEFAULT_START, MONITOR_TIMEOUT}, + + {"lock_rec_lock_waits", "lock", + "Number of times enqueued into record lock wait queue", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_LOCKREC_WAIT}, + + {"lock_table_lock_waits", "lock", + "Number of times enqueued into table lock wait queue", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TABLELOCK_WAIT}, + + {"lock_rec_lock_requests", "lock", + "Number of record locks requested", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_NUM_RECLOCK_REQ}, + + {"lock_rec_lock_created", "lock", "Number of record locks created", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_RECLOCK_CREATED}, + + {"lock_rec_lock_removed", "lock", + "Number of record locks removed from the lock queue", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_RECLOCK_REMOVED}, + + {"lock_rec_locks", "lock", + "Current number of record locks on tables", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_NUM_RECLOCK}, + + {"lock_table_lock_created", "lock", "Number of table locks created", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TABLELOCK_CREATED}, + + {"lock_table_lock_removed", "lock", + "Number of table locks removed from the lock queue", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TABLELOCK_REMOVED}, + + {"lock_table_locks", "lock", + "Current number of table locks on tables", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_NUM_TABLELOCK}, + + {"lock_row_lock_current_waits", "lock", + "Number of row locks currently being waited for" + " (innodb_row_lock_current_waits)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT}, + + 
{"lock_row_lock_time", "lock", + "Time spent in acquiring row locks, in milliseconds" + " (innodb_row_lock_time)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_WAIT_TIME}, + + {"lock_row_lock_time_max", "lock", + "The maximum time to acquire a row lock, in milliseconds" + " (innodb_row_lock_time_max)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_MAX_WAIT_TIME}, + + {"lock_row_lock_waits", "lock", + "Number of times a row lock had to be waited for" + " (innodb_row_lock_waits)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_ROW_LOCK_WAIT}, + + {"lock_row_lock_time_avg", "lock", + "The average time to acquire a row lock, in milliseconds" + " (innodb_row_lock_time_avg)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_AVG_WAIT_TIME}, + + /* ========== Counters for Buffer Manager and I/O ========== */ + {"module_buffer", "buffer", "Buffer Manager Module", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_BUFFER}, + + {"buffer_pool_size", "server", + "Server buffer pool size (all buffer pools) in bytes", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUFFER_POOL_SIZE}, + + {"buffer_pool_reads", "buffer", + "Number of reads directly from disk (innodb_buffer_pool_reads)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READS}, + + {"buffer_pool_read_requests", "buffer", + "Number of logical read requests (innodb_buffer_pool_read_requests)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_REQUESTS}, + + 
{"buffer_pool_write_requests", "buffer", + "Number of write requests (innodb_buffer_pool_write_requests)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_WRITE_REQUEST}, + + {"buffer_pool_wait_free", "buffer", + "Number of times waited for free buffer" + " (innodb_buffer_pool_wait_free)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_WAIT_FREE}, + + {"buffer_pool_read_ahead", "buffer", + "Number of pages read as read ahead (innodb_buffer_pool_read_ahead)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_AHEAD}, + + {"buffer_pool_read_ahead_evicted", "buffer", + "Read-ahead pages evicted without being accessed" + " (innodb_buffer_pool_read_ahead_evicted)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED}, + + {"buffer_pool_pages_total", "buffer", + "Total buffer pool size in pages (innodb_buffer_pool_pages_total)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGE_TOTAL}, + + {"buffer_pool_pages_misc", "buffer", + "Buffer pages for misc use such as row locks or the adaptive" + " hash index (innodb_buffer_pool_pages_misc)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGE_MISC}, + + {"buffer_pool_pages_data", "buffer", + "Buffer pages containing data (innodb_buffer_pool_pages_data)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_DATA}, + + {"buffer_pool_bytes_data", "buffer", + "Buffer bytes containing data (innodb_buffer_pool_bytes_data)", + 
static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DATA}, + + {"buffer_pool_pages_dirty", "buffer", + "Buffer pages currently dirty (innodb_buffer_pool_pages_dirty)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_DIRTY}, + + {"buffer_pool_bytes_dirty", "buffer", + "Buffer bytes currently dirty (innodb_buffer_pool_bytes_dirty)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DIRTY}, + + {"buffer_pool_pages_free", "buffer", + "Buffer pages currently free (innodb_buffer_pool_pages_free)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_FREE}, + + {"buffer_pages_created", "buffer", + "Number of pages created (innodb_pages_created)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_CREATED}, + + {"buffer_pages_written", "buffer", + "Number of pages written (innodb_pages_written)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_WRITTEN}, + + {"buffer_pages_read", "buffer", + "Number of pages read (innodb_pages_read)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_READ}, + + {"buffer_data_reads", "buffer", + "Amount of data read in bytes (innodb_data_reads)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_BYTE_READ}, + + {"buffer_data_written", "buffer", + "Amount of data written in bytes (innodb_data_written)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + 
MONITOR_DEFAULT_START, MONITOR_OVLD_BYTE_WRITTEN}, + + /* Cumulative counter for scanning in flush batches */ + {"buffer_flush_batch_scanned", "buffer", + "Total pages scanned as part of flush batch", + MONITOR_SET_OWNER, + MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL, + MONITOR_FLUSH_BATCH_SCANNED}, + + {"buffer_flush_batch_num_scan", "buffer", + "Number of times buffer flush list flush is called", + MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_SCANNED, + MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL}, + + {"buffer_flush_batch_scanned_per_call", "buffer", + "Pages scanned per flush batch scan", + MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_SCANNED, + MONITOR_FLUSH_BATCH_SCANNED_PER_CALL}, + + {"buffer_flush_batch_rescan", "buffer", + "Number of times rescan of flush list forced", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_FLUSH_HP_RESCAN}, + + /* Cumulative counter for pages flushed in flush batches */ + {"buffer_flush_batch_total_pages", "buffer", + "Total pages flushed as part of flush batch", + MONITOR_SET_OWNER, MONITOR_FLUSH_BATCH_COUNT, + MONITOR_FLUSH_BATCH_TOTAL_PAGE}, + + {"buffer_flush_batches", "buffer", + "Number of flush batches", + MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_TOTAL_PAGE, + MONITOR_FLUSH_BATCH_COUNT}, + + {"buffer_flush_batch_pages", "buffer", + "Pages queued as a flush batch", + MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_TOTAL_PAGE, + MONITOR_FLUSH_BATCH_PAGES}, + + /* Cumulative counter for flush batches because of neighbor */ + {"buffer_flush_neighbor_total_pages", "buffer", + "Total neighbors flushed as part of neighbor flush", + MONITOR_SET_OWNER, MONITOR_FLUSH_NEIGHBOR_COUNT, + MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE}, + + {"buffer_flush_neighbor", "buffer", + "Number of times neighbors flushing is invoked", + MONITOR_SET_MEMBER, MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE, + MONITOR_FLUSH_NEIGHBOR_COUNT}, + + {"buffer_flush_neighbor_pages", "buffer", + "Pages queued as a neighbor batch", + MONITOR_SET_MEMBER, MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE, + 
MONITOR_FLUSH_NEIGHBOR_PAGES}, + + {"buffer_flush_n_to_flush_requested", "buffer", + "Number of pages requested for flushing.", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_FLUSH_N_TO_FLUSH_REQUESTED}, + + {"buffer_flush_avg_page_rate", "buffer", + "Average number of pages at which flushing is happening", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_FLUSH_AVG_PAGE_RATE}, + + {"buffer_flush_lsn_avg_rate", "buffer", + "Average redo generation rate", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_FLUSH_LSN_AVG_RATE}, + + {"buffer_flush_pct_for_dirty", "buffer", + "Percent of IO capacity used to avoid max dirty page limit", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_FLUSH_PCT_FOR_DIRTY}, + + {"buffer_flush_pct_for_lsn", "buffer", + "Percent of IO capacity used to avoid reusable redo space limit", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_FLUSH_PCT_FOR_LSN}, + + {"buffer_flush_sync_waits", "buffer", + "Number of times a wait happens due to sync flushing", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_FLUSH_SYNC_WAITS}, + + /* Cumulative counter for flush batches for adaptive flushing */ + {"buffer_flush_adaptive_total_pages", "buffer", + "Total pages flushed as part of adaptive flushing", + MONITOR_SET_OWNER, MONITOR_FLUSH_ADAPTIVE_COUNT, + MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE}, + + {"buffer_flush_adaptive", "buffer", + "Number of adaptive batches", + MONITOR_SET_MEMBER, MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE, + MONITOR_FLUSH_ADAPTIVE_COUNT}, + + {"buffer_flush_adaptive_pages", "buffer", + "Pages queued as an adaptive batch", + MONITOR_SET_MEMBER, MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE, + MONITOR_FLUSH_ADAPTIVE_PAGES}, + + /* Cumulative counter for flush batches because of sync */ + {"buffer_flush_sync_total_pages", "buffer", + "Total pages flushed as part of sync batches", + MONITOR_SET_OWNER, MONITOR_FLUSH_SYNC_COUNT, + MONITOR_FLUSH_SYNC_TOTAL_PAGE}, + + {"buffer_flush_sync", "buffer", + "Number of sync batches", + MONITOR_SET_MEMBER, 
MONITOR_FLUSH_SYNC_TOTAL_PAGE, + MONITOR_FLUSH_SYNC_COUNT}, + + {"buffer_flush_sync_pages", "buffer", + "Pages queued as a sync batch", + MONITOR_SET_MEMBER, MONITOR_FLUSH_SYNC_TOTAL_PAGE, + MONITOR_FLUSH_SYNC_PAGES}, + + /* Cumulative counter for flush batches because of background */ + {"buffer_flush_background_total_pages", "buffer", + "Total pages flushed as part of background batches", + MONITOR_SET_OWNER, MONITOR_FLUSH_BACKGROUND_COUNT, + MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE}, + + {"buffer_flush_background", "buffer", + "Number of background batches", + MONITOR_SET_MEMBER, MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE, + MONITOR_FLUSH_BACKGROUND_COUNT}, + + {"buffer_flush_background_pages", "buffer", + "Pages queued as a background batch", + MONITOR_SET_MEMBER, MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE, + MONITOR_FLUSH_BACKGROUND_PAGES}, + + /* Cumulative counter for LRU batch scan */ + {"buffer_LRU_batch_scanned", "buffer", + "Total pages scanned as part of LRU batch", + MONITOR_SET_OWNER, MONITOR_LRU_BATCH_SCANNED_NUM_CALL, + MONITOR_LRU_BATCH_SCANNED}, + + {"buffer_LRU_batch_num_scan", "buffer", + "Number of times LRU batch is called", + MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_SCANNED, + MONITOR_LRU_BATCH_SCANNED_NUM_CALL}, + + {"buffer_LRU_batch_scanned_per_call", "buffer", + "Pages scanned per LRU batch call", + MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_SCANNED, + MONITOR_LRU_BATCH_SCANNED_PER_CALL}, + + /* Cumulative counter for LRU batch pages flushed */ + {"buffer_LRU_batch_total_pages", "buffer", + "Total pages flushed as part of LRU batches", + MONITOR_SET_OWNER, MONITOR_LRU_BATCH_COUNT, + MONITOR_LRU_BATCH_TOTAL_PAGE}, + + {"buffer_LRU_batches", "buffer", + "Number of LRU batches", + MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_TOTAL_PAGE, + MONITOR_LRU_BATCH_COUNT}, + + {"buffer_LRU_batch_pages", "buffer", + "Pages queued as an LRU batch", + MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_TOTAL_PAGE, + MONITOR_LRU_BATCH_PAGES}, + + /* Cumulative counter for single page LRU scans */ + 
{"buffer_LRU_single_flush_scanned", "buffer", + "Total pages scanned as part of single page LRU flush", + MONITOR_SET_OWNER, + MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL, + MONITOR_LRU_SINGLE_FLUSH_SCANNED}, + + {"buffer_LRU_single_flush_num_scan", "buffer", + "Number of times single page LRU flush is called", + MONITOR_SET_MEMBER, MONITOR_LRU_SINGLE_FLUSH_SCANNED, + MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL}, + + {"buffer_LRU_single_flush_scanned_per_call", "buffer", + "Page scanned per single LRU flush", + MONITOR_SET_MEMBER, MONITOR_LRU_SINGLE_FLUSH_SCANNED, + MONITOR_LRU_SINGLE_FLUSH_SCANNED_PER_CALL}, + + {"buffer_LRU_single_flush_failure_count", "Buffer", + "Number of times attempt to flush a single page from LRU failed", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT}, + + {"buffer_LRU_get_free_search", "Buffer", + "Number of searches performed for a clean page", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_LRU_GET_FREE_SEARCH}, + + /* Cumulative counter for LRU search scans */ + {"buffer_LRU_search_scanned", "buffer", + "Total pages scanned as part of LRU search", + MONITOR_SET_OWNER, + MONITOR_LRU_SEARCH_SCANNED_NUM_CALL, + MONITOR_LRU_SEARCH_SCANNED}, + + {"buffer_LRU_search_num_scan", "buffer", + "Number of times LRU search is performed", + MONITOR_SET_MEMBER, MONITOR_LRU_SEARCH_SCANNED, + MONITOR_LRU_SEARCH_SCANNED_NUM_CALL}, + + {"buffer_LRU_search_scanned_per_call", "buffer", + "Page scanned per single LRU search", + MONITOR_SET_MEMBER, MONITOR_LRU_SEARCH_SCANNED, + MONITOR_LRU_SEARCH_SCANNED_PER_CALL}, + + /* Cumulative counter for LRU unzip search scans */ + {"buffer_LRU_unzip_search_scanned", "buffer", + "Total pages scanned as part of LRU unzip search", + MONITOR_SET_OWNER, + MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL, + MONITOR_LRU_UNZIP_SEARCH_SCANNED}, + + {"buffer_LRU_unzip_search_num_scan", "buffer", + "Number of times LRU unzip search is performed", + MONITOR_SET_MEMBER, MONITOR_LRU_UNZIP_SEARCH_SCANNED, + 
MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL}, + + {"buffer_LRU_unzip_search_scanned_per_call", "buffer", + "Page scanned per single LRU unzip search", + MONITOR_SET_MEMBER, MONITOR_LRU_UNZIP_SEARCH_SCANNED, + MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL}, + + /* ========== Counters for Buffer Page I/O ========== */ + {"module_buffer_page", "buffer_page_io", "Buffer Page I/O Module", + static_cast<monitor_type_t>( + MONITOR_MODULE | MONITOR_GROUP_MODULE), + MONITOR_DEFAULT_START, MONITOR_MODULE_BUF_PAGE}, + + MONITOR_BUF_PAGE_READ("index_leaf","Index Leaf", INDEX_LEAF), + + MONITOR_BUF_PAGE_READ("index_non_leaf","Index Non-leaf", + INDEX_NON_LEAF), + + MONITOR_BUF_PAGE_READ("index_ibuf_leaf", "Insert Buffer Index Leaf", + INDEX_IBUF_LEAF), + + MONITOR_BUF_PAGE_READ("index_ibuf_non_leaf", + "Insert Buffer Index Non-Leaf", + INDEX_IBUF_NON_LEAF), + + MONITOR_BUF_PAGE_READ("undo_log", "Undo Log", UNDO_LOG), + + MONITOR_BUF_PAGE_READ("index_inode", "Index Inode", INODE), + + MONITOR_BUF_PAGE_READ("ibuf_free_list", "Insert Buffer Free List", + IBUF_FREELIST), + + MONITOR_BUF_PAGE_READ("ibuf_bitmap", "Insert Buffer Bitmap", + IBUF_BITMAP), + + MONITOR_BUF_PAGE_READ("system_page", "System", SYSTEM), + + MONITOR_BUF_PAGE_READ("trx_system", "Transaction System", TRX_SYSTEM), + + MONITOR_BUF_PAGE_READ("fsp_hdr", "File Space Header", FSP_HDR), + + MONITOR_BUF_PAGE_READ("xdes", "Extent Descriptor", XDES), + + MONITOR_BUF_PAGE_READ("blob", "Uncompressed BLOB", BLOB), + + MONITOR_BUF_PAGE_READ("zblob", "First Compressed BLOB", ZBLOB), + + MONITOR_BUF_PAGE_READ("zblob2", "Subsequent Compressed BLOB", ZBLOB2), + + MONITOR_BUF_PAGE_READ("other", "other/unknown (old version of InnoDB)", + OTHER), + + MONITOR_BUF_PAGE_WRITTEN("index_leaf","Index Leaf", INDEX_LEAF), + + MONITOR_BUF_PAGE_WRITTEN("index_non_leaf","Index Non-leaf", + INDEX_NON_LEAF), + + MONITOR_BUF_PAGE_WRITTEN("index_ibuf_leaf", "Insert Buffer Index Leaf", + INDEX_IBUF_LEAF), + + 
MONITOR_BUF_PAGE_WRITTEN("index_ibuf_non_leaf", + "Insert Buffer Index Non-Leaf", + INDEX_IBUF_NON_LEAF), + + MONITOR_BUF_PAGE_WRITTEN("undo_log", "Undo Log", UNDO_LOG), + + MONITOR_BUF_PAGE_WRITTEN("index_inode", "Index Inode", INODE), + + MONITOR_BUF_PAGE_WRITTEN("ibuf_free_list", "Insert Buffer Free List", + IBUF_FREELIST), + + MONITOR_BUF_PAGE_WRITTEN("ibuf_bitmap", "Insert Buffer Bitmap", + IBUF_BITMAP), + + MONITOR_BUF_PAGE_WRITTEN("system_page", "System", SYSTEM), + + MONITOR_BUF_PAGE_WRITTEN("trx_system", "Transaction System", + TRX_SYSTEM), + + MONITOR_BUF_PAGE_WRITTEN("fsp_hdr", "File Space Header", FSP_HDR), + + MONITOR_BUF_PAGE_WRITTEN("xdes", "Extent Descriptor", XDES), + + MONITOR_BUF_PAGE_WRITTEN("blob", "Uncompressed BLOB", BLOB), + + MONITOR_BUF_PAGE_WRITTEN("zblob", "First Compressed BLOB", ZBLOB), + + MONITOR_BUF_PAGE_WRITTEN("zblob2", "Subsequent Compressed BLOB", + ZBLOB2), + + MONITOR_BUF_PAGE_WRITTEN("other", "other/unknown (old version InnoDB)", + OTHER), + + /* ========== Counters for OS level operations ========== */ + {"module_os", "os", "OS Level Operation", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_OS}, + + {"os_data_reads", "os", + "Number of reads initiated (innodb_data_reads)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FILE_READ}, + + {"os_data_writes", "os", + "Number of writes initiated (innodb_data_writes)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FILE_WRITE}, + + {"os_data_fsyncs", "os", + "Number of fsync() calls (innodb_data_fsyncs)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FSYNC}, + + {"os_pending_reads", "os", "Number of reads pending", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OS_PENDING_READS}, + + {"os_pending_writes", "os", "Number of writes pending", + MONITOR_NONE, + 
MONITOR_DEFAULT_START, MONITOR_OS_PENDING_WRITES}, + + {"os_log_bytes_written", "os", + "Bytes of log written (innodb_os_log_written)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_WRITTEN}, + + {"os_log_fsyncs", "os", + "Number of fsync log writes (innodb_os_log_fsyncs)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_FSYNC}, + + {"os_log_pending_fsyncs", "os", + "Number of pending fsync write (innodb_os_log_pending_fsyncs)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_PENDING_FSYNC}, + + {"os_log_pending_writes", "os", + "Number of pending log file writes (innodb_os_log_pending_writes)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_PENDING_WRITES}, + + /* ========== Counters for Transaction Module ========== */ + {"module_trx", "transaction", "Transaction Manager", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_TRX}, + + {"trx_rw_commits", "transaction", "Number of read-write transactions " + "committed", + MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_RW_COMMIT}, + + {"trx_ro_commits", "transaction", "Number of read-only transactions " + "committed", + MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_RO_COMMIT}, + + {"trx_nl_ro_commits", "transaction", "Number of non-locking " + "auto-commit read-only transactions committed", + MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_NL_RO_COMMIT}, + + {"trx_commits_insert_update", "transaction", + "Number of transactions committed with inserts and updates", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TRX_COMMIT_UNDO}, + + {"trx_rollbacks", "transaction", + "Number of transactions rolled back", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK}, + + {"trx_rollbacks_savepoint", "transaction", + "Number of 
transactions rolled back to savepoint", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK_SAVEPOINT}, + + {"trx_rollback_active", "transaction", + "Number of resurrected active transactions rolled back", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK_ACTIVE}, + + {"trx_active_transactions", "transaction", + "Number of active transactions", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_TRX_ACTIVE}, + + {"trx_rseg_history_len", "transaction", + "Length of the TRX_RSEG_HISTORY list", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_RSEG_HISTORY_LEN}, + + {"trx_undo_slots_used", "transaction", "Number of undo slots used", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_NUM_UNDO_SLOT_USED}, + + {"trx_undo_slots_cached", "transaction", + "Number of undo slots cached", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_NUM_UNDO_SLOT_CACHED}, + + {"trx_rseg_current_size", "transaction", + "Current rollback segment size in pages", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_RSEG_CUR_SIZE}, + + /* ========== Counters for Purge Module ========== */ + {"module_purge", "purge", "Purge Module", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_PURGE}, + + {"purge_del_mark_records", "purge", + "Number of delete-marked rows purged", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_N_DEL_ROW_PURGE}, + + {"purge_upd_exist_or_extern_records", "purge", + "Number of purges on updates of existing records and " + " updates on delete marked record with externally stored field", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_N_UPD_EXIST_EXTERN}, + + {"purge_invoked", "purge", + "Number of times purge was invoked", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_PURGE_INVOKED}, + + {"purge_undo_log_pages", "purge", + "Number of undo log pages handled by the purge", + MONITOR_NONE, + 
MONITOR_DEFAULT_START, MONITOR_PURGE_N_PAGE_HANDLED}, + + {"purge_dml_delay_usec", "purge", + "Microseconds DML to be delayed due to purge lagging", + MONITOR_DISPLAY_CURRENT, + MONITOR_DEFAULT_START, MONITOR_DML_PURGE_DELAY}, + + {"purge_stop_count", "purge", + "Number of times purge was stopped", + MONITOR_DISPLAY_CURRENT, + MONITOR_DEFAULT_START, MONITOR_PURGE_STOP_COUNT}, + + {"purge_resume_count", "purge", + "Number of times purge was resumed", + MONITOR_DISPLAY_CURRENT, + MONITOR_DEFAULT_START, MONITOR_PURGE_RESUME_COUNT}, + + /* ========== Counters for Recovery Module ========== */ + {"module_log", "recovery", "Recovery Module", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_RECOVERY}, + + {"log_checkpoints", "recovery", "Number of checkpoints", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_NUM_CHECKPOINT}, + + {"log_lsn_last_flush", "recovery", "LSN of Last flush", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_FLUSHDISK}, + + {"log_lsn_last_checkpoint", "recovery", "LSN at last checkpoint", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_CHECKPOINT}, + + {"log_lsn_current", "recovery", "Current LSN value", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_CURRENT}, + + {"log_lsn_checkpoint_age", "recovery", + "Current LSN value minus LSN at last checkpoint", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_LSN_CHECKPOINT_AGE}, + + {"log_lsn_buf_pool_oldest", "recovery", + "The oldest modified block LSN in the buffer pool", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_OLDEST_LSN}, + + {"log_max_modified_age_async", "recovery", + "Maximum LSN difference; when exceeded, start asynchronous preflush", + static_cast<monitor_type_t>( + MONITOR_EXISTING | 
MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_OVLD_MAX_AGE_ASYNC}, + + {"log_max_modified_age_sync", "recovery", + "Maximum LSN difference; when exceeded, start synchronous preflush", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_OVLD_MAX_AGE_SYNC}, + + {"log_pending_log_writes", "recovery", "Pending log writes", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_PENDING_LOG_WRITE}, + + {"log_pending_checkpoint_writes", "recovery", "Pending checkpoints", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_PENDING_CHECKPOINT_WRITE}, + + {"log_num_log_io", "recovery", "Number of log I/Os", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_LOG_IO}, + + {"log_waits", "recovery", + "Number of log waits due to small log buffer (innodb_log_waits)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WAITS}, + + {"log_write_requests", "recovery", + "Number of log write requests (innodb_log_write_requests)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WRITE_REQUEST}, + + {"log_writes", "recovery", + "Number of log writes (innodb_log_writes)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WRITES}, + + /* ========== Counters for Page Compression ========== */ + {"module_compress", "compression", "Page Compression Info", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_PAGE}, + + {"compress_pages_compressed", "compression", + "Number of pages compressed", MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_PAGE_COMPRESS}, + + {"compress_pages_decompressed", "compression", + "Number of pages decompressed", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_PAGE_DECOMPRESS}, + + {"compression_pad_increments", "compression", + "Number of times padding is incremented to avoid compression failures", + 
MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_PAD_INCREMENTS}, + + {"compression_pad_decrements", "compression", + "Number of times padding is decremented due to good compressibility", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_PAD_DECREMENTS}, + + /* ========== Counters for Index ========== */ + {"module_index", "index", "Index Manager", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_INDEX}, + + {"index_page_splits", "index", "Number of index page splits", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_INDEX_SPLIT}, + + {"index_page_merge_attempts", "index", + "Number of index page merge attempts", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_INDEX_MERGE_ATTEMPTS}, + + {"index_page_merge_successful", "index", + "Number of successful index page merges", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_INDEX_MERGE_SUCCESSFUL}, + + {"index_page_reorg_attempts", "index", + "Number of index page reorganization attempts", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_INDEX_REORG_ATTEMPTS}, + + {"index_page_reorg_successful", "index", + "Number of successful index page reorganizations", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_INDEX_REORG_SUCCESSFUL}, + + {"index_page_discards", "index", "Number of index pages discarded", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_INDEX_DISCARD}, + + /* ========== Counters for Adaptive Hash Index ========== */ + {"module_adaptive_hash", "adaptive_hash_index", "Adpative Hash Index", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_ADAPTIVE_HASH}, + + {"adaptive_hash_searches", "adaptive_hash_index", + "Number of successful searches using Adaptive Hash Index", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_ADAPTIVE_HASH_SEARCH}, + + {"adaptive_hash_searches_btree", "adaptive_hash_index", + "Number of searches using B-tree on an index search", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE}, + + 
{"adaptive_hash_pages_added", "adaptive_hash_index", + "Number of index pages on which the Adaptive Hash Index is built", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_PAGE_ADDED}, + + {"adaptive_hash_pages_removed", "adaptive_hash_index", + "Number of index pages whose corresponding Adaptive Hash Index" + " entries were removed", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_PAGE_REMOVED}, + + {"adaptive_hash_rows_added", "adaptive_hash_index", + "Number of Adaptive Hash Index rows added", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_ADDED}, + + {"adaptive_hash_rows_removed", "adaptive_hash_index", + "Number of Adaptive Hash Index rows removed", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_REMOVED}, + + {"adaptive_hash_rows_deleted_no_hash_entry", "adaptive_hash_index", + "Number of rows deleted that did not have corresponding Adaptive Hash" + " Index entries", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND}, + + {"adaptive_hash_rows_updated", "adaptive_hash_index", + "Number of Adaptive Hash Index rows updated", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_UPDATED}, + + /* ========== Counters for tablespace ========== */ + {"module_file", "file_system", "Tablespace and File System Manager", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_FIL_SYSTEM}, + + {"file_num_open_files", "file_system", + "Number of files currently open (innodb_num_open_files)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_N_FILE_OPENED}, + + /* ========== Counters for Change Buffer ========== */ + {"module_ibuf_system", "change_buffer", "InnoDB Change Buffer", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_IBUF_SYSTEM}, + + {"ibuf_merges_insert", "change_buffer", + "Number of inserted records merged by change buffering", + 
static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_INSERT}, + + {"ibuf_merges_delete_mark", "change_buffer", + "Number of deleted records merged by change buffering", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DELETE}, + + {"ibuf_merges_delete", "change_buffer", + "Number of purge records merged by change buffering", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_PURGE}, + + {"ibuf_merges_discard_insert", "change_buffer", + "Number of insert merged operations discarded", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT}, + + {"ibuf_merges_discard_delete_mark", "change_buffer", + "Number of deleted merged operations discarded", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE}, + + {"ibuf_merges_discard_delete", "change_buffer", + "Number of purge merged operations discarded", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE}, + + {"ibuf_merges", "change_buffer", "Number of change buffer merges", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGES}, + + {"ibuf_size", "change_buffer", "Change buffer size in pages", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_SIZE}, + + /* ========== Counters for server operations ========== */ + {"module_innodb", "innodb", + "Counter for general InnoDB server wide operations and properties", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_SERVER}, + + {"innodb_master_thread_sleeps", "server", + "Number of 
times (seconds) master thread sleeps", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_MASTER_THREAD_SLEEP}, + + {"innodb_activity_count", "server", "Current server activity count", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_SERVER_ACTIVITY}, + + {"innodb_master_active_loops", "server", + "Number of times master thread performs its tasks when" + " server is active", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_MASTER_ACTIVE_LOOPS}, + + {"innodb_master_idle_loops", "server", + "Number of times master thread performs its tasks when server is idle", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_MASTER_IDLE_LOOPS}, + + {"innodb_background_drop_table_usec", "server", + "Time (in microseconds) spent to process drop table list", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND}, + + {"innodb_ibuf_merge_usec", "server", + "Time (in microseconds) spent to process change buffer merge", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_SRV_IBUF_MERGE_MICROSECOND}, + + {"innodb_log_flush_usec", "server", + "Time (in microseconds) spent to flush log records", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_SRV_LOG_FLUSH_MICROSECOND}, + + {"innodb_mem_validate_usec", "server", + "Time (in microseconds) spent to do memory validation", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_SRV_MEM_VALIDATE_MICROSECOND}, + + {"innodb_master_purge_usec", "server", + "Time (in microseconds) spent by master thread to purge records", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_SRV_PURGE_MICROSECOND}, + + {"innodb_dict_lru_usec", "server", + "Time (in microseconds) spent to process DICT LRU list", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_SRV_DICT_LRU_MICROSECOND}, + + {"innodb_checkpoint_usec", "server", + "Time (in microseconds) spent by master thread to do checkpoint", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_SRV_CHECKPOINT_MICROSECOND}, + + 
{"innodb_dblwr_writes", "server", + "Number of doublewrite operations that have been performed" + " (innodb_dblwr_writes)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_DBLWR_WRITES}, + + {"innodb_dblwr_pages_written", "server", + "Number of pages that have been written for doublewrite operations" + " (innodb_dblwr_pages_written)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN}, + + {"innodb_page_size", "server", + "InnoDB page size in bytes (innodb_page_size)", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT), + MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_PAGE_SIZE}, + + {"innodb_rwlock_s_spin_waits", "server", + "Number of rwlock spin waits due to shared latch request", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_S_SPIN_WAITS}, + + {"innodb_rwlock_x_spin_waits", "server", + "Number of rwlock spin waits due to exclusive latch request", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_SPIN_WAITS}, + + {"innodb_rwlock_s_spin_rounds", "server", + "Number of rwlock spin loop rounds due to shared latch request", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS}, + + {"innodb_rwlock_x_spin_rounds", "server", + "Number of rwlock spin loop rounds due to exclusive latch request", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS}, + + {"innodb_rwlock_s_os_waits", "server", + "Number of OS waits due to shared latch request", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_S_OS_WAITS}, + + 
{"innodb_rwlock_x_os_waits", "server", + "Number of OS waits due to exclusive latch request", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_OS_WAITS}, + + /* ========== Counters for DML operations ========== */ + {"module_dml", "dml", "Statistics for DMLs", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_DML_STATS}, + + {"dml_reads", "dml", "Number of rows read", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_READ}, + + {"dml_inserts", "dml", "Number of rows inserted", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_INSERTED}, + + {"dml_deletes", "dml", "Number of rows deleted", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_DELETED}, + + {"dml_updates", "dml", "Number of rows updated", + static_cast<monitor_type_t>( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_UPDTATED}, + + /* ========== Counters for DDL operations ========== */ + {"module_ddl", "ddl", "Statistics for DDLs", + MONITOR_MODULE, + MONITOR_DEFAULT_START, MONITOR_MODULE_DDL_STATS}, + + {"ddl_background_drop_indexes", "ddl", + "Number of indexes waiting to be dropped after failed index creation", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_BACKGROUND_DROP_INDEX}, + + {"ddl_background_drop_tables", "ddl", + "Number of tables in background drop table list", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_BACKGROUND_DROP_TABLE}, + + {"ddl_online_create_index", "ddl", + "Number of indexes being created online", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_ONLINE_CREATE_INDEX}, + + {"ddl_pending_alter_table", "ddl", + "Number of ALTER TABLE, CREATE INDEX, DROP INDEX in progress", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_PENDING_ALTER_TABLE}, + + /* ===== Counters for 
ICP (Index Condition Pushdown) Module ===== */
+	{"module_icp", "icp", "Index Condition Pushdown",
+	 MONITOR_MODULE,
+	 MONITOR_DEFAULT_START, MONITOR_MODULE_ICP},
+
+	{"icp_attempts", "icp",
+	 "Number of attempts for index push-down condition checks",
+	 MONITOR_NONE,
+	 MONITOR_DEFAULT_START, MONITOR_ICP_ATTEMPTS},
+
+	{"icp_no_match", "icp", "Index push-down condition does not match",
+	 MONITOR_NONE,
+	 MONITOR_DEFAULT_START, MONITOR_ICP_NO_MATCH},
+
+	{"icp_out_of_range", "icp", "Index push-down condition out of range",
+	 MONITOR_NONE,
+	 MONITOR_DEFAULT_START, MONITOR_ICP_OUT_OF_RANGE},
+
+	{"icp_match", "icp", "Index push-down condition matches",
+	 MONITOR_NONE,
+	 MONITOR_DEFAULT_START, MONITOR_ICP_MATCH},
+
+	/* ========== To turn on/off reset all counters ==========
+	This "all" entry is typed MONITOR_MODULE and is the last element of
+	the initializer; srv_mon_set_module_control() treats its ID
+	(MONITOR_ALL_COUNTER) as "apply the option to every counter". */
+	{"all", "All Counters", "Turn on/off and reset all counters",
+	 MONITOR_MODULE,
+	 MONITOR_DEFAULT_START, MONITOR_ALL_COUNTER}
+};
+
+/* The "innodb_counter_value" array stores actual counter values.
+It has NUM_MONITOR elements, the same count that
+srv_mon_set_module_control() asserts for innodb_counter_info. */
+UNIV_INTERN monitor_value_t	innodb_counter_value[NUM_MONITOR];
+
+/* monitor_set_tbl is used to record and determine whether a monitor
+has been turned on/off. The array length is NUM_MONITOR divided by
+NUM_BITS_ULINT, rounded up, i.e. enough ulint words to hold NUM_MONITOR
+bits (presumably one on/off bit per monitor - confirm against the
+MONITOR_ON/MONITOR_OFF/MONITOR_IS_ON macros). */
+UNIV_INTERN ulint		monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT
+						 - 1) / NUM_BITS_ULINT];
+
+#ifndef HAVE_ATOMIC_BUILTINS_64
+/** Mutex protecting atomic operations on platforms that lack
+built-in operations for atomic memory access. Only defined when
+HAVE_ATOMIC_BUILTINS_64 is not defined. */
+ib_mutex_t	monitor_mutex;
+
+/** Key to register monitor_mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t	monitor_mutex_key;
+
+/****************************************************************//**
+Initialize the monitor subsystem.
+Creates monitor_mutex through mutex_create(), passing monitor_mutex_key
+and SYNC_ANY_LATCH. This function is only compiled when
+HAVE_ATOMIC_BUILTINS_64 is not defined. */
+UNIV_INTERN
+void
+srv_mon_create(void)
+/*================*/
+{
+	mutex_create(monitor_mutex_key, &monitor_mutex, SYNC_ANY_LATCH);
+}
+/****************************************************************//**
+Close the monitor subsystem.
*/
+UNIV_INTERN
+void
+srv_mon_free(void)
+/*==============*/
+{
+	mutex_free(&monitor_mutex);
+}
+#endif /* !HAVE_ATOMIC_BUILTINS_64 */
+
+/****************************************************************//**
+Get a monitor's "monitor_info" by its monitor id (index into the
+innodb_counter_info array).
+The id is asserted to be below NUM_MONITOR before it is used.
+NOTE(review): the NULL branch looks unreachable if ut_a() aborts on a
+failed assertion - confirm before relying on a NULL return.
+@return pointer to the corresponding monitor_info_t, or NULL if no such
+monitor */
+UNIV_INTERN
+monitor_info_t*
+srv_mon_get_info(
+/*=============*/
+	monitor_id_t	monitor_id)	/*!< id indexing into the
+					innodb_counter_info array */
+{
+	ut_a(monitor_id < NUM_MONITOR);
+
+	return((monitor_id < NUM_MONITOR)
+	       ? &innodb_counter_info[monitor_id]
+	       : NULL);
+}
+
+/****************************************************************//**
+Get monitor's name by its monitor id (indexing into the
+innodb_counter_info array).
+The id is asserted to be below NUM_MONITOR before it is used.
+@return corresponding monitor name, or NULL if no such
+monitor */
+UNIV_INTERN
+const char*
+srv_mon_get_name(
+/*=============*/
+	monitor_id_t	monitor_id)	/*!< id index into the
+					innodb_counter_info array */
+{
+	ut_a(monitor_id < NUM_MONITOR);
+
+	return((monitor_id < NUM_MONITOR)
+	       ? innodb_counter_info[monitor_id].monitor_name
+	       : NULL);
+}
+
+/****************************************************************//**
+Turn on/off or reset the monitor counters that belong to a module.
+If module_id is MONITOR_ALL_COUNTER, the option is applied to every
+counter (iteration starts at index 1 and module entries themselves are
+passed over). If the module is flagged MONITOR_GROUP_MODULE, iteration
+starts at the module entry itself, so the module entry is processed
+along with its members. Otherwise iteration starts right after the
+module entry and stops at the next entry of MONITOR_MODULE type.
+A counter that is already on is not turned on again: a message is
+printed to stderr and the counter is skipped. */
+UNIV_INTERN
+void
+srv_mon_set_module_control(
+/*=======================*/
+	monitor_id_t	module_id,	/*!< in: Module ID as in
+					monitor_counter_id. If it is
+					set to MONITOR_ALL_COUNTER, this means
+					we shall apply the option to all the
+					counters */
+	mon_option_t	set_option)	/*!< in: Turn on/off reset the
+					counter */
+{
+	ulint	ix;
+	ulint	start_id;
+	ibool	set_current_module = FALSE;
+
+	/* module_id indexes innodb_counter_info below, and that array has
+	exactly NUM_MONITOR elements, so the bound must be strict:
+	NUM_MONITOR itself would be one past the end. */
+	ut_a(module_id < NUM_MONITOR);
+	ut_a(UT_ARR_SIZE(innodb_counter_info) == NUM_MONITOR);
+
+	/* The module_id must be an ID of MONITOR_MODULE type */
+	ut_a(innodb_counter_info[module_id].monitor_type & MONITOR_MODULE);
+
+	/* start with the first monitor in the module. If module_id
+	is MONITOR_ALL_COUNTER, this means we need to turn on all
+	monitor counters. */
+	if (module_id == MONITOR_ALL_COUNTER) {
+		start_id = 1;
+	} else if (innodb_counter_info[module_id].monitor_type
+		   & MONITOR_GROUP_MODULE) {
+		/* Counters in this module are set as a group together
+		and cannot be turned on/off individually. Need to set
+		the on/off bit in the module counter */
+		start_id = module_id;
+		set_current_module = TRUE;
+
+	} else {
+		start_id = module_id + 1;
+	}
+
+	for (ix = start_id; ix < NUM_MONITOR; ix++) {
+		/* if we hit the next module counter, we will
+		continue if we want to turn on all monitor counters,
+		and break if just turn on the counters in the
+		current module. */
+		if (innodb_counter_info[ix].monitor_type & MONITOR_MODULE) {
+
+			if (set_current_module) {
+				/* Continue to set on/off bit on current
+				module; the flag is cleared so that the
+				next module entry ends the loop */
+				set_current_module = FALSE;
+			} else if (module_id == MONITOR_ALL_COUNTER) {
+				continue;
+			} else {
+				/* Hitting the next module, stop */
+				break;
+			}
+		}
+
+		/* Cannot turn on a monitor that has already been turned
+		on. User should be aware some counters are already on before
+		turning them on again (which could reset counter value) */
+		if (MONITOR_IS_ON(ix) && (set_option == MONITOR_TURN_ON)) {
+			fprintf(stderr, "Monitor '%s' is already enabled.\n",
+				srv_mon_get_name(
+					static_cast<monitor_id_t>(ix)));
+			continue;
+		}
+
+		/* For some existing counters (server status variables),
+		we will get its counter value at the start/stop time
+		to calculate the actual value during the time. */
+		if (innodb_counter_info[ix].monitor_type & MONITOR_EXISTING) {
+			srv_mon_process_existing_counter(
+				static_cast<monitor_id_t>(ix), set_option);
+		}
+
+		/* Currently support 4 operations on the monitor counters:
+		turn on, turn off, reset and reset all operations. */
+		switch (set_option) {
+		case MONITOR_TURN_ON:
+			MONITOR_ON(ix);
+			MONITOR_INIT(ix);
+			MONITOR_SET_START(ix);
+			break;
+
+		case MONITOR_TURN_OFF:
+			MONITOR_OFF(ix);
+			MONITOR_SET_OFF(ix);
+			break;
+
+		case MONITOR_RESET_VALUE:
+			srv_mon_reset(static_cast<monitor_id_t>(ix));
+			break;
+
+		case MONITOR_RESET_ALL_VALUE:
+			srv_mon_reset_all(static_cast<monitor_id_t>(ix));
+			break;
+
+		default:
+			ut_error;
+		}
+	}
+}
+
+/****************************************************************//**
+Get transaction system's rollback segment size in pages.
+Sums curr_size over every non-NULL entry of trx_sys->rseg_array.
+@return size in pages */
+static
+ulint
+srv_mon_get_rseg_size(void)
+/*=======================*/
+{
+	ulint		i;
+	ulint		value = 0;
+
+	/* rseg_array is a static array, so we can go through it without
+	mutex protection. In addition, we provide an estimate of the
+	total rollback segment size and to avoid mutex contention we
+	don't acquire the rseg->mutex */
+	for (i = 0; i < TRX_SYS_N_RSEGS; ++i) {
+		const trx_rseg_t*	rseg = trx_sys->rseg_array[i];
+
+		if (rseg != NULL) {
+			value += rseg->curr_size;
+		}
+	}
+
+	return(value);
+}
+
+/****************************************************************//**
+This function consolidates some existing server counters used
+by "system status variables".
These existing system variables do not have +mechanism to start/stop and reset the counters, so we simulate these +controls by remembering the corresponding counter values when the +corresponding monitors are turned on/off/reset, and do appropriate +mathematics to deduct the actual value. Please also refer to +srv_export_innodb_status() for related global counters used by +the existing status variables.*/ +UNIV_INTERN +void +srv_mon_process_existing_counter( +/*=============================*/ + monitor_id_t monitor_id, /*!< in: the monitor's ID as in + monitor_counter_id */ + mon_option_t set_option) /*!< in: operation to apply: turn on, + turn off, get value, reset or + reset all */ +{ + mon_type_t value; + monitor_info_t* monitor_info; + ibool update_min = FALSE; + buf_pool_stat_t stat; + buf_pools_list_size_t buf_pools_list_size; + ulint LRU_len; + ulint free_len; + ulint flush_list_len; + + monitor_info = srv_mon_get_info(monitor_id); + + ut_a(monitor_info->monitor_type & MONITOR_EXISTING); + ut_a(monitor_id < NUM_MONITOR); + + /* Get the value from corresponding global variable */ + switch (monitor_id) { + case MONITOR_OVLD_META_MEM_POOL: + value = srv_mem_pool_size; + break; + + /* export_vars.innodb_buffer_pool_reads. 
Num Reads from + disk (page not in buffer) */ + case MONITOR_OVLD_BUF_POOL_READS: + value = srv_stats.buf_pool_reads; + break; + + /* innodb_buffer_pool_read_requests, the number of logical + read requests */ + case MONITOR_OVLD_BUF_POOL_READ_REQUESTS: + buf_get_total_stat(&stat); + value = stat.n_page_gets; + break; + + /* innodb_buffer_pool_write_requests, the number of + write requests */ + case MONITOR_OVLD_BUF_POOL_WRITE_REQUEST: + value = srv_stats.buf_pool_write_requests; + break; + + /* innodb_buffer_pool_wait_free */ + case MONITOR_OVLD_BUF_POOL_WAIT_FREE: + value = srv_stats.buf_pool_wait_free; + break; + + /* innodb_buffer_pool_read_ahead */ + case MONITOR_OVLD_BUF_POOL_READ_AHEAD: + buf_get_total_stat(&stat); + value = stat.n_ra_pages_read; + break; + + /* innodb_buffer_pool_read_ahead_evicted */ + case MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED: + buf_get_total_stat(&stat); + value = stat.n_ra_pages_evicted; + break; + + /* innodb_buffer_pool_pages_total */ + case MONITOR_OVLD_BUF_POOL_PAGE_TOTAL: + value = buf_pool_get_n_pages(); + break; + + /* innodb_buffer_pool_pages_misc: pages that are on neither the + LRU list nor the free list */ + case MONITOR_OVLD_BUF_POOL_PAGE_MISC: + buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len); + value = buf_pool_get_n_pages() - LRU_len - free_len; + break; + + /* innodb_buffer_pool_pages_data */ + case MONITOR_OVLD_BUF_POOL_PAGES_DATA: + buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len); + value = LRU_len; + break; + + /* innodb_buffer_pool_bytes_data */ + case MONITOR_OVLD_BUF_POOL_BYTES_DATA: + buf_get_total_list_size_in_bytes(&buf_pools_list_size); + value = buf_pools_list_size.LRU_bytes + + buf_pools_list_size.unzip_LRU_bytes; + break; + + /* innodb_buffer_pool_pages_dirty */ + case MONITOR_OVLD_BUF_POOL_PAGES_DIRTY: + buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len); + value = flush_list_len; + break; + + /* innodb_buffer_pool_bytes_dirty */ + case MONITOR_OVLD_BUF_POOL_BYTES_DIRTY: + buf_get_total_list_size_in_bytes(&buf_pools_list_size); 
+ value = buf_pools_list_size.flush_list_bytes; + break; + + /* innodb_buffer_pool_pages_free */ + case MONITOR_OVLD_BUF_POOL_PAGES_FREE: + buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len); + value = free_len; + break; + + /* innodb_pages_created, the number of pages created */ + case MONITOR_OVLD_PAGE_CREATED: + buf_get_total_stat(&stat); + value = stat.n_pages_created; + break; + + /* innodb_pages_written, the number of pages written */ + case MONITOR_OVLD_PAGES_WRITTEN: + buf_get_total_stat(&stat); + value = stat.n_pages_written; + break; + + /* innodb_pages_read */ + case MONITOR_OVLD_PAGES_READ: + buf_get_total_stat(&stat); + value = stat.n_pages_read; + break; + + /* innodb_data_reads, the total number of data reads. + NOTE(review): same text as on MONITOR_OVLD_OS_FILE_READ below; + the names used here (BYTE_READ, srv_stats.data_read) suggest an + amount of data read rather than a count of reads - confirm */ + case MONITOR_OVLD_BYTE_READ: + value = srv_stats.data_read; + break; + + /* innodb_data_writes, the total number of data writes. + NOTE(review): same text as on MONITOR_OVLD_OS_FILE_WRITE below; + the names used here (BYTE_WRITTEN, srv_stats.data_written) + suggest an amount of data written rather than a count of + writes - confirm */ + case MONITOR_OVLD_BYTE_WRITTEN: + value = srv_stats.data_written; + break; + + /* innodb_data_reads, the total number of data reads. */ + case MONITOR_OVLD_OS_FILE_READ: + value = os_n_file_reads; + break; + + /* innodb_data_writes, the total number of data writes */ + case MONITOR_OVLD_OS_FILE_WRITE: + value = os_n_file_writes; + break; + + /* innodb_data_fsyncs, number of fsync() operations so far. 
*/ + case MONITOR_OVLD_OS_FSYNC: + value = os_n_fsyncs; + break; + + /* innodb_os_log_written */ + case MONITOR_OVLD_OS_LOG_WRITTEN: + value = (mon_type_t) srv_stats.os_log_written; + break; + + /* innodb_os_log_fsyncs */ + case MONITOR_OVLD_OS_LOG_FSYNC: + value = fil_n_log_flushes; + break; + + /* innodb_os_log_pending_fsyncs */ + case MONITOR_OVLD_OS_LOG_PENDING_FSYNC: + value = fil_n_pending_log_flushes; + /* ask the MONITOR_GET_VALUE branch below to also + maintain the minimum value for this counter */ + update_min = TRUE; + break; + + /* innodb_os_log_pending_writes */ + case MONITOR_OVLD_OS_LOG_PENDING_WRITES: + value = srv_stats.os_log_pending_writes; + update_min = TRUE; + break; + + /* innodb_log_waits */ + case MONITOR_OVLD_LOG_WAITS: + value = srv_stats.log_waits; + break; + + /* innodb_log_write_requests */ + case MONITOR_OVLD_LOG_WRITE_REQUEST: + value = srv_stats.log_write_requests; + break; + + /* innodb_log_writes */ + case MONITOR_OVLD_LOG_WRITES: + value = srv_stats.log_writes; + break; + + /* innodb_dblwr_writes */ + case MONITOR_OVLD_SRV_DBLWR_WRITES: + value = srv_stats.dblwr_writes; + break; + + /* innodb_dblwr_pages_written */ + case MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN: + value = srv_stats.dblwr_pages_written; + break; + + /* innodb_page_size */ + case MONITOR_OVLD_SRV_PAGE_SIZE: + value = UNIV_PAGE_SIZE; + break; + + case MONITOR_OVLD_RWLOCK_S_SPIN_WAITS: + value = rw_lock_stats.rw_s_spin_wait_count; + break; + + case MONITOR_OVLD_RWLOCK_X_SPIN_WAITS: + value = rw_lock_stats.rw_x_spin_wait_count; + break; + + case MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS: + value = rw_lock_stats.rw_s_spin_round_count; + break; + + case MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS: + value = rw_lock_stats.rw_x_spin_round_count; + break; + + case MONITOR_OVLD_RWLOCK_S_OS_WAITS: + value = rw_lock_stats.rw_s_os_wait_count; + break; + + case MONITOR_OVLD_RWLOCK_X_OS_WAITS: + value = rw_lock_stats.rw_x_os_wait_count; + break; + + case MONITOR_OVLD_BUFFER_POOL_SIZE: + value = srv_buf_pool_size; + break; + + /* innodb_rows_read */ + case MONITOR_OLVD_ROW_READ: + value = 
srv_stats.n_rows_read; + break; + + /* innodb_rows_inserted */ + case MONITOR_OLVD_ROW_INSERTED: + value = srv_stats.n_rows_inserted; + break; + + /* innodb_rows_deleted */ + case MONITOR_OLVD_ROW_DELETED: + value = srv_stats.n_rows_deleted; + break; + + /* innodb_rows_updated */ + case MONITOR_OLVD_ROW_UPDTATED: + value = srv_stats.n_rows_updated; + break; + + /* innodb_row_lock_current_waits */ + case MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT: + value = srv_stats.n_lock_wait_current_count; + break; + + /* innodb_row_lock_time */ + case MONITOR_OVLD_LOCK_WAIT_TIME: + value = srv_stats.n_lock_wait_time / 1000; + break; + + /* innodb_row_lock_time_max */ + case MONITOR_OVLD_LOCK_MAX_WAIT_TIME: + value = lock_sys->n_lock_max_wait_time / 1000; + break; + + /* innodb_row_lock_time_avg; reported as 0 while no lock wait + has been counted, which also avoids dividing by zero */ + case MONITOR_OVLD_LOCK_AVG_WAIT_TIME: + if (srv_stats.n_lock_wait_count > 0) { + value = srv_stats.n_lock_wait_time / 1000 + / srv_stats.n_lock_wait_count; + } else { + value = 0; + } + break; + + /* innodb_row_lock_waits */ + case MONITOR_OVLD_ROW_LOCK_WAIT: + value = srv_stats.n_lock_wait_count; + break; + + case MONITOR_RSEG_HISTORY_LEN: + value = trx_sys->rseg_history_len; + break; + + case MONITOR_RSEG_CUR_SIZE: + value = srv_mon_get_rseg_size(); + break; + + case MONITOR_OVLD_N_FILE_OPENED: + value = fil_n_file_opened; + break; + + case MONITOR_OVLD_IBUF_MERGE_INSERT: + value = ibuf->n_merged_ops[IBUF_OP_INSERT]; + break; + + case MONITOR_OVLD_IBUF_MERGE_DELETE: + value = ibuf->n_merged_ops[IBUF_OP_DELETE_MARK]; + break; + + case MONITOR_OVLD_IBUF_MERGE_PURGE: + value = ibuf->n_merged_ops[IBUF_OP_DELETE]; + break; + + case MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT: + value = ibuf->n_discarded_ops[IBUF_OP_INSERT]; + break; + + case MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE: + value = ibuf->n_discarded_ops[IBUF_OP_DELETE_MARK]; + break; + + case MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE: + value = ibuf->n_discarded_ops[IBUF_OP_DELETE]; + break; + + case MONITOR_OVLD_IBUF_MERGES: + value = 
ibuf->n_merges; + break; + + case MONITOR_OVLD_IBUF_SIZE: + value = ibuf->size; + break; + + case MONITOR_OVLD_SERVER_ACTIVITY: + value = srv_get_activity_count(); + break; + + case MONITOR_OVLD_LSN_FLUSHDISK: + value = (mon_type_t) log_sys->flushed_to_disk_lsn; + break; + + case MONITOR_OVLD_LSN_CURRENT: + value = (mon_type_t) log_sys->lsn; + break; + + case MONITOR_OVLD_BUF_OLDEST_LSN: + value = (mon_type_t) buf_pool_get_oldest_modification(); + break; + + case MONITOR_OVLD_LSN_CHECKPOINT: + value = (mon_type_t) log_sys->last_checkpoint_lsn; + break; + + case MONITOR_OVLD_MAX_AGE_ASYNC: + value = log_sys->max_modified_age_async; + break; + + case MONITOR_OVLD_MAX_AGE_SYNC: + value = log_sys->max_modified_age_sync; + break; + + case MONITOR_OVLD_ADAPTIVE_HASH_SEARCH: + value = btr_cur_n_sea; + break; + + case MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE: + value = btr_cur_n_non_sea; + break; + + default: + ut_error; + } + + switch (set_option) { + case MONITOR_TURN_ON: + /* Save the initial counter value in mon_start_value + field */ + MONITOR_SAVE_START(monitor_id, value); + return; + + case MONITOR_TURN_OFF: + /* Save the counter value to mon_last_value when we + turn off the monitor but not yet reset. Note the + counter has not yet been set to off in the bitmap + table for normal turn off. We need to check the + counter status (on/off) to avoid resetting the value + for an already off counter */ + if (MONITOR_IS_ON(monitor_id)) { + srv_mon_process_existing_counter(monitor_id, + MONITOR_GET_VALUE); + MONITOR_SAVE_LAST(monitor_id); + } + return; + + case MONITOR_GET_VALUE: + if (MONITOR_IS_ON(monitor_id)) { + + /* If MONITOR_DISPLAY_CURRENT bit is on, we + only record the current value, rather than + incremental value over a period. Most + counters of this type are resource related + counters such as number of buffer pages etc. 
*/ + if (monitor_info->monitor_type + & MONITOR_DISPLAY_CURRENT) { + MONITOR_SET(monitor_id, value); + } else { + /* Most status counters are monotonically + increasing, no need to update their + minimum values. Only do so + if "update_min" set to TRUE */ + MONITOR_SET_DIFF(monitor_id, value); + + if (update_min + && (MONITOR_VALUE(monitor_id) + < MONITOR_MIN_VALUE(monitor_id))) { + MONITOR_MIN_VALUE(monitor_id) = + MONITOR_VALUE(monitor_id); + } + } + } + return; + + case MONITOR_RESET_VALUE: + /* Only a counter that is currently off has its saved + last value cleared here */ + if (!MONITOR_IS_ON(monitor_id)) { + MONITOR_LAST_VALUE(monitor_id) = 0; + } + return; + + /* Nothing special for reset all operation for these existing + counters */ + case MONITOR_RESET_ALL_VALUE: + return; + } +} + +/*************************************************************//** +Reset a monitor, create a new baseline with the current monitor +value. This baseline is recorded by MONITOR_VALUE_RESET(monitor). +A monitor that was on is switched off for the duration of the reset +and switched back on afterwards. */ +UNIV_INTERN +void +srv_mon_reset( +/*==========*/ + monitor_id_t monitor) /*!< in: monitor id */ +{ + ibool monitor_was_on; + + monitor_was_on = MONITOR_IS_ON(monitor); + + if (monitor_was_on) { + /* Temporarily turn off the counter for the resetting + operation */ + MONITOR_OFF(monitor); + } + + /* Before resetting the current monitor value, first + calculate and set the max/min value since monitor + start */ + srv_mon_calc_max_since_start(monitor); + srv_mon_calc_min_since_start(monitor); + + /* Monitors with MONITOR_DISPLAY_CURRENT bit + are not incremental, no need to remember + the reset value. 
*/ + if (innodb_counter_info[monitor].monitor_type + & MONITOR_DISPLAY_CURRENT) { + MONITOR_VALUE_RESET(monitor) = 0; + } else { + /* Remember the new baseline */ + MONITOR_VALUE_RESET(monitor) = MONITOR_VALUE_RESET(monitor) + + MONITOR_VALUE(monitor); + } + + /* Reset the counter value */ + MONITOR_VALUE(monitor) = 0; + MONITOR_MAX_VALUE(monitor) = MAX_RESERVED; + MONITOR_MIN_VALUE(monitor) = MIN_RESERVED; + + /* Record when this reset happened */ + MONITOR_FIELD((monitor), mon_reset_time) = time(NULL); + + if (monitor_was_on) { + MONITOR_ON(monitor); + } +} + +/*************************************************************//** +Turn on monitor counters that are marked as default ON. */ +UNIV_INTERN +void +srv_mon_default_on(void) +/*====================*/ +{ + ulint ix; + + for (ix = 0; ix < NUM_MONITOR; ix++) { + if (innodb_counter_info[ix].monitor_type + & MONITOR_DEFAULT_ON) { + /* Turn on monitor counters that are default on */ + MONITOR_ON(ix); + MONITOR_INIT(ix); + MONITOR_SET_START(ix); + } + } +} +#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc new file mode 100644 index 00000000000..806c3aea70a --- /dev/null +++ b/storage/xtradb/srv/srv0srv.cc @@ -0,0 +1,3511 @@ +/***************************************************************************** + +Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2008, 2009 Google Inc. +Copyright (c) 2009, Percona Inc. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +Portions of this file contain modifications contributed and copyrighted +by Percona Inc.. Those modifications are +gratefully acknowledged and are described briefly in the InnoDB +documentation. 
The contributions by Percona Inc. are incorporated with +their permission, and subject to the conditions contained in the file +COPYING.Percona. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/**************************************************//** +@file srv/srv0srv.cc +The database server main program + +Created 10/8/1995 Heikki Tuuri +*******************************************************/ + +/* Dummy comment */ +#include "srv0srv.h" + +#include "ut0mem.h" +#include "ut0ut.h" +#include "os0proc.h" +#include "mem0mem.h" +#include "mem0pool.h" +#include "sync0sync.h" +#include "que0que.h" +#include "log0online.h" +#include "log0recv.h" +#include "pars0pars.h" +#include "usr0sess.h" +#include "lock0lock.h" +#include "trx0purge.h" +#include "ibuf0ibuf.h" +#include "buf0flu.h" +#include "buf0lru.h" +#include "btr0sea.h" +#include "dict0load.h" +#include "dict0boot.h" +#include "dict0stats_bg.h" /* dict_stats_event */ +#include "srv0start.h" +#include "row0mysql.h" +#include "ha_prototypes.h" +#include "trx0i_s.h" +#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */ +#include "srv0mon.h" +#include "ut0crc32.h" +#include "os0file.h" + +#include "mysql/plugin.h" +#include "mysql/service_thd_wait.h" + +/* prototypes of new functions added to ha_innodb.cc for kill_idle_transaction */ +ibool 
innobase_thd_is_idle(const void* thd); +ib_int64_t innobase_thd_get_start_time(const void* thd); +void innobase_thd_kill(ulong thd_id); +ulong innobase_thd_get_thread_id(const void* thd); + +/* prototypes for new functions added to ha_innodb.cc */ +ibool innobase_get_slow_log(); + +/* The following is the maximum allowed duration of a lock wait. */ +UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600; + +/* NOTE(review): undocumented in the original; presumably the setting behind +the kill_idle_transaction feature whose prototypes are declared above, with +0 as its initial value - confirm meaning and unit */ +UNIV_INTERN lint srv_kill_idle_transaction = 0; + +/* How much data manipulation language (DML) statements need to be delayed, +in microseconds, in order to reduce the lagging of the purge thread. */ +UNIV_INTERN ulint srv_dml_needed_delay = 0; + +UNIV_INTERN ibool srv_monitor_active = FALSE; +UNIV_INTERN ibool srv_error_monitor_active = FALSE; + +UNIV_INTERN ibool srv_buf_dump_thread_active = FALSE; + +UNIV_INTERN ibool srv_dict_stats_thread_active = FALSE; + +UNIV_INTERN const char* srv_main_thread_op_info = ""; + +/** Prefix used by MySQL to indicate pre-5.1 table name encoding */ +const char srv_mysql50_table_name_prefix[10] = "#mysql50#"; + +/* Server parameters which are read from the initfile */ + +/* The following three are dir paths which are catenated before file +names, where the file name itself may also contain a path */ + +UNIV_INTERN char* srv_data_home = NULL; + +/** Rollback files directory, can be absolute. */ +UNIV_INTERN char* srv_undo_dir = NULL; + +/** The number of tablespaces to use for rollback segments. */ +UNIV_INTERN ulong srv_undo_tablespaces = 8; + +/** The number of UNDO tablespaces that are open and ready to use. */ +UNIV_INTERN ulint srv_undo_tablespaces_open = 8; + +/* The number of rollback segments to use */ +UNIV_INTERN ulong srv_undo_logs = 1; + +#ifdef UNIV_LOG_ARCHIVE +UNIV_INTERN char* srv_arch_dir = NULL; +UNIV_INTERN ulong srv_log_arch_expire_sec = 0; +#endif /* UNIV_LOG_ARCHIVE */ + +/** Set if InnoDB must operate in read-only mode. We don't do any +recovery and open all tables in RO mode instead of RW mode. 
We don't +sync the max trx id to disk either. */ +UNIV_INTERN my_bool srv_read_only_mode; +/** Store each table created by a user in its own file; data +dictionary tables are in the system tablespace 0 */ +UNIV_INTERN my_bool srv_file_per_table; +/** The file format to use on new *.ibd files. */ +UNIV_INTERN ulint srv_file_format = 0; +/** Whether to check file format during startup. A value of +UNIV_FORMAT_MAX + 1 means no checking, i.e. FALSE. The default is to +set it to the highest format we support. */ +UNIV_INTERN ulint srv_max_file_format_at_startup = UNIV_FORMAT_MAX; + +#if UNIV_FORMAT_A +# error "UNIV_FORMAT_A must be 0!" +#endif + +/** Place locks to records only i.e. do not use next-key locking except +on duplicate key checking and foreign key checking */ +UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE; +/** Sort buffer size in index creation */ +UNIV_INTERN ulong srv_sort_buf_size = 1048576; +/** Maximum modification log file size for online index creation */ +UNIV_INTERN unsigned long long srv_online_max_size; + +/* If this flag is TRUE, then we will use the native aio of the +OS (provided we compiled Innobase with it in), otherwise we will +use simulated aio we build below with threads. +Currently we support native aio on Windows and Linux */ +UNIV_INTERN my_bool srv_use_native_aio = TRUE; + +#ifdef __WIN__ +/* Windows native condition variables. We use runtime loading / function +pointers, because they are not available on Windows Server 2003 and +Windows XP/2000. + +We use condition for events on Windows if possible, even if os_event +resembles Windows kernel event object well API-wise. The reason is +performance, kernel objects are heavyweights and WaitForSingleObject() is a +performance killer causing calling thread to context switch. Besides, Innodb +is preallocating large number (often millions) of os_events. 
With kernel event +objects it takes a big chunk out of non-paged pool, which is better suited +for tasks like IO than for storing idle event objects. */ +UNIV_INTERN ibool srv_use_native_conditions = FALSE; +#endif /* __WIN__ */ + +UNIV_INTERN ulint srv_n_data_files = 0; +UNIV_INTERN char** srv_data_file_names = NULL; +/* size in database pages */ +UNIV_INTERN ulint* srv_data_file_sizes = NULL; + +UNIV_INTERN my_bool srv_track_changed_pages = FALSE; + +UNIV_INTERN ulonglong srv_max_bitmap_file_size = 100 * 1024 * 1024; + +UNIV_INTERN ulonglong srv_max_changed_pages = 0; + +/** When TRUE, fake change transactions take S rather than X row locks. + When FALSE, row locks are not taken at all. */ +UNIV_INTERN my_bool srv_fake_changes_locks = TRUE; + +/* if TRUE, then we auto-extend the last data file */ +UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE; +/* if != 0, this tells the max size auto-extending may increase the +last data file size */ +UNIV_INTERN ulint srv_last_file_size_max = 0; +/* If the last data file is auto-extended, we add this +many pages to it at a time */ +UNIV_INTERN ulong srv_auto_extend_increment = 8; +UNIV_INTERN ulint* srv_data_file_is_raw_partition = NULL; + +/* If the following is TRUE we do not allow inserts etc. 
This protects +the user from forgetting the 'newraw' keyword to my.cnf */ + +UNIV_INTERN ibool srv_created_new_raw = FALSE; + +UNIV_INTERN char* srv_log_group_home_dir = NULL; + +UNIV_INTERN ulong srv_n_log_files = SRV_N_LOG_FILES_MAX; +/* size in database pages */ +UNIV_INTERN ib_uint64_t srv_log_file_size = IB_UINT64_MAX; +UNIV_INTERN ib_uint64_t srv_log_file_size_requested; +/* size in database pages */ +UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX; +UNIV_INTERN uint srv_flush_log_at_timeout = 1; +UNIV_INTERN ulong srv_page_size = UNIV_PAGE_SIZE_DEF; +UNIV_INTERN ulong srv_page_size_shift = UNIV_PAGE_SIZE_SHIFT_DEF; +UNIV_INTERN char srv_use_global_flush_log_at_trx_commit = TRUE; + +/* Try to flush dirty pages so as to avoid IO bursts at +the checkpoints. */ +UNIV_INTERN char srv_adaptive_flushing = TRUE; + +UNIV_INTERN ulint srv_show_locks_held = 10; +UNIV_INTERN ulint srv_show_verbose_locks = 0; + +/** Maximum number of times allowed to conditionally acquire +mutex before switching to blocking wait on the mutex */ +#define MAX_MUTEX_NOWAIT 20 + +/** Check whether the number of failed nonblocking mutex +acquisition attempts exceeds maximum allowed value. If so, +srv_printf_innodb_monitor() will request mutex acquisition +with mutex_enter(), which will wait until it gets the mutex. 
*/ +#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT) + +/** The sort order table of the MySQL latin1_swedish_ci character set +collation */ +UNIV_INTERN const byte* srv_latin1_ordering; + +/* use os/external memory allocator */ +UNIV_INTERN my_bool srv_use_sys_malloc = TRUE; +/* requested size in kilobytes */ +UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX; +/* force virtual page preallocation (prefault) */ +UNIV_INTERN my_bool srv_buf_pool_populate = FALSE; +/* requested number of buffer pool instances */ +UNIV_INTERN ulint srv_buf_pool_instances = 1; +/* number of locks to protect buf_pool->page_hash */ +UNIV_INTERN ulong srv_n_page_hash_locks = 16; +/** Scan depth for LRU flush batch, i.e. the number of blocks scanned */ +UNIV_INTERN ulong srv_LRU_scan_depth = 1024; +/** whether or not to flush neighbors of a block */ +UNIV_INTERN ulong srv_flush_neighbors = 1; +/* previously requested size */ +UNIV_INTERN ulint srv_buf_pool_old_size; +/* current size in kilobytes */ +UNIV_INTERN ulint srv_buf_pool_curr_size = 0; +/* size in bytes */ +UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX; +UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX; + +/** Query thread preflush algorithm */ +UNIV_INTERN ulint srv_foreground_preflush + = SRV_FOREGROUND_PREFLUSH_EXP_BACKOFF; + +/** The maximum time limit for a single LRU tail flush iteration by the page +cleaner thread */ +UNIV_INTERN ulint srv_cleaner_max_lru_time = 1000; + +/** The maximum time limit for a single flush list flush iteration by the page +cleaner thread */ +UNIV_INTERN ulint srv_cleaner_max_flush_time = 1000; + +/** Page cleaner flush list flush batches are further divided into chunks of +this size */ +UNIV_INTERN ulint srv_cleaner_flush_chunk_size = 100; + +/** Page cleaner LRU list flush batches are further divided into chunks of +this size */ +UNIV_INTERN ulint srv_cleaner_lru_chunk_size = 100; + +/** If free list length is lower than this percentage of srv_LRU_scan_depth, +page cleaner LRU flushes 
will issue flush batches to the same instance in a +row */ +UNIV_INTERN ulint srv_cleaner_free_list_lwm = 10; + +/** If TRUE, page cleaner heuristics use evicted instead of flushed page +counts */ +UNIV_INTERN my_bool srv_cleaner_eviction_factor = FALSE; + +/** Page cleaner LSN age factor formula option */ +UNIV_INTERN ulong srv_cleaner_lsn_age_factor + = SRV_CLEANER_LSN_AGE_FACTOR_HIGH_CHECKPOINT; + +/** Algorithm option for how a query thread handles an empty free list */ +UNIV_INTERN ulong srv_empty_free_list_algorithm + = SRV_EMPTY_FREE_LIST_BACKOFF; + +/* This parameter is deprecated. Use srv_n_[read|write]_io_threads +instead. */ +UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX; +UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX; +UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX; + +/* Switch to enable random read ahead. */ +UNIV_INTERN my_bool srv_random_read_ahead = FALSE; + +/* The log block size */ +UNIV_INTERN ulint srv_log_block_size = 0; + +/* User settable value of the number of pages that must be present +in the buffer cache and accessed sequentially for InnoDB to trigger a +readahead request. */ +UNIV_INTERN ulong srv_read_ahead_threshold = 56; + +#ifdef UNIV_LOG_ARCHIVE +UNIV_INTERN ibool srv_log_archive_on = FALSE; +UNIV_INTERN ibool srv_archive_recovery = 0; +UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn; +#endif /* UNIV_LOG_ARCHIVE */ + +/* This parameter is used to throttle the number of insert buffers that are +merged in a batch. By increasing this parameter on a faster disk you can +possibly reduce the number of I/O operations performed to complete the +merge operation. The value of this parameter is used as is by the +background loop when the system is idle (low load); on a busy system +the parameter is scaled down by a factor of 4 to avoid putting +a heavier load on the I/O subsystem. 
*/ + +UNIV_INTERN ulong srv_insert_buffer_batch_size = 20; + +UNIV_INTERN char* srv_file_flush_method_str = NULL; +UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC; +UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; + +UNIV_INTERN ulint srv_max_n_open_files = 300; + +/* Number of IO operations per second the server can do */ +UNIV_INTERN ulong srv_io_capacity = 200; +UNIV_INTERN ulong srv_max_io_capacity = 400; + +/* The InnoDB main thread tries to keep the ratio of modified pages +in the buffer pool to all database pages in the buffer pool smaller than +the following number. But it is not guaranteed that the value stays below +that during a time of heavy update/insert activity. */ + +UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75; +UNIV_INTERN ulong srv_max_dirty_pages_pct_lwm = 50; + +/* This is the percentage of log capacity at which adaptive flushing, +if enabled, will kick in. */ +UNIV_INTERN ulong srv_adaptive_flushing_lwm = 10; + +/* Number of iterations over which adaptive flushing is averaged. 
*/ +UNIV_INTERN ulong srv_flushing_avg_loops = 30; + +/* The tid of the cleaner thread */ +UNIV_INTERN os_tid_t srv_cleaner_tid; + +/* The tid of the LRU manager thread */ +UNIV_INTERN os_tid_t srv_lru_manager_tid; + +/* The tids of the purge threads */ +UNIV_INTERN os_tid_t srv_purge_tids[SRV_MAX_N_PURGE_THREADS]; + +/* The tids of the I/O threads */ +UNIV_INTERN os_tid_t srv_io_tids[SRV_MAX_N_IO_THREADS]; + +/* The tid of the master thread */ +UNIV_INTERN os_tid_t srv_master_tid; + +/* The relative scheduling priority of the cleaner and LRU manager threads */ +UNIV_INTERN ulint srv_sched_priority_cleaner = 19; + +/* The relative scheduling priority of the purge threads */ +UNIV_INTERN ulint srv_sched_priority_purge = 19; + +/* The relative scheduling priority of the I/O threads */ +UNIV_INTERN ulint srv_sched_priority_io = 19; + +/* The relative scheduling priority of the master thread */ +UNIV_INTERN ulint srv_sched_priority_master = 19; + +/* The relative priority of the current thread. If 0, low priority; if 1, high +priority. */ +UNIV_INTERN UNIV_THREAD_LOCAL ulint srv_current_thread_priority = 0; + +/* The relative priority of the purge coordinator and worker threads. */ +UNIV_INTERN my_bool srv_purge_thread_priority = FALSE; + +/* The relative priority of the I/O threads. */ +UNIV_INTERN my_bool srv_io_thread_priority = FALSE; + +/* The relative priority of the cleaner thread. */ +UNIV_INTERN my_bool srv_cleaner_thread_priority = FALSE; + +/* The relative priority of the master thread. */ +UNIV_INTERN my_bool srv_master_thread_priority = FALSE; + +/* The number of purge threads to use. */ +UNIV_INTERN ulong srv_n_purge_threads = 1; + +/* The number of pages to purge in one batch */ +UNIV_INTERN ulong srv_purge_batch_size = 20; + +/* Internal setting for "innodb_stats_method". Decides how InnoDB treats +NULL value when collecting statistics. By default, it is set to +SRV_STATS_NULLS_EQUAL(0), ie. 
all NULL value are treated equal */ +UNIV_INTERN ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL; + +UNIV_INTERN srv_stats_t srv_stats; + +/* structure to pass status variables to MySQL */ +UNIV_INTERN export_var_t export_vars; + +/** Normally 0. When nonzero, skip some phases of crash recovery, +starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered +by SELECT or mysqldump. When this is nonzero, we do not allow any user +modifications to the data. */ +UNIV_INTERN ulong srv_force_recovery; +#ifndef DBUG_OFF +/** Inject a crash at different steps of the recovery process. +This is for testing and debugging only. */ +UNIV_INTERN ulong srv_force_recovery_crash; +#endif /* !DBUG_OFF */ + +/** Print all user-level transaction deadlocks to mysqld stderr */ + +UNIV_INTERN my_bool srv_print_all_deadlocks = FALSE; + +/** Enable INFORMATION_SCHEMA.innodb_cmp_per_index */ +UNIV_INTERN my_bool srv_cmp_per_index_enabled = FALSE; + +/* If the following is set to 1 then we do not run purge and insert buffer +merge to completion before shutdown. If it is set to 2, do not even flush the +buffer pool to data files at the shutdown: we effectively 'crash' +InnoDB (but lose no committed transactions). */ +UNIV_INTERN ulint srv_fast_shutdown = 0; + +/* Generate an innodb_status.<pid> file */ +UNIV_INTERN ibool srv_innodb_status = FALSE; + +/* When estimating number of different key values in an index, sample +this many index pages, there are 2 ways to calculate statistics: +* persistent stats that are calculated by ANALYZE TABLE and saved + in the innodb database. 
+* quick transient stats, that are used if persistent stats for the given + table/index are not found in the innodb database */ +UNIV_INTERN unsigned long long srv_stats_transient_sample_pages = 8; +UNIV_INTERN my_bool srv_stats_persistent = TRUE; +UNIV_INTERN unsigned long long srv_stats_persistent_sample_pages = 20; +UNIV_INTERN my_bool srv_stats_auto_recalc = TRUE; + +UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE; +UNIV_INTERN ibool srv_use_atomic_writes = FALSE; +#ifdef HAVE_POSIX_FALLOCATE +UNIV_INTERN ibool srv_use_posix_fallocate = FALSE; +#endif + +/** doublewrite buffer is 1MB is size i.e.: it can hold 128 16K pages. +The following parameter is the size of the buffer that is used for +batch flushing i.e.: LRU flushing and flush_list flushing. The rest +of the pages are used for single page flushing. */ +UNIV_INTERN ulong srv_doublewrite_batch_size = 120; + +UNIV_INTERN ulong srv_replication_delay = 0; + +UNIV_INTERN ulint srv_pass_corrupt_table = 0; /* 0:disable 1:enable */ + +UNIV_INTERN ulint srv_log_checksum_algorithm = + SRV_CHECKSUM_ALGORITHM_INNODB; + +/*-------------------------------------------*/ +UNIV_INTERN ulong srv_n_spin_wait_rounds = 30; +UNIV_INTERN ulong srv_spin_wait_delay = 6; +UNIV_INTERN ibool srv_priority_boost = TRUE; + +#ifdef UNIV_DEBUG +UNIV_INTERN ibool srv_print_thread_releases = FALSE; +UNIV_INTERN ibool srv_print_lock_waits = FALSE; +UNIV_INTERN ibool srv_print_buf_io = FALSE; +UNIV_INTERN ibool srv_print_log_io = FALSE; +UNIV_INTERN ibool srv_print_latch_waits = FALSE; +#endif /* UNIV_DEBUG */ + +static ulint srv_n_rows_inserted_old = 0; +static ulint srv_n_rows_updated_old = 0; +static ulint srv_n_rows_deleted_old = 0; +static ulint srv_n_rows_read_old = 0; + +UNIV_INTERN ulint srv_truncated_status_writes = 0; +UNIV_INTERN ulint srv_available_undo_logs = 0; + +/* Ensure status variables are on separate cache lines */ + +#define CACHE_LINE_SIZE 64 +#define CACHE_ALIGNED __attribute__ ((aligned (CACHE_LINE_SIZE))) + 
+UNIV_INTERN byte +counters_pad_start[CACHE_LINE_SIZE] __attribute__((unused)) = {0}; + +UNIV_INTERN ulint srv_read_views_memory CACHE_ALIGNED = 0; +UNIV_INTERN ulint srv_descriptors_memory CACHE_ALIGNED = 0; + +UNIV_INTERN byte +counters_pad_end[CACHE_LINE_SIZE] __attribute__((unused)) = {0}; + +/* Set the following to 0 if you want InnoDB to write messages on +stderr on startup/shutdown. */ +UNIV_INTERN ibool srv_print_verbose_log = TRUE; +UNIV_INTERN my_bool srv_print_innodb_monitor = FALSE; +UNIV_INTERN my_bool srv_print_innodb_lock_monitor = FALSE; +UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE; +UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE; + +/* Array of English strings describing the current state of an +i/o handler thread */ + +UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS]; +UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS]; + +UNIV_INTERN time_t srv_last_monitor_time; + +UNIV_INTERN ib_mutex_t srv_innodb_monitor_mutex; + +/* Mutex for locking srv_monitor_file. 
Not created if srv_read_only_mode */ +UNIV_INTERN ib_mutex_t srv_monitor_file_mutex; + +#ifdef UNIV_PFS_MUTEX +# ifndef HAVE_ATOMIC_BUILTINS +/* Key to register server_mutex with performance schema */ +UNIV_INTERN mysql_pfs_key_t server_mutex_key; +# endif /* !HAVE_ATOMIC_BUILTINS */ +/** Key to register srv_innodb_monitor_mutex with performance schema */ +UNIV_INTERN mysql_pfs_key_t srv_innodb_monitor_mutex_key; +/** Key to register srv_monitor_file_mutex with performance schema */ +UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key; +/** Key to register srv_dict_tmpfile_mutex with performance schema */ +UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key; +/** Key to register the mutex with performance schema */ +UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key; +/** Key to register srv_sys_t::mutex with performance schema */ +UNIV_INTERN mysql_pfs_key_t srv_sys_mutex_key; +/** Key to register srv_sys_t::tasks_mutex with performance schema */ +UNIV_INTERN mysql_pfs_key_t srv_sys_tasks_mutex_key; +#endif /* UNIV_PFS_MUTEX */ + +/** Temporary file for innodb monitor output */ +UNIV_INTERN FILE* srv_monitor_file; +/** Mutex for locking srv_dict_tmpfile. Not created if srv_read_only_mode. +This mutex has a very high rank; threads reserving it should not +be holding any InnoDB latches. */ +UNIV_INTERN ib_mutex_t srv_dict_tmpfile_mutex; +/** Temporary file for output from the data dictionary */ +UNIV_INTERN FILE* srv_dict_tmpfile; +/** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode. +This mutex has a very low rank; threads reserving it should not +acquire any further latches or sleep before releasing this one. */ +UNIV_INTERN ib_mutex_t srv_misc_tmpfile_mutex; +/** Temporary file for miscellanous diagnostic output */ +UNIV_INTERN FILE* srv_misc_tmpfile; + +UNIV_INTERN ulint srv_main_thread_process_no = 0; +UNIV_INTERN ulint srv_main_thread_id = 0; + +/* The following counts are used by the srv_master_thread. 
*/ + +/** Iterations of the loop bounded by 'srv_active' label. */ +static ulint srv_main_active_loops = 0; +/** Iterations of the loop bounded by the 'srv_idle' label. */ +static ulint srv_main_idle_loops = 0; +/** Iterations of the loop bounded by the 'srv_shutdown' label. */ +static ulint srv_main_shutdown_loops = 0; +/** Log writes involving flush. */ +static ulint srv_log_writes_and_flush = 0; + +/* This is only ever touched by the master thread. It records the +time when the last flush of log file has happened. The master +thread ensures that we flush the log files at least once per +second. */ +static time_t srv_last_log_flush_time; + +/* Interval in seconds at which various tasks are performed by the +master thread when server is active. In order to balance the workload, +we should try to keep intervals such that they are not multiple of +each other. For example, if we have intervals for various tasks +defined as 5, 10, 15, 60 then all tasks will be performed when +current_time % 60 == 0 and no tasks will be performed when +current_time % 5 != 0. */ + +# define SRV_MASTER_CHECKPOINT_INTERVAL (7) +# define SRV_MASTER_PURGE_INTERVAL (10) +#ifdef MEM_PERIODIC_CHECK +# define SRV_MASTER_MEM_VALIDATE_INTERVAL (13) +#endif /* MEM_PERIODIC_CHECK */ +# define SRV_MASTER_DICT_LRU_INTERVAL (47) + +/** Acquire the system_mutex. */ +#define srv_sys_mutex_enter() do { \ + mutex_enter(&srv_sys->mutex); \ +} while (0) + +/** Test if the system mutex is owned. */ +#define srv_sys_mutex_own() (mutex_own(&srv_sys->mutex) \ + && !srv_read_only_mode) + +/** Release the system mutex. */ +#define srv_sys_mutex_exit() do { \ + mutex_exit(&srv_sys->mutex); \ +} while (0) + +#define fetch_lock_wait_timeout(trx) \ + ((trx)->lock.allowed_to_wait \ + ? 
thd_lock_wait_timeout((trx)->mysql_thd) \ + : 0) + +/* + IMPLEMENTATION OF THE SERVER MAIN PROGRAM + ========================================= + +There is the following analogue between this database +server and an operating system kernel: + +DB concept equivalent OS concept +---------- --------------------- +transaction -- process; + +query thread -- thread; + +lock -- semaphore; + +kernel -- kernel; + +query thread execution: +(a) without lock mutex +reserved -- process executing in user mode; +(b) with lock mutex reserved + -- process executing in kernel mode; + +The server has several background threads all running at the same +priority as user threads. It periodically checks if there is anything +happening in the server which requires intervention of the master +thread. Such situations may be, for example, when flushing of dirty +blocks is needed in the buffer pool or old versions of database rows +have to be cleaned away (purged). The user can configure a separate +dedicated purge thread(s) too, in which case the master thread does not +do any purging. + +The threads which we call user threads serve the queries of the MySQL +server. They run at normal priority. + +When there is no activity in the system, also the master thread +suspends itself to wait for an event making the server totally silent. + +There is still one complication in our server design. If a +background utility thread obtains a resource (e.g., mutex) needed by a user +thread, and there is also some other user activity in the system, +the user thread may have to wait indefinitely long for the +resource, as the OS does not schedule a background thread if +there is some other runnable user thread. This problem is called +priority inversion in real-time programming. 
+ +One solution to the priority inversion problem would be to keep record +of which thread owns which resource and in the above case boost the +priority of the background thread so that it will be scheduled and it +can release the resource. This solution is called priority inheritance +in real-time programming. A drawback of this solution is that the overhead +of acquiring a mutex increases slightly, maybe 0.2 microseconds on a 100 +MHz Pentium, because the thread has to call os_thread_get_curr_id. This may +be compared to 0.5 microsecond overhead for a mutex lock-unlock pair. Note +that the thread cannot store the information in the resource , say mutex, +itself, because competing threads could wipe out the information if it is +stored before acquiring the mutex, and if it stored afterwards, the +information is outdated for the time of one machine instruction, at least. +(To be precise, the information could be stored to lock_word in mutex if +the machine supports atomic swap.) + +The above solution with priority inheritance may become actual in the +future, currently we do not implement any priority twiddling solution. +Our general aim is to reduce the contention of all mutexes by making +them more fine grained. + +The thread table contains information of the current status of each +thread existing in the system, and also the event semaphores used in +suspending the master thread and utility threads when they have nothing +to do. The thread table can be seen as an analogue to the process table +in a traditional Unix implementation. */ + +/** The server system struct */ +struct srv_sys_t{ + ib_mutex_t tasks_mutex; /*!< variable protecting the + tasks queue */ + UT_LIST_BASE_NODE_T(que_thr_t) + tasks; /*!< task queue */ + + ib_mutex_t mutex; /*!< variable protecting the + fields below. 
*/ + ulint n_sys_threads; /*!< size of the sys_threads + array */ + + srv_slot_t* sys_threads; /*!< server thread table */ + + ulint n_threads_active[SRV_MASTER + 1]; + /*!< number of threads active + in a thread class */ + + srv_stats_t::ulint_ctr_1_t + activity_count; /*!< For tracking server + activity */ +}; + +#ifndef HAVE_ATOMIC_BUILTINS +/** Mutex protecting some server global variables. */ +UNIV_INTERN ib_mutex_t server_mutex; +#endif /* !HAVE_ATOMIC_BUILTINS */ + +static srv_sys_t* srv_sys = NULL; + +/** Event to signal the monitor thread. */ +UNIV_INTERN os_event_t srv_monitor_event; + +/** Event to signal the error thread */ +UNIV_INTERN os_event_t srv_error_event; + +/** Event to signal the buffer pool dump/load thread */ +UNIV_INTERN os_event_t srv_buf_dump_event; + +/** The buffer pool dump/load file name */ +UNIV_INTERN char* srv_buf_dump_filename; + +/** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown +and/or load it during startup. */ +UNIV_INTERN char srv_buffer_pool_dump_at_shutdown = FALSE; +UNIV_INTERN char srv_buffer_pool_load_at_startup = FALSE; + +/** Slot index in the srv_sys->sys_threads array for the purge thread. */ +static const ulint SRV_PURGE_SLOT = 1; + +/** Slot index in the srv_sys->sys_threads array for the master thread. */ +static const ulint SRV_MASTER_SLOT = 0; + +UNIV_INTERN os_event_t srv_checkpoint_completed_event; + +UNIV_INTERN os_event_t srv_redo_log_tracked_event; + +UNIV_INTERN bool srv_redo_log_thread_started = false; + +/*********************************************************************//** +Prints counters for work done by srv_master_thread. 
*/ +static +void +srv_print_master_thread_info( +/*=========================*/ + FILE *file) /* in: output stream */ +{ + fprintf(file, "srv_master_thread loops: %lu srv_active, " + "%lu srv_shutdown, %lu srv_idle\n", + srv_main_active_loops, + srv_main_shutdown_loops, + srv_main_idle_loops); + fprintf(file, "srv_master_thread log flush and writes: %lu\n", + srv_log_writes_and_flush); +} + +/*********************************************************************//** +Sets the info describing an i/o thread current state. */ +UNIV_INTERN +void +srv_set_io_thread_op_info( +/*======================*/ + ulint i, /*!< in: the 'segment' of the i/o thread */ + const char* str) /*!< in: constant char string describing the + state */ +{ + ut_a(i < SRV_MAX_N_IO_THREADS); + + srv_io_thread_op_info[i] = str; +} + +/*********************************************************************//** +Resets the info describing an i/o thread current state. */ +UNIV_INTERN +void +srv_reset_io_thread_op_info() +/*=========================*/ +{ + for (ulint i = 0; i < UT_ARR_SIZE(srv_io_thread_op_info); ++i) { + srv_io_thread_op_info[i] = "not started yet"; + } +} + +#ifdef UNIV_DEBUG +/*********************************************************************//** +Validates the type of a thread table slot. +@return TRUE if ok */ +static +ibool +srv_thread_type_validate( +/*=====================*/ + srv_thread_type type) /*!< in: thread type */ +{ + switch (type) { + case SRV_NONE: + break; + case SRV_WORKER: + case SRV_PURGE: + case SRV_MASTER: + return(TRUE); + } + ut_error; + return(FALSE); +} +#endif /* UNIV_DEBUG */ + +/*********************************************************************//** +Gets the type of a thread table slot. 
+@return thread type */ +static +srv_thread_type +srv_slot_get_type( +/*==============*/ + const srv_slot_t* slot) /*!< in: thread slot */ +{ + srv_thread_type type = slot->type; + ut_ad(srv_thread_type_validate(type)); + return(type); +} + +/*********************************************************************//** +Reserves a slot in the thread table for the current thread. +@return reserved slot */ +static +srv_slot_t* +srv_reserve_slot( +/*=============*/ + srv_thread_type type) /*!< in: type of the thread */ +{ + srv_slot_t* slot = 0; + + srv_sys_mutex_enter(); + + ut_ad(srv_thread_type_validate(type)); + + switch (type) { + case SRV_MASTER: + slot = &srv_sys->sys_threads[SRV_MASTER_SLOT]; + break; + + case SRV_PURGE: + slot = &srv_sys->sys_threads[SRV_PURGE_SLOT]; + break; + + case SRV_WORKER: + /* Find an empty slot, skip the master and purge slots. */ + for (slot = &srv_sys->sys_threads[2]; + slot->in_use; + ++slot) { + + ut_a(slot < &srv_sys->sys_threads[ + srv_sys->n_sys_threads]); + } + break; + + case SRV_NONE: + ut_error; + } + + ut_a(!slot->in_use); + + slot->in_use = TRUE; + slot->suspended = FALSE; + slot->type = type; + + ut_ad(srv_slot_get_type(slot) == type); + + ++srv_sys->n_threads_active[type]; + + srv_sys_mutex_exit(); + + return(slot); +} + +/*********************************************************************//** +Suspends the calling thread to wait for the event in its thread slot. +@return the current signal count of the event. */ +static +ib_int64_t +srv_suspend_thread_low( +/*===================*/ + srv_slot_t* slot) /*!< in/out: thread slot */ +{ + + ut_ad(!srv_read_only_mode); + ut_ad(srv_sys_mutex_own()); + + ut_ad(slot->in_use); + + srv_thread_type type = srv_slot_get_type(slot); + + switch (type) { + case SRV_NONE: + ut_error; + + case SRV_MASTER: + /* We have only one master thread and it + should be the first entry always. 
*/ + ut_a(srv_sys->n_threads_active[type] == 1); + break; + + case SRV_PURGE: + /* We have only one purge coordinator thread + and it should be the second entry always. */ + ut_a(srv_sys->n_threads_active[type] == 1); + break; + + case SRV_WORKER: + ut_a(srv_n_purge_threads > 1); + ut_a(srv_sys->n_threads_active[type] > 0); + break; + } + + ut_a(!slot->suspended); + slot->suspended = TRUE; + + ut_a(srv_sys->n_threads_active[type] > 0); + + srv_sys->n_threads_active[type]--; + + return(os_event_reset(slot->event)); +} + +/*********************************************************************//** +Suspends the calling thread to wait for the event in its thread slot. +@return the current signal count of the event. */ +static +ib_int64_t +srv_suspend_thread( +/*===============*/ + srv_slot_t* slot) /*!< in/out: thread slot */ +{ + srv_sys_mutex_enter(); + + ib_int64_t sig_count = srv_suspend_thread_low(slot); + + srv_sys_mutex_exit(); + + return(sig_count); +} + +/*********************************************************************//** +Releases threads of the type given from suspension in the thread table. +NOTE! The server mutex has to be reserved by the caller! +@return number of threads released: this may be less than n if not + enough threads were suspended at the moment. */ +UNIV_INTERN +ulint +srv_release_threads( +/*================*/ + srv_thread_type type, /*!< in: thread type */ + ulint n) /*!< in: number of threads to release */ +{ + ulint i; + ulint count = 0; + + ut_ad(srv_thread_type_validate(type)); + ut_ad(n > 0); + + srv_sys_mutex_enter(); + + for (i = 0; i < srv_sys->n_sys_threads; i++) { + srv_slot_t* slot; + + slot = &srv_sys->sys_threads[i]; + + if (slot->in_use + && srv_slot_get_type(slot) == type + && slot->suspended) { + + switch (type) { + case SRV_NONE: + ut_error; + + case SRV_MASTER: + /* We have only one master thread and it + should be the first entry always. 
*/ + ut_a(n == 1); + ut_a(i == SRV_MASTER_SLOT); + ut_a(srv_sys->n_threads_active[type] == 0); + break; + + case SRV_PURGE: + /* We have only one purge coordinator thread + and it should be the second entry always. */ + ut_a(n == 1); + ut_a(i == SRV_PURGE_SLOT); + ut_a(srv_n_purge_threads > 0); + ut_a(srv_sys->n_threads_active[type] == 0); + break; + + case SRV_WORKER: + ut_a(srv_n_purge_threads > 1); + ut_a(srv_sys->n_threads_active[type] + < srv_n_purge_threads - 1); + break; + } + + slot->suspended = FALSE; + + ++srv_sys->n_threads_active[type]; + + os_event_set(slot->event); + + if (++count == n) { + break; + } + } + } + + srv_sys_mutex_exit(); + + return(count); +} + +/*********************************************************************//** +Release a thread's slot. */ +static +void +srv_free_slot( +/*==========*/ + srv_slot_t* slot) /*!< in/out: thread slot */ +{ + srv_sys_mutex_enter(); + + if (!slot->suspended) { + /* Mark the thread as inactive. */ + srv_suspend_thread_low(slot); + } + + /* Free the slot for reuse. */ + ut_ad(slot->in_use); + slot->in_use = FALSE; + + srv_sys_mutex_exit(); +} + +/*********************************************************************//** +Initializes the server. 
*/ +UNIV_INTERN +void +srv_init(void) +/*==========*/ +{ + ulint n_sys_threads = 0; + ulint srv_sys_sz = sizeof(*srv_sys); + +#ifndef HAVE_ATOMIC_BUILTINS + mutex_create(server_mutex_key, &server_mutex, SYNC_ANY_LATCH); +#endif /* !HAVE_ATOMIC_BUILTINS */ + + mutex_create(srv_innodb_monitor_mutex_key, + &srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK); + + if (!srv_read_only_mode) { + + /* Number of purge threads + master thread */ + n_sys_threads = srv_n_purge_threads + 1; + + srv_sys_sz += n_sys_threads * sizeof(*srv_sys->sys_threads); + } + + srv_sys = static_cast<srv_sys_t*>(mem_zalloc(srv_sys_sz)); + + srv_sys->n_sys_threads = n_sys_threads; + + if (!srv_read_only_mode) { + + mutex_create(srv_sys_mutex_key, &srv_sys->mutex, SYNC_THREADS); + + mutex_create(srv_sys_tasks_mutex_key, + &srv_sys->tasks_mutex, SYNC_ANY_LATCH); + + srv_sys->sys_threads = (srv_slot_t*) &srv_sys[1]; + + for (ulint i = 0; i < srv_sys->n_sys_threads; ++i) { + srv_slot_t* slot = &srv_sys->sys_threads[i]; + + slot->event = os_event_create(); + + ut_a(slot->event); + } + + srv_error_event = os_event_create(); + + srv_monitor_event = os_event_create(); + + srv_buf_dump_event = os_event_create(); + + srv_checkpoint_completed_event = os_event_create(); + + if (srv_track_changed_pages) { + srv_redo_log_tracked_event = os_event_create(); + os_event_set(srv_redo_log_tracked_event); + } + + UT_LIST_INIT(srv_sys->tasks); + } + + /* page_zip_stat_per_index_mutex is acquired from: + 1. page_zip_compress() (after SYNC_FSP) + 2. page_zip_decompress() + 3. i_s_cmp_per_index_fill_low() (where SYNC_DICT is acquired) + 4. innodb_cmp_per_index_update(), no other latches + since we do not acquire any other latches while holding this mutex, + it can have very low level. We pick SYNC_ANY_LATCH for it. 
*/ + + mutex_create( + page_zip_stat_per_index_mutex_key, + &page_zip_stat_per_index_mutex, SYNC_ANY_LATCH); + + /* Create dummy indexes for infimum and supremum records */ + + dict_ind_init(); + + srv_conc_init(); + + /* Initialize some INFORMATION SCHEMA internal structures */ + trx_i_s_cache_init(trx_i_s_cache); + + ut_crc32_init(); + + dict_mem_init(); +} + +/*********************************************************************//** +Frees the data structures created in srv_init(). */ +UNIV_INTERN +void +srv_free(void) +/*==========*/ +{ + srv_conc_free(); + + /* The mutexes srv_sys->mutex and srv_sys->tasks_mutex should have + been freed by sync_close() already. */ + mem_free(srv_sys); + srv_sys = NULL; + + trx_i_s_cache_free(trx_i_s_cache); + + if (!srv_read_only_mode) { + os_event_free(srv_buf_dump_event); + srv_buf_dump_event = NULL; + } +} + +/*********************************************************************//** +Initializes the synchronization primitives, memory system, and the thread +local storage. */ +UNIV_INTERN +void +srv_general_init(void) +/*==================*/ +{ + ut_mem_init(); + /* Reset the system variables in the recovery module. */ + recv_sys_var_init(); + os_sync_init(); + sync_init(); + mem_init(srv_mem_pool_size); + que_init(); + row_mysql_init(); +} + +/*********************************************************************//** +Normalizes init parameter values to use units we use inside InnoDB. 
*/ +static +void +srv_normalize_init_values(void) +/*===========================*/ +{ + ulint n; + ulint i; + + n = srv_n_data_files; + + for (i = 0; i < n; i++) { + srv_data_file_sizes[i] = srv_data_file_sizes[i] + * ((1024 * 1024) / UNIV_PAGE_SIZE); + } + + srv_last_file_size_max = srv_last_file_size_max + * ((1024 * 1024) / UNIV_PAGE_SIZE); + + srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE; + + srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE; + + srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE); +} + +/*********************************************************************//** +Boots the InnoDB server. */ +UNIV_INTERN +void +srv_boot(void) +/*==========*/ +{ + /* Transform the init parameter values given by MySQL to + use units we use inside InnoDB: */ + + srv_normalize_init_values(); + + /* Initialize synchronization primitives, memory management, and thread + local storage */ + + srv_general_init(); + + /* Initialize this module */ + + srv_init(); + srv_mon_create(); +} + +/******************************************************************//** +Refreshes the values used to calculate per-second averages. */ +static +void +srv_refresh_innodb_monitor_stats(void) +/*==================================*/ +{ + mutex_enter(&srv_innodb_monitor_mutex); + + srv_last_monitor_time = time(NULL); + + os_aio_refresh_stats(); + + btr_cur_n_sea_old = btr_cur_n_sea; + btr_cur_n_non_sea_old = btr_cur_n_non_sea; + + log_refresh_stats(); + + buf_refresh_io_stats_all(); + + srv_n_rows_inserted_old = srv_stats.n_rows_inserted; + srv_n_rows_updated_old = srv_stats.n_rows_updated; + srv_n_rows_deleted_old = srv_stats.n_rows_deleted; + srv_n_rows_read_old = srv_stats.n_rows_read; + + mutex_exit(&srv_innodb_monitor_mutex); +} + +/******************************************************************//** +Outputs to a file the output of the InnoDB Monitor. 
+@return FALSE if not all information printed +due to failure to obtain necessary mutex */ +UNIV_INTERN +ibool +srv_printf_innodb_monitor( +/*======================*/ + FILE* file, /*!< in: output stream */ + ibool nowait, /*!< in: whether to wait for the + lock_sys_t:: mutex */ + ulint* trx_start_pos, /*!< out: file position of the start of + the list of active transactions */ + ulint* trx_end) /*!< out: file position of the end of + the list of active transactions */ +{ + double time_elapsed; + time_t current_time; + ulint n_reserved; + ibool ret; + + ulong btr_search_sys_constant; + ulong btr_search_sys_variable; + ulint lock_sys_subtotal; + ulint recv_sys_subtotal; + + ulint i; + trx_t* trx; + + mutex_enter(&srv_innodb_monitor_mutex); + + current_time = time(NULL); + + /* We add 0.001 seconds to time_elapsed to prevent division + by zero if two users happen to call SHOW ENGINE INNODB STATUS at the + same time */ + + time_elapsed = difftime(current_time, srv_last_monitor_time) + + 0.001; + + srv_last_monitor_time = time(NULL); + + fputs("\n=====================================\n", file); + + ut_print_timestamp(file); + fprintf(file, + " INNODB MONITOR OUTPUT\n" + "=====================================\n" + "Per second averages calculated from the last %lu seconds\n", + (ulong) time_elapsed); + + fputs("-----------------\n" + "BACKGROUND THREAD\n" + "-----------------\n", file); + srv_print_master_thread_info(file); + + fputs("----------\n" + "SEMAPHORES\n" + "----------\n", file); + sync_print(file); + + /* Conceptually, srv_innodb_monitor_mutex has a very high latching + order level in sync0sync.h, while dict_foreign_err_mutex has a very + low level 135. Therefore we can reserve the latter mutex here without + a danger of a deadlock of threads. 
*/ + + mutex_enter(&dict_foreign_err_mutex); + + if (!srv_read_only_mode && ftell(dict_foreign_err_file) != 0L) { + fputs("------------------------\n" + "LATEST FOREIGN KEY ERROR\n" + "------------------------\n", file); + ut_copy_file(file, dict_foreign_err_file); + } + + mutex_exit(&dict_foreign_err_mutex); + + /* Only if lock_print_info_summary proceeds correctly, + before we call the lock_print_info_all_transactions + to print all the lock information. IMPORTANT NOTE: This + function acquires the lock mutex on success. */ + ret = lock_print_info_summary(file, nowait); + + if (ret) { + if (trx_start_pos) { + long t = ftell(file); + if (t < 0) { + *trx_start_pos = ULINT_UNDEFINED; + } else { + *trx_start_pos = (ulint) t; + } + } + + /* NOTE: If we get here then we have the lock mutex. This + function will release the lock mutex that we acquired when + we called the lock_print_info_summary() function earlier. */ + + lock_print_info_all_transactions(file); + + if (trx_end) { + long t = ftell(file); + if (t < 0) { + *trx_end = ULINT_UNDEFINED; + } else { + *trx_end = (ulint) t; + } + } + } + + fputs("--------\n" + "FILE I/O\n" + "--------\n", file); + os_aio_print(file); + + fputs("-------------------------------------\n" + "INSERT BUFFER AND ADAPTIVE HASH INDEX\n" + "-------------------------------------\n", file); + ibuf_print(file); + + + fprintf(file, + "%.2f hash searches/s, %.2f non-hash searches/s\n", + (btr_cur_n_sea - btr_cur_n_sea_old) + / time_elapsed, + (btr_cur_n_non_sea - btr_cur_n_non_sea_old) + / time_elapsed); + btr_cur_n_sea_old = btr_cur_n_sea; + btr_cur_n_non_sea_old = btr_cur_n_non_sea; + + fputs("---\n" + "LOG\n" + "---\n", file); + log_print(file); + + fputs("----------------------\n" + "BUFFER POOL AND MEMORY\n" + "----------------------\n", file); + fprintf(file, + "Total memory allocated " ULINTPF + "; in additional pool allocated " ULINTPF "\n", + ut_total_allocated_memory, + mem_pool_get_reserved(mem_comm_pool)); + + fprintf(file, + 
"Total memory allocated by read views " ULINTPF "\n", + os_atomic_increment_lint(&srv_read_views_memory, 0)); + + /* Calculate AHI constant and variable memory allocations */ + + btr_search_sys_constant = 0; + btr_search_sys_variable = 0; + + ut_ad(btr_search_sys->hash_tables); + + for (i = 0; i < btr_search_index_num; i++) { + hash_table_t* ht = btr_search_sys->hash_tables[i]; + + ut_ad(ht); + ut_ad(ht->heap); + + /* Multiple mutexes/heaps are currently never used for adaptive + hash index tables. */ + ut_ad(!ht->n_sync_obj); + ut_ad(!ht->heaps); + + btr_search_sys_variable += mem_heap_get_size(ht->heap); + btr_search_sys_constant += ht->n_cells * sizeof(hash_cell_t); + } + + lock_sys_subtotal = 0; + if (trx_sys) { + mutex_enter(&trx_sys->mutex); + trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list); + while (trx) { + lock_sys_subtotal + += ((trx->lock.lock_heap) + ? mem_heap_get_size(trx->lock.lock_heap) + : 0); + trx = UT_LIST_GET_NEXT(mysql_trx_list, trx); + } + mutex_exit(&trx_sys->mutex); + } + + recv_sys_subtotal = ((recv_sys && recv_sys->addr_hash) + ? mem_heap_get_size(recv_sys->heap) : 0); + + fprintf(file, + "Internal hash tables (constant factor + variable factor)\n" + " Adaptive hash index %lu \t(%lu + " ULINTPF ")\n" + " Page hash %lu (buffer pool 0 only)\n" + " Dictionary cache %lu \t(%lu + " ULINTPF ")\n" + " File system %lu \t(%lu + " ULINTPF ")\n" + " Lock system %lu \t(%lu + " ULINTPF ")\n" + " Recovery system %lu \t(%lu + " ULINTPF ")\n", + + btr_search_sys_constant + btr_search_sys_variable, + btr_search_sys_constant, + btr_search_sys_variable, + + (ulong) (buf_pool_from_array(0)->page_hash->n_cells * sizeof(hash_cell_t)), + + (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells + + dict_sys->table_id_hash->n_cells + ) * sizeof(hash_cell_t) + + dict_sys->size) : 0), + (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells + + dict_sys->table_id_hash->n_cells + ) * sizeof(hash_cell_t)) : 0), + dict_sys ? 
(dict_sys->size) : 0, + + (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t) + + fil_system_hash_nodes()), + (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)), + fil_system_hash_nodes(), + + (ulong) ((lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0) + + lock_sys_subtotal), + (ulong) (lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0), + lock_sys_subtotal, + + (ulong) (((recv_sys && recv_sys->addr_hash) + ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0) + + recv_sys_subtotal), + (ulong) ((recv_sys && recv_sys->addr_hash) + ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0), + recv_sys_subtotal); + + fprintf(file, "Dictionary memory allocated " ULINTPF "\n", + dict_sys->size); + + buf_print_io(file); + + fputs("--------------\n" + "ROW OPERATIONS\n" + "--------------\n", file); + fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n", + (long) srv_conc_get_active_threads(), + srv_conc_get_waiting_threads()); + + mutex_enter(&trx_sys->mutex); + + fprintf(file, "%lu read views open inside InnoDB\n", + UT_LIST_GET_LEN(trx_sys->view_list)); + + fprintf(file, "%lu RW transactions active inside InnoDB\n", + UT_LIST_GET_LEN(trx_sys->rw_trx_list)); + + fprintf(file, "%lu RO transactions active inside InnoDB\n", + UT_LIST_GET_LEN(trx_sys->ro_trx_list)); + + fprintf(file, "%lu out of %lu descriptors used\n", + trx_sys->descr_n_used, trx_sys->descr_n_max); + + if (UT_LIST_GET_LEN(trx_sys->view_list)) { + read_view_t* view = UT_LIST_GET_LAST(trx_sys->view_list); + + if (view) { + fprintf(file, "---OLDEST VIEW---\n"); + read_view_print(file, view); + fprintf(file, "-----------------\n"); + } + } + + mutex_exit(&trx_sys->mutex); + + n_reserved = fil_space_get_n_reserved_extents(0); + if (n_reserved > 0) { + fprintf(file, + "%lu tablespace extents now reserved for" + " B-tree split operations\n", + (ulong) n_reserved); + } + +#ifdef UNIV_LINUX + fprintf(file, "Main thread process no. 
%lu, id %lu, state: %s\n", + (ulong) srv_main_thread_process_no, + (ulong) srv_main_thread_id, + srv_main_thread_op_info); +#else + fprintf(file, "Main thread id %lu, state: %s\n", + (ulong) srv_main_thread_id, + srv_main_thread_op_info); +#endif + fprintf(file, + "Number of rows inserted " ULINTPF + ", updated " ULINTPF ", deleted " ULINTPF + ", read " ULINTPF "\n", + (ulint) srv_stats.n_rows_inserted, + (ulint) srv_stats.n_rows_updated, + (ulint) srv_stats.n_rows_deleted, + (ulint) srv_stats.n_rows_read); + fprintf(file, + "%.2f inserts/s, %.2f updates/s," + " %.2f deletes/s, %.2f reads/s\n", + ((ulint) srv_stats.n_rows_inserted - srv_n_rows_inserted_old) + / time_elapsed, + ((ulint) srv_stats.n_rows_updated - srv_n_rows_updated_old) + / time_elapsed, + ((ulint) srv_stats.n_rows_deleted - srv_n_rows_deleted_old) + / time_elapsed, + ((ulint) srv_stats.n_rows_read - srv_n_rows_read_old) + / time_elapsed); + + srv_n_rows_inserted_old = srv_stats.n_rows_inserted; + srv_n_rows_updated_old = srv_stats.n_rows_updated; + srv_n_rows_deleted_old = srv_stats.n_rows_deleted; + srv_n_rows_read_old = srv_stats.n_rows_read; + + fputs("----------------------------\n" + "END OF INNODB MONITOR OUTPUT\n" + "============================\n", file); + mutex_exit(&srv_innodb_monitor_mutex); + fflush(file); + + return(ret); +} + +/******************************************************************//** +Function to pass InnoDB status variables to MySQL */ +UNIV_INTERN +void +srv_export_innodb_status(void) +/*==========================*/ +{ + buf_pool_stat_t stat; + buf_pools_list_size_t buf_pools_list_size; + ulint LRU_len; + ulint free_len; + ulint flush_list_len; + ulint mem_adaptive_hash, mem_dictionary; + read_view_t* oldest_view; + ulint i; + + buf_get_total_stat(&stat); + buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len); + buf_get_total_list_size_in_bytes(&buf_pools_list_size); + + mem_adaptive_hash = 0; + + ut_ad(btr_search_sys->hash_tables); + + for (i = 0; i < 
btr_search_index_num; i++) { + hash_table_t* ht = btr_search_sys->hash_tables[i]; + + ut_ad(ht); + ut_ad(ht->heap); + /* Multiple mutexes/heaps are currently never used for adaptive + hash index tables. */ + ut_ad(!ht->n_sync_obj); + ut_ad(!ht->heaps); + + mem_adaptive_hash += mem_heap_get_size(ht->heap); + mem_adaptive_hash += ht->n_cells * sizeof(hash_cell_t); + } + + mem_dictionary = (dict_sys ? ((dict_sys->table_hash->n_cells + + dict_sys->table_id_hash->n_cells + ) * sizeof(hash_cell_t) + + dict_sys->size) : 0); + + mutex_enter(&srv_innodb_monitor_mutex); + + export_vars.innodb_data_pending_reads = + os_n_pending_reads; + + export_vars.innodb_data_pending_writes = + os_n_pending_writes; + + export_vars.innodb_data_pending_fsyncs = + fil_n_pending_log_flushes + + fil_n_pending_tablespace_flushes; + export_vars.innodb_adaptive_hash_hash_searches + = btr_cur_n_sea; + export_vars.innodb_adaptive_hash_non_hash_searches + = btr_cur_n_non_sea; + export_vars.innodb_background_log_sync + = srv_log_writes_and_flush; + + export_vars.innodb_data_fsyncs = os_n_fsyncs; + + export_vars.innodb_data_read = srv_stats.data_read; + + export_vars.innodb_data_reads = os_n_file_reads; + + export_vars.innodb_data_writes = os_n_file_writes; + + export_vars.innodb_data_written = srv_stats.data_written; + + export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets; + + export_vars.innodb_buffer_pool_write_requests = + srv_stats.buf_pool_write_requests; + + export_vars.innodb_buffer_pool_wait_free = + srv_stats.buf_pool_wait_free; + + export_vars.innodb_buffer_pool_pages_flushed = + srv_stats.buf_pool_flushed; + + export_vars.innodb_buffer_pool_reads = srv_stats.buf_pool_reads; + + export_vars.innodb_buffer_pool_read_ahead_rnd = + stat.n_ra_pages_read_rnd; + + export_vars.innodb_buffer_pool_read_ahead = + stat.n_ra_pages_read; + + export_vars.innodb_buffer_pool_read_ahead_evicted = + stat.n_ra_pages_evicted; + + export_vars.innodb_buffer_pool_pages_LRU_flushed = + 
stat.buf_lru_flush_page_count; + + export_vars.innodb_buffer_pool_pages_data = LRU_len; + + export_vars.innodb_buffer_pool_bytes_data = + buf_pools_list_size.LRU_bytes + + buf_pools_list_size.unzip_LRU_bytes; + + export_vars.innodb_buffer_pool_pages_dirty = flush_list_len; + + export_vars.innodb_buffer_pool_bytes_dirty = + buf_pools_list_size.flush_list_bytes; + + export_vars.innodb_buffer_pool_pages_free = free_len; + + export_vars.innodb_deadlocks = srv_stats.lock_deadlock_count; + +#ifdef UNIV_DEBUG + export_vars.innodb_buffer_pool_pages_latched = + buf_get_latched_pages_number(); +#endif /* UNIV_DEBUG */ + export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages(); + + export_vars.innodb_buffer_pool_pages_misc = + buf_pool_get_n_pages() - LRU_len - free_len; + + export_vars.innodb_buffer_pool_pages_made_young + = stat.n_pages_made_young; + export_vars.innodb_buffer_pool_pages_made_not_young + = stat.n_pages_not_made_young; + export_vars.innodb_buffer_pool_pages_old = 0; + for (i = 0; i < srv_buf_pool_instances; i++) { + buf_pool_t* buf_pool = buf_pool_from_array(i); + export_vars.innodb_buffer_pool_pages_old + += buf_pool->LRU_old_len; + } + export_vars.innodb_checkpoint_age + = (log_sys->lsn - log_sys->last_checkpoint_lsn); + export_vars.innodb_checkpoint_max_age + = log_sys->max_checkpoint_age; + export_vars.innodb_history_list_length + = trx_sys->rseg_history_len; + ibuf_export_ibuf_status( + &export_vars.innodb_ibuf_size, + &export_vars.innodb_ibuf_free_list, + &export_vars.innodb_ibuf_segment_size, + &export_vars.innodb_ibuf_merges, + &export_vars.innodb_ibuf_merged_inserts, + &export_vars.innodb_ibuf_merged_delete_marks, + &export_vars.innodb_ibuf_merged_deletes, + &export_vars.innodb_ibuf_discarded_inserts, + &export_vars.innodb_ibuf_discarded_delete_marks, + &export_vars.innodb_ibuf_discarded_deletes); + export_vars.innodb_lsn_current + = log_sys->lsn; + export_vars.innodb_lsn_flushed + = log_sys->flushed_to_disk_lsn; + 
export_vars.innodb_lsn_last_checkpoint + = log_sys->last_checkpoint_lsn; + export_vars.innodb_master_thread_active_loops + = srv_main_active_loops; + export_vars.innodb_master_thread_idle_loops + = srv_main_idle_loops; + export_vars.innodb_max_trx_id + = trx_sys->max_trx_id; + export_vars.innodb_mem_adaptive_hash + = mem_adaptive_hash; + export_vars.innodb_mem_dictionary + = mem_dictionary; + export_vars.innodb_mem_total + = ut_total_allocated_memory; + export_vars.innodb_mutex_os_waits + = mutex_os_wait_count; + export_vars.innodb_mutex_spin_rounds + = mutex_spin_round_count; + export_vars.innodb_mutex_spin_waits + = mutex_spin_wait_count; + export_vars.innodb_s_lock_os_waits + = rw_lock_stats.rw_s_os_wait_count; + export_vars.innodb_s_lock_spin_rounds + = rw_lock_stats.rw_s_spin_round_count; + export_vars.innodb_s_lock_spin_waits + = rw_lock_stats.rw_s_spin_wait_count; + export_vars.innodb_x_lock_os_waits + = rw_lock_stats.rw_x_os_wait_count; + export_vars.innodb_x_lock_spin_rounds + = rw_lock_stats.rw_x_spin_round_count; + export_vars.innodb_x_lock_spin_waits + = rw_lock_stats.rw_x_spin_wait_count; + + oldest_view = UT_LIST_GET_LAST(trx_sys->view_list); + export_vars.innodb_oldest_view_low_limit_trx_id + = oldest_view ? 
oldest_view->low_limit_id : 0; + + export_vars.innodb_purge_trx_id = purge_sys->limit.trx_no; + export_vars.innodb_purge_undo_no = purge_sys->limit.undo_no; + export_vars.innodb_current_row_locks + = lock_sys->rec_num; + +#ifdef HAVE_ATOMIC_BUILTINS + export_vars.innodb_have_atomic_builtins = 1; +#else + export_vars.innodb_have_atomic_builtins = 0; +#endif + export_vars.innodb_page_size = UNIV_PAGE_SIZE; + + export_vars.innodb_log_waits = srv_stats.log_waits; + + export_vars.innodb_os_log_written = srv_stats.os_log_written; + + export_vars.innodb_os_log_fsyncs = fil_n_log_flushes; + + export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes; + + export_vars.innodb_os_log_pending_writes = + srv_stats.os_log_pending_writes; + + export_vars.innodb_log_write_requests = srv_stats.log_write_requests; + + export_vars.innodb_log_writes = srv_stats.log_writes; + + export_vars.innodb_dblwr_pages_written = + srv_stats.dblwr_pages_written; + + export_vars.innodb_dblwr_writes = srv_stats.dblwr_writes; + + export_vars.innodb_pages_created = stat.n_pages_created; + + export_vars.innodb_pages_read = stat.n_pages_read; + + export_vars.innodb_pages_written = stat.n_pages_written; + + export_vars.innodb_row_lock_waits = srv_stats.n_lock_wait_count; + + export_vars.innodb_row_lock_current_waits = + srv_stats.n_lock_wait_current_count; + + export_vars.innodb_row_lock_time = srv_stats.n_lock_wait_time / 1000; + + if (srv_stats.n_lock_wait_count > 0) { + + export_vars.innodb_row_lock_time_avg = (ulint) + (srv_stats.n_lock_wait_time + / 1000 / srv_stats.n_lock_wait_count); + + } else { + export_vars.innodb_row_lock_time_avg = 0; + } + + export_vars.innodb_row_lock_time_max = + lock_sys->n_lock_max_wait_time / 1000; + + export_vars.innodb_rows_read = srv_stats.n_rows_read; + + export_vars.innodb_rows_inserted = srv_stats.n_rows_inserted; + + export_vars.innodb_rows_updated = srv_stats.n_rows_updated; + + export_vars.innodb_rows_deleted = srv_stats.n_rows_deleted; + + 
export_vars.innodb_num_open_files = fil_n_file_opened; + + export_vars.innodb_truncated_status_writes = + srv_truncated_status_writes; + + export_vars.innodb_available_undo_logs = srv_available_undo_logs; + export_vars.innodb_read_views_memory + = os_atomic_increment_lint(&srv_read_views_memory, 0); + export_vars.innodb_descriptors_memory + = os_atomic_increment_lint(&srv_descriptors_memory, 0); + +#ifdef UNIV_DEBUG + rw_lock_s_lock(&purge_sys->latch); + trx_id_t done_trx_no = purge_sys->done.trx_no; + trx_id_t up_limit_id = purge_sys->view + ? purge_sys->view->up_limit_id + : 0; + rw_lock_s_unlock(&purge_sys->latch); + + mutex_enter(&trx_sys->mutex); + trx_id_t max_trx_id = trx_sys->rw_max_trx_id; + mutex_exit(&trx_sys->mutex); + + if (!done_trx_no || max_trx_id < done_trx_no - 1) { + export_vars.innodb_purge_trx_id_age = 0; + } else { + export_vars.innodb_purge_trx_id_age = + (ulint) (max_trx_id - done_trx_no + 1); + } + + if (!up_limit_id + || max_trx_id < up_limit_id) { + export_vars.innodb_purge_view_trx_id_age = 0; + } else { + export_vars.innodb_purge_view_trx_id_age = + (ulint) (max_trx_id - up_limit_id); + } +#endif /* UNIV_DEBUG */ + + mutex_exit(&srv_innodb_monitor_mutex); +} + +/*********************************************************************//** +A thread which prints the info output by various InnoDB monitors. 
+@return a dummy parameter */ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(srv_monitor_thread)( +/*===============================*/ + void* arg __attribute__((unused))) + /*!< in: a dummy parameter required by + os_thread_create */ +{ + ib_int64_t sig_count; + double time_elapsed; + time_t current_time; + time_t last_table_monitor_time; + time_t last_tablespace_monitor_time; + time_t last_monitor_time; + ulint mutex_skipped; + ibool last_srv_print_monitor; + + ut_ad(!srv_read_only_mode); + +#ifdef UNIV_DEBUG_THREAD_CREATION + fprintf(stderr, "Lock timeout thread starts, id %lu\n", + os_thread_pf(os_thread_get_curr_id())); +#endif /* UNIV_DEBUG_THREAD_CREATION */ + +#ifdef UNIV_PFS_THREAD + pfs_register_thread(srv_monitor_thread_key); +#endif /* UNIV_PFS_THREAD */ + srv_monitor_active = TRUE; + + UT_NOT_USED(arg); + srv_last_monitor_time = ut_time(); + last_table_monitor_time = ut_time(); + last_tablespace_monitor_time = ut_time(); + last_monitor_time = ut_time(); + mutex_skipped = 0; + last_srv_print_monitor = srv_print_innodb_monitor; +loop: + /* Wake up every 5 seconds to see if we need to print + monitor information or if signalled at shutdown. */ + + sig_count = os_event_reset(srv_monitor_event); + + os_event_wait_time_low(srv_monitor_event, 5000000, sig_count); + + current_time = ut_time(); + + time_elapsed = difftime(current_time, last_monitor_time); + + if (time_elapsed > 15) { + last_monitor_time = ut_time(); + + if (srv_print_innodb_monitor) { + /* Reset mutex_skipped counter everytime + srv_print_innodb_monitor changes. 
This is to + ensure we will not be blocked by lock_sys->mutex + for short duration information printing, + such as requested by sync_array_print_long_waits() */ + if (!last_srv_print_monitor) { + mutex_skipped = 0; + last_srv_print_monitor = TRUE; + } + + if (!srv_printf_innodb_monitor(stderr, + MUTEX_NOWAIT(mutex_skipped), + NULL, NULL)) { + mutex_skipped++; + } else { + /* Reset the counter */ + mutex_skipped = 0; + } + } else { + last_srv_print_monitor = FALSE; + } + + + /* We don't create the temp files or associated + mutexes in read-only-mode */ + + if (!srv_read_only_mode && srv_innodb_status) { + mutex_enter(&srv_monitor_file_mutex); + rewind(srv_monitor_file); + if (!srv_printf_innodb_monitor(srv_monitor_file, + MUTEX_NOWAIT(mutex_skipped), + NULL, NULL)) { + mutex_skipped++; + } else { + mutex_skipped = 0; + } + + os_file_set_eof(srv_monitor_file); + mutex_exit(&srv_monitor_file_mutex); + } + + if (srv_print_innodb_tablespace_monitor + && difftime(current_time, + last_tablespace_monitor_time) > 60) { + last_tablespace_monitor_time = ut_time(); + + fputs("========================" + "========================\n", + stderr); + + ut_print_timestamp(stderr); + + fputs(" INNODB TABLESPACE MONITOR OUTPUT\n" + "========================" + "========================\n", + stderr); + + fsp_print(0); + fputs("Validating tablespace\n", stderr); + fsp_validate(0); + fputs("Validation ok\n" + "---------------------------------------\n" + "END OF INNODB TABLESPACE MONITOR OUTPUT\n" + "=======================================\n", + stderr); + } + + if (srv_print_innodb_table_monitor + && difftime(current_time, last_table_monitor_time) > 60) { + + last_table_monitor_time = ut_time(); + + fprintf(stderr, "Warning: %s\n", + DEPRECATED_MSG_INNODB_TABLE_MONITOR); + + fputs("===========================================\n", + stderr); + + ut_print_timestamp(stderr); + + fputs(" INNODB TABLE MONITOR OUTPUT\n" + "===========================================\n", + stderr); + 
dict_print(); + + fputs("-----------------------------------\n" + "END OF INNODB TABLE MONITOR OUTPUT\n" + "==================================\n", + stderr); + + fprintf(stderr, "Warning: %s\n", + DEPRECATED_MSG_INNODB_TABLE_MONITOR); + } + } + + if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) { + goto exit_func; + } + + if (srv_print_innodb_monitor + || srv_print_innodb_lock_monitor + || srv_print_innodb_tablespace_monitor + || srv_print_innodb_table_monitor) { + goto loop; + } + + goto loop; + +exit_func: + srv_monitor_active = FALSE; + + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. */ + + os_thread_exit(NULL); + + OS_THREAD_DUMMY_RETURN; +} + +/*********************************************************************//** +A thread which prints warnings about semaphore waits which have lasted +too long. These can be used to track bugs which cause hangs. +@return a dummy parameter */ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(srv_error_monitor_thread)( +/*=====================================*/ + void* arg __attribute__((unused))) + /*!< in: a dummy parameter required by + os_thread_create */ +{ + /* number of successive fatal timeouts observed */ + ulint fatal_cnt = 0; + lsn_t old_lsn; + lsn_t new_lsn; + ib_int64_t sig_count; + /* longest waiting thread for a semaphore */ + os_thread_id_t waiter = os_thread_get_curr_id(); + os_thread_id_t old_waiter = waiter; + /* the semaphore that is being waited for */ + const void* sema = NULL; + const void* old_sema = NULL; + + ut_ad(!srv_read_only_mode); + + old_lsn = srv_start_lsn; + +#ifdef UNIV_DEBUG_THREAD_CREATION + fprintf(stderr, "Error monitor thread starts, id %lu\n", + os_thread_pf(os_thread_get_curr_id())); +#endif /* UNIV_DEBUG_THREAD_CREATION */ + +#ifdef UNIV_PFS_THREAD + pfs_register_thread(srv_error_monitor_thread_key); +#endif /* UNIV_PFS_THREAD */ + srv_error_monitor_active = TRUE; + +loop: + /* Try to track 
a strange bug reported by Harald Fuchs and others, + where the lsn seems to decrease at times */ + + new_lsn = log_get_lsn(); + + if (new_lsn < old_lsn) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: old log sequence number " LSN_PF + " was greater\n" + "InnoDB: than the new log sequence number " LSN_PF "!\n" + "InnoDB: Please submit a bug report" + " to http://bugs.mysql.com\n", + old_lsn, new_lsn); + ut_ad(0); + } + + old_lsn = new_lsn; + + if (difftime(time(NULL), srv_last_monitor_time) > 60) { + /* We referesh InnoDB Monitor values so that averages are + printed from at most 60 last seconds */ + + srv_refresh_innodb_monitor_stats(); + } + + /* Update the statistics collected for deciding LRU + eviction policy. */ + buf_LRU_stat_update(); + + /* In case mutex_exit is not a memory barrier, it is + theoretically possible some threads are left waiting though + the semaphore is already released. Wake up those threads: */ + + sync_arr_wake_threads_if_sema_free(); + + if (sync_array_print_long_waits(&waiter, &sema) + && sema == old_sema && os_thread_eq(waiter, old_waiter)) { + fatal_cnt++; + if (fatal_cnt > 10) { + + fprintf(stderr, + "InnoDB: Error: semaphore wait has lasted" + " > %lu seconds\n" + "InnoDB: We intentionally crash the server," + " because it appears to be hung.\n", + (ulong) srv_fatal_semaphore_wait_threshold); + + ut_error; + } + } else { + fatal_cnt = 0; + old_waiter = waiter; + old_sema = sema; + } + + if (srv_kill_idle_transaction && trx_sys) { + trx_t* trx; + time_t now; +rescan_idle: + now = time(NULL); + mutex_enter(&trx_sys->mutex); + trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list); + while (trx) { + if (!trx_state_eq(trx, TRX_STATE_NOT_STARTED) + && trx_state_eq(trx, TRX_STATE_ACTIVE) + && trx->mysql_thd + && innobase_thd_is_idle(trx->mysql_thd)) { + ib_int64_t start_time = innobase_thd_get_start_time(trx->mysql_thd); + ulong thd_id = innobase_thd_get_thread_id(trx->mysql_thd); + + if (trx->last_stmt_start != 
start_time) { + trx->idle_start = now; + trx->last_stmt_start = start_time; + } else if (difftime(now, trx->idle_start) + > srv_kill_idle_transaction) { + /* kill the session */ + mutex_exit(&trx_sys->mutex); + innobase_thd_kill(thd_id); + goto rescan_idle; + } + } + trx = UT_LIST_GET_NEXT(mysql_trx_list, trx); + } + mutex_exit(&trx_sys->mutex); + } + + /* Flush stderr so that a database user gets the output + to possible MySQL error file */ + + fflush(stderr); + + sig_count = os_event_reset(srv_error_event); + + os_event_wait_time_low(srv_error_event, 1000000, sig_count); + + if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) { + + goto loop; + } + + srv_error_monitor_active = FALSE; + + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. */ + + os_thread_exit(NULL); + + OS_THREAD_DUMMY_RETURN; +} + +/******************************************************************//** +Increment the server activity count. */ +UNIV_INTERN +void +srv_inc_activity_count(void) +/*========================*/ +{ + srv_sys->activity_count.inc(); +} + +/**********************************************************************//** +Check whether any background thread is active. If so return the thread +type. +@return SRV_NONE if all are suspended or have exited, thread +type if any are still active. */ +UNIV_INTERN +srv_thread_type +srv_get_active_thread_type(void) +/*============================*/ +{ + srv_thread_type ret = SRV_NONE; + + if (srv_read_only_mode) { + return(SRV_NONE); + } + + srv_sys_mutex_enter(); + + for (ulint i = SRV_WORKER; i <= SRV_MASTER; ++i) { + if (srv_sys->n_threads_active[i] != 0) { + ret = static_cast<srv_thread_type>(i); + break; + } + } + + srv_sys_mutex_exit(); + + /* Check only on shutdown. 
*/ + if (ret == SRV_NONE + && srv_shutdown_state != SRV_SHUTDOWN_NONE + && trx_purge_state() != PURGE_STATE_DISABLED + && trx_purge_state() != PURGE_STATE_EXIT) { + + ret = SRV_PURGE; + } + + return(ret); +} + +/**********************************************************************//** +Check whether any background thread are active. If so print which thread +is active. Send the threads wakeup signal. +@return name of thread that is active or NULL */ +UNIV_INTERN +const char* +srv_any_background_threads_are_active(void) +/*=======================================*/ +{ + const char* thread_active = NULL; + + if (srv_read_only_mode) { + return(NULL); + } else if (srv_error_monitor_active) { + thread_active = "srv_error_monitor_thread"; + } else if (lock_sys->timeout_thread_active) { + thread_active = "srv_lock_timeout thread"; + } else if (srv_monitor_active) { + thread_active = "srv_monitor_thread"; + } else if (srv_buf_dump_thread_active) { + thread_active = "buf_dump_thread"; + } else if (srv_dict_stats_thread_active) { + thread_active = "dict_stats_thread"; + } + + os_event_set(srv_error_event); + os_event_set(srv_monitor_event); + os_event_set(srv_buf_dump_event); + os_event_set(lock_sys->timeout_event); + os_event_set(dict_stats_event); + + return(thread_active); +} + +/******************************************************************//** +A thread which follows the redo log and outputs the changed page bitmap. 
+@return a dummy value */ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(srv_redo_log_follow_thread)( +/*=======================================*/ + void* arg __attribute__((unused))) /*!< in: a dummy parameter + required by + os_thread_create */ +{ + ut_ad(!srv_read_only_mode); + +#ifdef UNIV_DEBUG_THREAD_CREATION + fprintf(stderr, "Redo log follower thread starts, id %lu\n", + os_thread_pf(os_thread_get_curr_id())); +#endif + +#ifdef UNIV_PFS_THREAD + pfs_register_thread(srv_log_tracking_thread_key); +#endif + + my_thread_init(); + srv_redo_log_thread_started = true; + + do { + os_event_wait(srv_checkpoint_completed_event); + os_event_reset(srv_checkpoint_completed_event); + +#ifdef UNIV_DEBUG + if (!srv_track_changed_pages) { + continue; + } +#endif + + if (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) { + if (!log_online_follow_redo_log()) { + /* TODO: sync with I_S log tracking status? */ + ib_logf(IB_LOG_LEVEL_ERROR, + "log tracking bitmap write failed, " + "stopping log tracking thread!\n"); + break; + } + os_event_set(srv_redo_log_tracked_event); + } + + } while (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE); + + srv_track_changed_pages = FALSE; + log_online_read_shutdown(); + os_event_set(srv_redo_log_tracked_event); + srv_redo_log_thread_started = false; /* Defensive, not required */ + + my_thread_end(); + os_thread_exit(NULL); + + OS_THREAD_DUMMY_RETURN; +} + +/*************************************************************//** +Removes old archived transaction log files. 
+Both parameters couldn't be provided at the same time */ +dberr_t +purge_archived_logs( + time_t before_date, /*!< in: all files modified + before timestamp should be removed */ + lsn_t before_no) /*!< in: files with this number in name + and earler should be removed */ +{ + log_group_t* group = UT_LIST_GET_FIRST(log_sys->log_groups); + + os_file_dir_t dir; + os_file_stat_t fileinfo; + char archived_log_filename[OS_FILE_MAX_PATH]; + char namegen[OS_FILE_MAX_PATH]; + ulint dirnamelen; + + if (srv_arch_dir) { + dir = os_file_opendir(srv_arch_dir, FALSE); + if (!dir) { + ib_logf(IB_LOG_LEVEL_WARN, + "opening archived log directory %s failed. " + "Purge archived logs are not available\n", + srv_arch_dir); + /* failed to open directory */ + return(DB_ERROR); + } + } else { + /* log archive directory is not specified */ + return(DB_ERROR); + } + + dirnamelen = strlen(srv_arch_dir); + + memcpy(archived_log_filename, srv_arch_dir, dirnamelen); + if (dirnamelen && + archived_log_filename[dirnamelen - 1] != SRV_PATH_SEPARATOR) { + archived_log_filename[dirnamelen++] = SRV_PATH_SEPARATOR; + } + + memset(&fileinfo, 0, sizeof(fileinfo)); + while(!os_file_readdir_next_file(srv_arch_dir, dir, + &fileinfo) ) { + if (strncmp(fileinfo.name, + IB_ARCHIVED_LOGS_PREFIX, IB_ARCHIVED_LOGS_PREFIX_LEN)) { + continue; + } + if (dirnamelen + strlen(fileinfo.name) + 2 > OS_FILE_MAX_PATH) + continue; + + snprintf(archived_log_filename + dirnamelen, OS_FILE_MAX_PATH, + "%s", fileinfo.name); + + if (before_no) { + ib_uint64_t log_file_no = strtoull(fileinfo.name + + IB_ARCHIVED_LOGS_PREFIX_LEN, + NULL, 10); + if (log_file_no == 0 || before_no <= log_file_no) { + continue; + } + } else { + fileinfo.mtime = 0; + if (os_file_get_status(archived_log_filename, + &fileinfo, false) != DB_SUCCESS || + fileinfo.mtime == 0) { + continue; + } + + if (before_date == 0 || fileinfo.mtime > before_date) { + continue; + } + } + + /* We are going to delete archived file. 
Acquire log_sys->mutex + to make sure that we are the only who try to delete file. This + also prevents log system from using this file. Do not delete + file if it is currently in progress of writting or have + pending IO. This is enforced by checking: + 1. fil_space_contains_node. + 2. group->archived_offset % group->file_size != 0, i.e. + there is archive in progress and we are going to delete it. + This covers 3 cases: + a. Usual case when we have one archive in progress, + both 1 and 2 are TRUE + b. When we have more then 1 archive in fil_space, + this can happen when flushed LSN range crosses file + boundary + c. When we have empty fil_space, but existing file will be + opened once archiving operation is requested. This usually + happens on startup. + */ + + mutex_enter(&log_sys->mutex); + + log_archived_file_name_gen(namegen, sizeof(namegen), + group->id, group->archived_file_no); + + if (fil_space_contains_node(group->archive_space_id, + archived_log_filename) || + (group->archived_offset % group->file_size != 0 && + strcmp(namegen, archived_log_filename) == 0)) { + + mutex_exit(&log_sys->mutex); + continue; + } + + if (!os_file_delete_if_exists(innodb_file_data_key, + archived_log_filename)) { + + ib_logf(IB_LOG_LEVEL_WARN, + "can't delete archived log file %s.\n", + archived_log_filename); + + mutex_exit(&log_sys->mutex); + os_file_closedir(dir); + + return(DB_ERROR); + } + + mutex_exit(&log_sys->mutex); + } + + os_file_closedir(dir); + + return(DB_SUCCESS); +} + +/*******************************************************************//** +Tells the InnoDB server that there has been activity in the database +and wakes up the master thread if it is suspended (not sleeping). Used +in the MySQL interface. Note that there is a small chance that the master +thread stays suspended (we do not protect our operation with the +srv_sys_t->mutex, for performance reasons). 
*/ +UNIV_INTERN +void +srv_active_wake_master_thread(void) +/*===============================*/ +{ + if (srv_read_only_mode) { + return; + } + + ut_ad(!srv_sys_mutex_own()); + + srv_inc_activity_count(); + + if (srv_sys->n_threads_active[SRV_MASTER] == 0) { + srv_slot_t* slot; + + srv_sys_mutex_enter(); + + slot = &srv_sys->sys_threads[SRV_MASTER_SLOT]; + + /* Only if the master thread has been started. */ + + if (slot->in_use) { + ut_a(srv_slot_get_type(slot) == SRV_MASTER); + + if (slot->suspended) { + + slot->suspended = FALSE; + + ++srv_sys->n_threads_active[SRV_MASTER]; + + os_event_set(slot->event); + } + } + + srv_sys_mutex_exit(); + } +} + +/*******************************************************************//** +Tells the purge thread that there has been activity in the database +and wakes up the purge thread if it is suspended (not sleeping). Note +that there is a small chance that the purge thread stays suspended +(we do not protect our check with the srv_sys_t:mutex and the +purge_sys->latch, for performance reasons). */ +UNIV_INTERN +void +srv_wake_purge_thread_if_not_active(void) +/*=====================================*/ +{ + ut_ad(!srv_sys_mutex_own()); + + if (purge_sys->state == PURGE_STATE_RUN + && srv_sys->n_threads_active[SRV_PURGE] == 0) { + + srv_release_threads(SRV_PURGE, 1); + } +} + +/*******************************************************************//** +Wakes up the master thread if it is suspended or being suspended. */ +UNIV_INTERN +void +srv_wake_master_thread(void) +/*========================*/ +{ + ut_ad(!srv_sys_mutex_own()); + + srv_inc_activity_count(); + + srv_release_threads(SRV_MASTER, 1); +} + +/*******************************************************************//** +Get current server activity count. We don't hold srv_sys::mutex while +reading this value as it is only used in heuristics. +@return activity count. 
*/ +UNIV_INTERN +ulint +srv_get_activity_count(void) +/*========================*/ +{ + return(srv_sys->activity_count); +} + +/*******************************************************************//** +Check if there has been any activity. +@return FALSE if no change in activity counter. */ +UNIV_INTERN +ibool +srv_check_activity( +/*===============*/ + ulint old_activity_count) /*!< in: old activity count */ +{ + return(srv_sys->activity_count != old_activity_count); +} + +/********************************************************************//** +The master thread is tasked to ensure that flush of log file happens +once every second in the background. This is to ensure that not more +than one second of trxs are lost in case of crash when +innodb_flush_logs_at_trx_commit != 1 */ +static +void +srv_sync_log_buffer_in_background(void) +/*===================================*/ +{ + time_t current_time = time(NULL); + + srv_main_thread_op_info = "flushing log"; + if (difftime(current_time, srv_last_log_flush_time) + >= srv_flush_log_at_timeout) { + log_buffer_sync_in_background(TRUE); + srv_last_log_flush_time = current_time; + srv_log_writes_and_flush++; + } +} + +/********************************************************************//** +Make room in the table cache by evicting an unused table. +@return number of tables evicted. 
*/ +static +ulint +srv_master_evict_from_table_cache( +/*==============================*/ + ulint pct_check) /*!< in: max percent to check */ +{ + ulint n_tables_evicted = 0; + + rw_lock_x_lock(&dict_operation_lock); + + dict_mutex_enter_for_mysql(); + + n_tables_evicted = dict_make_room_in_cache( + innobase_get_table_cache_size(), pct_check); + + dict_mutex_exit_for_mysql(); + + rw_lock_x_unlock(&dict_operation_lock); + + return(n_tables_evicted); +} + +/*********************************************************************//** +This function prints progress message every 60 seconds during server +shutdown, for any activities that master thread is pending on. */ +static +void +srv_shutdown_print_master_pending( +/*==============================*/ + ib_time_t* last_print_time, /*!< last time the function + print the message */ + ulint n_tables_to_drop, /*!< number of tables to + be dropped */ + ulint n_bytes_merged) /*!< number of change buffer + just merged */ +{ + ib_time_t current_time; + double time_elapsed; + + current_time = ut_time(); + time_elapsed = ut_difftime(current_time, *last_print_time); + + if (time_elapsed > 60) { + *last_print_time = ut_time(); + + if (n_tables_to_drop) { + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Waiting for " + "%lu table(s) to be dropped\n", + (ulong) n_tables_to_drop); + } + + /* Check change buffer merge, we only wait for change buffer + merge if it is a slow shutdown */ + if (!srv_fast_shutdown && n_bytes_merged) { + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Waiting for change " + "buffer merge to complete\n" + " InnoDB: number of bytes of change buffer " + "just merged: %lu\n", + n_bytes_merged); + } + } +} + +/*********************************************************************//** +Perform the tasks that the master thread is supposed to do when the +server is active. There are two types of tasks. The first category is +of such tasks which are performed at each inovcation of this function. 
+We assume that this function is called roughly every second when the
+server is active. The second category is of such tasks which are
+performed at some interval e.g.: purge, dict_LRU cleanup etc. */
+static
+void
+srv_master_do_active_tasks(void)
+/*============================*/
+{
+	ib_time_t	cur_time = ut_time();
+	ullint		counter_time = ut_time_us(NULL);
+
+	/* First do the tasks that we are supposed to do at each
+	invocation of this function. */
+
+	++srv_main_active_loops;
+
+	MONITOR_INC(MONITOR_MASTER_ACTIVE_LOOPS);
+
+	/* ALTER TABLE in MySQL requires on Unix that the table handler
+	can drop tables lazily after there no longer are SELECT
+	queries to them. */
+	srv_main_thread_op_info = "doing background drop tables";
+	row_drop_tables_for_mysql_in_background();
+	MONITOR_INC_TIME_IN_MICRO_SECS(
+		MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, counter_time);
+
+	/* Shutdown may start at any time; re-check between tasks so that
+	we do not begin further work once it has been requested. */
+	if (srv_shutdown_state > 0) {
+		return;
+	}
+
+	/* make sure that there is enough reusable space in the redo
+	log files */
+	srv_main_thread_op_info = "checking free log space";
+	log_free_check();
+
+	/* Do an ibuf merge */
+	srv_main_thread_op_info = "doing insert buffer merge";
+	counter_time = ut_time_us(NULL);
+	ibuf_contract_in_background(0, FALSE);
+	MONITOR_INC_TIME_IN_MICRO_SECS(
+		MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
+
+	/* Flush logs if needed */
+	srv_main_thread_op_info = "flushing log";
+	srv_sync_log_buffer_in_background();
+	MONITOR_INC_TIME_IN_MICRO_SECS(
+		MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
+
+	/* Now see if various tasks that are performed at defined
+	intervals need to be performed. */
+
+#ifdef MEM_PERIODIC_CHECK
+	/* Check magic numbers of every allocated mem block once in
+	SRV_MASTER_MEM_VALIDATE_INTERVAL seconds */
+	if (cur_time % SRV_MASTER_MEM_VALIDATE_INTERVAL == 0) {
+		mem_validate_all_blocks();
+		MONITOR_INC_TIME_IN_MICRO_SECS(
+			MONITOR_SRV_MEM_VALIDATE_MICROSECOND, counter_time);
+	}
+#endif
+	/* A single check suffices here; the original tested the shutdown
+	state twice in a row with nothing in between. */
+	if (srv_shutdown_state > 0) {
+		return;
+	}
+
+	if (cur_time % SRV_MASTER_DICT_LRU_INTERVAL == 0) {
+		srv_main_thread_op_info = "enforcing dict cache limit";
+		srv_master_evict_from_table_cache(50);
+		MONITOR_INC_TIME_IN_MICRO_SECS(
+			MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
+	}
+
+	if (srv_shutdown_state > 0) {
+		return;
+	}
+
+	/* Make a new checkpoint */
+	if (cur_time % SRV_MASTER_CHECKPOINT_INTERVAL == 0) {
+		srv_main_thread_op_info = "making checkpoint";
+		log_checkpoint(TRUE, FALSE);
+		MONITOR_INC_TIME_IN_MICRO_SECS(
+			MONITOR_SRV_CHECKPOINT_MICROSECOND, counter_time);
+	}
+}
+
+/*********************************************************************//**
+Perform the tasks that the master thread is supposed to do whenever the
+server is idle. We do check for the server state during this function
+and if the server has entered the shutdown phase we may return from
+the function without completing the required tasks.
+Note that the server can move to active state when we are executing this
+function but we don't check for that as we are supposed to perform more
+or less the same tasks when the server is active. Unlike the active
+variant, the dictionary cache eviction and the checkpoint are done on
+every call here rather than at time-based intervals. */
+static
+void
+srv_master_do_idle_tasks(void)
+/*==========================*/
+{
+	ullint	counter_time;
+
+	++srv_main_idle_loops;
+
+	MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS);
+
+
+	/* ALTER TABLE in MySQL requires on Unix that the table handler
+	can drop tables lazily after there no longer are SELECT
+	queries to them. */
+	counter_time = ut_time_us(NULL);
+	srv_main_thread_op_info = "doing background drop tables";
+	row_drop_tables_for_mysql_in_background();
+	MONITOR_INC_TIME_IN_MICRO_SECS(
+		MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
+			 counter_time);
+
+	if (srv_shutdown_state > 0) {
+		return;
+	}
+
+	/* make sure that there is enough reusable space in the redo
+	log files */
+	srv_main_thread_op_info = "checking free log space";
+	log_free_check();
+
+	/* Do an ibuf merge */
+	counter_time = ut_time_us(NULL);
+	srv_main_thread_op_info = "doing insert buffer merge";
+	ibuf_contract_in_background(0, TRUE);
+	MONITOR_INC_TIME_IN_MICRO_SECS(
+		MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
+
+	if (srv_shutdown_state > 0) {
+		return;
+	}
+
+	srv_main_thread_op_info = "enforcing dict cache limit";
+	srv_master_evict_from_table_cache(100);
+	MONITOR_INC_TIME_IN_MICRO_SECS(
+		MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
+
+	/* Flush logs if needed. Report the operation under the same
+	status string that srv_master_do_active_tasks() uses, instead of
+	leaving the previous "enforcing dict cache limit" text in place
+	while the log is being flushed. */
+	srv_main_thread_op_info = "flushing log";
+	srv_sync_log_buffer_in_background();
+	MONITOR_INC_TIME_IN_MICRO_SECS(
+		MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
+
+	if (srv_shutdown_state > 0) {
+		return;
+	}
+
+	/* Make a new checkpoint */
+	srv_main_thread_op_info = "making checkpoint";
+	log_checkpoint(TRUE, FALSE);
+	MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_CHECKPOINT_MICROSECOND,
+				       counter_time);
+
+	if (srv_shutdown_state > 0) {
+		return;
+	}
+
+	/* Archived logs are only purged when an expiry time has been
+	configured (srv_log_arch_expire_sec != 0). */
+	if (srv_log_arch_expire_sec) {
+		srv_main_thread_op_info = "purging archived logs";
+		purge_archived_logs(ut_time() - srv_log_arch_expire_sec,
+				    0);
+	}
+}
+
+/*********************************************************************//**
+Perform the tasks during shutdown. The tasks that we do at shutdown
+depend on srv_fast_shutdown:
+2 => very fast shutdown => do no book keeping
+1 => normal shutdown => clear drop table queue and make checkpoint
+0 => slow shutdown => in addition to above do complete purge and ibuf
+merge
+@return TRUE if some work was done. 
FALSE otherwise */
+static
+ibool
+srv_master_do_shutdown_tasks(
+/*=========================*/
+	ib_time_t*	last_print_time)/*!< last time the function
+					print the message */
+{
+	ut_ad(!srv_read_only_mode);
+
+	++srv_main_shutdown_loops;
+
+	ut_a(srv_shutdown_state > 0);
+
+	/* A very fast shutdown (2) skips all of the book keeping below
+	and reports that no work was done. */
+	if (srv_fast_shutdown == 2) {
+		return(FALSE);
+	}
+
+	/* ALTER TABLE in MySQL requires on Unix that the table handler
+	can drop tables lazily after there no longer are SELECT
+	queries to them. */
+	srv_main_thread_op_info = "doing background drop tables";
+	ulint	tables_pending = row_drop_tables_for_mysql_in_background();
+
+	/* Make sure that there is enough reusable space in the redo
+	log files. */
+	srv_main_thread_op_info = "checking free log space";
+	log_free_check();
+
+	ulint	bytes_merged = 0;
+
+	/* Only a slow shutdown merges the insert buffer and flushes the
+	log buffer; a normal shutdown (1) goes directly to the checkpoint. */
+	if (srv_fast_shutdown != 1) {
+		srv_main_thread_op_info = "doing insert buffer merge";
+		bytes_merged = ibuf_contract_in_background(0, TRUE);
+
+		/* Flush logs if needed */
+		srv_sync_log_buffer_in_background();
+	}
+
+	/* Make a new checkpoint on every call. */
+	srv_main_thread_op_info = "making checkpoint";
+	log_checkpoint(TRUE, FALSE);
+
+	/* Report progress during shutdown when verbose logging is on;
+	the callee receives last_print_time, presumably to rate-limit
+	the message. */
+	if (srv_shutdown_state > 0 && srv_print_verbose_log) {
+		srv_shutdown_print_master_pending(
+			last_print_time, tables_pending, bytes_merged);
+	}
+
+	/* The caller keeps looping for as long as something was done. */
+	return(bytes_merged != 0 || tables_pending != 0);
+}
+
+/*********************************************************************//**
+Puts master thread to sleep. At this point we are using polling to
+service various activities. 
Master thread sleeps for one second before
+checking the state of the server again */
+static
+void
+srv_master_sleep(void)
+/*==================*/
+{
+	srv_main_thread_op_info = "sleeping";
+	/* 1000000 matches the one second stated above, i.e. the delay is
+	expressed in microseconds. */
+	os_thread_sleep(1000000);
+	srv_main_thread_op_info = "";
+}
+
+/*********************************************************************//**
+The master thread controlling the server. It registers itself in the
+first server thread slot, then alternates between a polling phase (sleep,
+then run the active or the idle task set), the shutdown tasks, and a
+suspended state in which it waits on its slot event. The thread only
+terminates via os_thread_exit() once the shutdown state has reached
+SRV_SHUTDOWN_EXIT_THREADS.
+@return a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(srv_master_thread)(
+/*==============================*/
+	void*	arg __attribute__((unused)))
+			/*!< in: a dummy parameter required by
+			os_thread_create */
+{
+	srv_slot_t*	slot;
+	ulint		old_activity_count = srv_get_activity_count();
+	ib_time_t	last_print_time;
+
+	ut_ad(!srv_read_only_mode);
+
+	/* Record the OS thread id and apply the configured scheduling
+	priority for the master thread. */
+	srv_master_tid = os_thread_get_tid();
+
+	os_thread_set_priority(srv_master_tid, srv_sched_priority_master);
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+	fprintf(stderr, "Master thread starts, id %lu\n",
+		os_thread_pf(os_thread_get_curr_id()));
+#endif /* UNIV_DEBUG_THREAD_CREATION */
+
+#ifdef UNIV_PFS_THREAD
+	pfs_register_thread(srv_master_thread_key);
+#endif /* UNIV_PFS_THREAD */
+
+	srv_main_thread_process_no = os_proc_get_number();
+	srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
+
+	/* The master thread must own the very first slot of
+	srv_sys->sys_threads; this is asserted right below. */
+	slot = srv_reserve_slot(SRV_MASTER);
+	ut_a(slot == srv_sys->sys_threads);
+
+	last_print_time = ut_time();
+loop:
+	/* With this force-recovery level or higher no background work is
+	done at all: go straight to the suspended state. */
+	if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
+		goto suspend_thread;
+	}
+
+	/* Polling phase: once per sleep interval decide between the active
+	and the idle task set. */
+	while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
+
+		srv_master_sleep();
+
+		MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP);
+
+		srv_current_thread_priority = srv_master_thread_priority;
+
+		/* srv_check_activity() presumably reports whether the
+		activity counter moved since old_activity_count was
+		sampled -- confirm against its definition. */
+		if (srv_check_activity(old_activity_count)) {
+			old_activity_count = srv_get_activity_count();
+			srv_master_do_active_tasks();
+		} else {
+			srv_master_do_idle_tasks();
+		}
+	}
+
+	/* Shutdown phase: repeat the shutdown tasks for as long as they
+	report that some work was done. */
+	while (srv_master_do_shutdown_tasks(&last_print_time)) {
+
+		/* Shouldn't loop here in case of very fast shutdown */
+		ut_ad(srv_fast_shutdown < 2);
+	}
+
+suspend_thread:
+	srv_main_thread_op_info = "suspending";
+
+	srv_suspend_thread(slot);
+
+	/* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
+	waits for database activity to die down when converting < 4.1.x
+	databases, and relies on this string being exactly as it is. InnoDB
+	manual also mentions this string in several places. */
+	srv_main_thread_op_info = "waiting for server activity";
+
+	os_event_wait(slot->event);
+
+	/* Woken up: either exit for good or go through the loop again. */
+	if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
+		os_thread_exit(NULL);
+	}
+
+	goto loop;
+
+	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
+}
+
+/*********************************************************************//**
+Check if purge should stop. During normal operation the answer is always
+false. In the CLEANUP and EXIT_THREADS shutdown states purge stops unless
+a slow shutdown (srv_fast_shutdown == 0) was requested and the last batch
+still purged something. The remaining shutdown states are not expected
+here and trigger ut_error.
+@return true if it should shutdown. */
+static
+bool
+srv_purge_should_exit(
+/*==============*/
+	ulint		n_purged)	/*!< in: pages purged in last batch */
+{
+	switch (srv_shutdown_state) {
+	case SRV_SHUTDOWN_NONE:
+		/* Normal operation. */
+		break;
+
+	case SRV_SHUTDOWN_CLEANUP:
+	case SRV_SHUTDOWN_EXIT_THREADS:
+		/* Exit unless slow shutdown requested or all done. */
+		return(srv_fast_shutdown != 0 || n_purged == 0);
+
+	case SRV_SHUTDOWN_LAST_PHASE:
+	case SRV_SHUTDOWN_FLUSH_PHASE:
+		ut_error;
+	}
+
+	/* Reached only from the SRV_SHUTDOWN_NONE case above. */
+	return(false);
+}
+
+/*********************************************************************//**
+Fetch and execute a task from the work queue. 
+@return true if a task was executed */
+static
+bool
+srv_task_execute(void)
+/*==================*/
+{
+	que_thr_t*	thr = NULL;
+
+	ut_ad(!srv_read_only_mode);
+	ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
+
+	/* Detach the first queued task, if any, while holding the task
+	queue mutex; the task itself is run after the mutex is released. */
+	mutex_enter(&srv_sys->tasks_mutex);
+
+	if (UT_LIST_GET_LEN(srv_sys->tasks) > 0) {
+
+		thr = UT_LIST_GET_FIRST(srv_sys->tasks);
+
+		/* Only purge tasks are expected on this queue. */
+		ut_a(que_node_get_type(thr->child) == QUE_NODE_PURGE);
+
+		UT_LIST_REMOVE(queue, srv_sys->tasks, thr);
+	}
+
+	mutex_exit(&srv_sys->tasks_mutex);
+
+	if (thr != NULL) {
+
+		que_run_threads(thr);
+
+		/* Account for the completed task in purge_sys->n_completed. */
+		os_atomic_inc_ulint(
+			&purge_sys->bh_mutex, &purge_sys->n_completed, 1);
+
+		srv_inc_activity_count();
+	}
+
+	return(thr != NULL);
+}
+
+/** Counter used to hand out an index into srv_purge_tids[] to each purge
+worker thread as it starts. */
+static ulint purge_tid_i = 0;
+
+/*********************************************************************//**
+Worker thread that reads tasks from the work queue and executes them.
+The thread suspends itself, waits to be released, runs at most one task
+per wake-up and repeats until purge_sys->state is PURGE_STATE_EXIT.
+@return a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(srv_worker_thread)(
+/*==============================*/
+	void*	arg __attribute__((unused)))	/*!< in: a dummy parameter
+						required by os_thread_create */
+{
+	srv_slot_t*	slot;
+	/* NOTE(review): the incremented value is used directly as the
+	index (io_handler_thread subtracts 1 from the same call), which
+	leaves srv_purge_tids[0] to the coordinator thread -- confirm that
+	os_atomic_increment_ulint() returns the new value. */
+	ulint		tid_i = os_atomic_increment_ulint(&purge_tid_i, 1);
+
+	ut_ad(tid_i < srv_n_purge_threads);
+	ut_ad(!srv_read_only_mode);
+	ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
+
+	srv_purge_tids[tid_i] = os_thread_get_tid();
+	os_thread_set_priority(srv_purge_tids[tid_i],
+			       srv_sched_priority_purge);
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+	ut_print_timestamp(stderr);
+	fprintf(stderr, " InnoDB: worker thread starting, id %lu\n",
+		os_thread_pf(os_thread_get_curr_id()));
+#endif /* UNIV_DEBUG_THREAD_CREATION */
+
+	slot = srv_reserve_slot(SRV_WORKER);
+
+	/* Worker threads only exist when more than one purge thread is
+	configured. */
+	ut_a(srv_n_purge_threads > 1);
+
+	srv_sys_mutex_enter();
+
+	ut_a(srv_sys->n_threads_active[SRV_WORKER] < srv_n_purge_threads);
+
+	srv_sys_mutex_exit();
+
+	/* We need to ensure that the worker threads exit after the
+	purge coordinator thread. Otherwise the purge coordinator can
+	end up waiting forever in trx_purge_wait_for_workers_to_complete() */
+
+	do {
+		srv_suspend_thread(slot);
+
+		os_event_wait(slot->event);
+
+		srv_current_thread_priority = srv_purge_thread_priority;
+
+		if (srv_task_execute()) {
+
+			/* A task was executed: wake up the purge
+			coordinator thread if it is not active. */
+
+			srv_wake_purge_thread_if_not_active();
+		}
+
+		/* Note: we are checking the state without holding the
+		purge_sys->latch here. */
+	} while (purge_sys->state != PURGE_STATE_EXIT);
+
+	srv_free_slot(slot);
+
+	/* Sanity-check the final purge state under the latch. */
+	rw_lock_x_lock(&purge_sys->latch);
+
+	ut_a(!purge_sys->running);
+	ut_a(purge_sys->state == PURGE_STATE_EXIT);
+	ut_a(srv_shutdown_state > SRV_SHUTDOWN_NONE);
+
+	rw_lock_x_unlock(&purge_sys->latch);
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+	ut_print_timestamp(stderr);
+	fprintf(stderr, " InnoDB: Purge worker thread exiting, id %lu\n",
+		os_thread_pf(os_thread_get_curr_id()));
+#endif /* UNIV_DEBUG_THREAD_CREATION */
+
+	/* We count the number of threads in os_thread_exit(). A created
+	thread should always use that to exit and not use return() to exit. */
+	os_thread_exit(NULL);
+
+	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
+}
+
+/*********************************************************************//**
+Do the actual purge operation. Runs purge batches in a loop, adapting the
+number of threads used (kept in function-static state across calls) to
+how the history list length develops, and adds the pages purged to
+*n_total_purged.
+@return length of history list before the last purge batch. */
+static
+ulint
+srv_do_purge(
+/*=========*/
+	ulint		n_threads,	/*!< in: number of threads to use */
+	ulint*		n_total_purged)	/*!< in/out: total pages purged */
+{
+	ulint		n_pages_purged;
+
+	/* These persist between calls: batch counter, number of threads
+	currently in use, and the last history list length snapshot. */
+	static ulint	count = 0;
+	static ulint	n_use_threads = 0;
+	static ulint	rseg_history_len = 0;
+	ulint		old_activity_count = srv_get_activity_count();
+
+	ut_a(n_threads > 0);
+	ut_ad(!srv_read_only_mode);
+
+	/* Purge until there are no more records to purge and there is
+	no change in configuration or server state. If the user has
+	configured more than one purge thread then we treat that as a
+	pool of threads and only use the extra threads if purge can't
+	keep up with updates. */
+
+	/* First call: start with all configured threads. */
+	if (n_use_threads == 0) {
+		n_use_threads = n_threads;
+	}
+
+	do {
+		srv_current_thread_priority = srv_purge_thread_priority;
+
+		if (trx_sys->rseg_history_len > rseg_history_len
+		    || (srv_max_purge_lag > 0
+			&& rseg_history_len > srv_max_purge_lag)) {
+
+			/* History length is now longer than what it was
+			when we took the last snapshot, or the snapshot
+			exceeds the configured maximum purge lag. Use more
+			threads. */
+
+			if (n_use_threads < n_threads) {
+				++n_use_threads;
+			}
+
+		} else if (srv_check_activity(old_activity_count)
+			   && n_use_threads > 1) {
+
+			/* History length same or smaller since last snapshot,
+			use fewer threads. */
+
+			--n_use_threads;
+
+			old_activity_count = srv_get_activity_count();
+		}
+
+		/* Ensure that the number of purge threads in use is at
+		least one and does not exceed what was configured. */
+
+		ut_a(n_use_threads > 0);
+		ut_a(n_use_threads <= n_threads);
+
+		/* Take a snapshot of the history list before purge. */
+		if ((rseg_history_len = trx_sys->rseg_history_len) == 0) {
+			break;
+		}
+
+		n_pages_purged = trx_purge(
+			n_use_threads, srv_purge_batch_size, false);
+
+		/* Once every TRX_SYS_N_RSEGS batches (including the very
+		first one) run an extra single-threaded pass with the
+		truncate flag set. */
+		if (!(count++ % TRX_SYS_N_RSEGS)) {
+			/* Force a truncate of the history list. */
+			n_pages_purged += trx_purge(
+				1, srv_purge_batch_size, true);
+		}
+
+		*n_total_purged += n_pages_purged;
+
+	} while (!srv_purge_should_exit(n_pages_purged) && n_pages_purged > 0);
+
+	return(rseg_history_len);
+}
+
+/*********************************************************************//**
+Suspend the purge coordinator thread. Marks purge as not running, waits
+on the slot event (with a timeout unless purge has been stopped) and
+reactivates the slot before returning. */
+static
+void
+srv_purge_coordinator_suspend(
+/*==========================*/
+	srv_slot_t*	slot,			/*!< in/out: Purge coordinator
+						thread slot */
+	ulint		rseg_history_len)	/*!< in: history list length
+						before last purge */
+{
+	ut_ad(!srv_read_only_mode);
+	ut_a(slot->type == SRV_PURGE);
+
+	bool		stop = false;
+
+	/** Maximum wait time on the purge event, in micro-seconds. */
+	static const ulint SRV_PURGE_MAX_TIMEOUT = 10000;
+
+	ib_int64_t	sig_count = srv_suspend_thread(slot);
+
+	do {
+		ulint		ret;
+
+		rw_lock_x_lock(&purge_sys->latch);
+
+		purge_sys->running = false;
+
+		rw_lock_x_unlock(&purge_sys->latch);
+
+		/* We don't wait right away on the non-timed wait because
+		we want to signal the thread that wants to suspend purge. */
+
+		if (stop) {
+			os_event_wait_low(slot->event, sig_count);
+			ret = 0;
+		} else if (rseg_history_len <= trx_sys->rseg_history_len) {
+			ret = os_event_wait_time_low(
+				slot->event, SRV_PURGE_MAX_TIMEOUT, sig_count);
+		} else {
+			/* We don't want to waste time waiting, if the
+			history list increased by the time we got here,
+			unless purge has been stopped.
+			NOTE(review): this branch is reached when the
+			current history length is SMALLER than the snapshot
+			passed in, which does not match the word
+			"increased" above -- confirm the intended
+			condition. */
+			ret = 0;
+		}
+
+		srv_sys_mutex_enter();
+
+		/* The thread can be in state !suspended after the timeout
+		but before this check if another thread sent a wakeup signal. */
+
+		if (slot->suspended) {
+			slot->suspended = FALSE;
+			++srv_sys->n_threads_active[slot->type];
+			ut_a(srv_sys->n_threads_active[slot->type] == 1);
+		}
+
+		srv_sys_mutex_exit();
+
+		sig_count = srv_suspend_thread(slot);
+
+		rw_lock_x_lock(&purge_sys->latch);
+
+		/* Stay suspended only if purge was explicitly stopped and
+		the server is not shutting down. */
+		stop = (srv_shutdown_state == SRV_SHUTDOWN_NONE
+			&& purge_sys->state == PURGE_STATE_STOP);
+
+		if (!stop) {
+			ut_a(purge_sys->n_stop == 0);
+			purge_sys->running = true;
+		} else {
+			ut_a(purge_sys->n_stop > 0);
+
+			/* Signal that we are suspended. */
+			os_event_set(purge_sys->event);
+		}
+
+		rw_lock_x_unlock(&purge_sys->latch);
+
+		if (ret == OS_SYNC_TIME_EXCEEDED) {
+
+			/* No new records added since wait started then simply
+			wait for new records. The magic number 5000 is an
+			approximation for the case where we have cached UNDO
+			log records which prevent truncate of the UNDO
+			segments. */
+
+			if (rseg_history_len == trx_sys->rseg_history_len
+			    && trx_sys->rseg_history_len < 5000) {
+
+				stop = true;
+			}
+		}
+
+	} while (stop);
+
+	/* Leave with the slot marked active again. */
+	srv_sys_mutex_enter();
+
+	if (slot->suspended) {
+		slot->suspended = FALSE;
+		++srv_sys->n_threads_active[slot->type];
+		ut_a(srv_sys->n_threads_active[slot->type] == 1);
+	}
+
+	srv_sys_mutex_exit();
+}
+
+/*********************************************************************//**
+Purge coordinator thread that schedules the purge tasks. It moves the
+purge system from PURGE_STATE_INIT to PURGE_STATE_RUN, repeatedly purges
+or suspends itself, and at shutdown sets PURGE_STATE_EXIT and releases
+the worker threads so that they can exit too.
+@return a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(srv_purge_coordinator_thread)(
+/*=========================================*/
+	void*	arg __attribute__((unused)))	/*!< in: a dummy parameter
+						required by os_thread_create */
+{
+	srv_slot_t*	slot;
+	/* ULINT_UNDEFINED is a non-zero starting value, so the first loop
+	iteration does not suspend on the n_total_purged == 0 test. */
+	ulint           n_total_purged = ULINT_UNDEFINED;
+
+	ut_ad(!srv_read_only_mode);
+	ut_a(srv_n_purge_threads >= 1);
+	ut_a(trx_purge_state() == PURGE_STATE_INIT);
+	ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
+
+	/* The coordinator uses index 0 of srv_purge_tids[]. */
+	srv_purge_tids[0] = os_thread_get_tid();
+	os_thread_set_priority(srv_purge_tids[0], srv_sched_priority_purge);
+
+	rw_lock_x_lock(&purge_sys->latch);
+
+	purge_sys->running = true;
+	purge_sys->state = PURGE_STATE_RUN;
+
+	rw_lock_x_unlock(&purge_sys->latch);
+
+#ifdef UNIV_PFS_THREAD
+	pfs_register_thread(srv_purge_thread_key);
+#endif /* UNIV_PFS_THREAD */
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+	ut_print_timestamp(stderr);
+	fprintf(stderr, " InnoDB: Purge coordinator thread created, id %lu\n",
+		os_thread_pf(os_thread_get_curr_id()));
+#endif /* UNIV_DEBUG_THREAD_CREATION */
+
+	slot = srv_reserve_slot(SRV_PURGE);
+
+	ulint	rseg_history_len = trx_sys->rseg_history_len;
+
+	do {
+		/* If there are no records to purge or the last
+		purge didn't purge any records then wait for activity. */
+
+		if (srv_shutdown_state == SRV_SHUTDOWN_NONE
+		    && (purge_sys->state == PURGE_STATE_STOP
+			|| n_total_purged == 0)) {
+
+			srv_purge_coordinator_suspend(slot, rseg_history_len);
+		}
+
+		if (srv_purge_should_exit(n_total_purged)) {
+			ut_a(!slot->suspended);
+			break;
+		}
+
+		n_total_purged = 0;
+
+		srv_current_thread_priority = srv_purge_thread_priority;
+
+		rseg_history_len = srv_do_purge(
+			srv_n_purge_threads, &n_total_purged);
+
+		srv_inc_activity_count();
+
+	} while (!srv_purge_should_exit(n_total_purged));
+
+	/* Ensure that we don't jump out of the loop unless the
+	exit condition is satisfied. */
+
+	ut_a(srv_purge_should_exit(n_total_purged));
+
+	/* Non-zero start value so that the loop below runs at least once
+	on a slow shutdown. */
+	ulint	n_pages_purged = ULINT_MAX;
+
+	/* Ensure that all records are purged if it is not a fast shutdown.
+	This covers the case where a record can be added after we exit the
+	loop above. */
+	while (srv_fast_shutdown == 0 && n_pages_purged > 0) {
+		n_pages_purged = trx_purge(1, srv_purge_batch_size, false);
+	}
+
+	/* Force a truncate of the history list. */
+	n_pages_purged = trx_purge(1, srv_purge_batch_size, true);
+	ut_a(n_pages_purged == 0 || srv_fast_shutdown != 0);
+
+	/* The task queue should always be empty, independent of fast
+	shutdown state. */
+	ut_a(srv_get_task_queue_length() == 0);
+
+	srv_free_slot(slot);
+
+	/* Note that we are shutting down. */
+	rw_lock_x_lock(&purge_sys->latch);
+
+	purge_sys->state = PURGE_STATE_EXIT;
+
+	purge_sys->running = false;
+
+	rw_lock_x_unlock(&purge_sys->latch);
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+	ut_print_timestamp(stderr);
+	fprintf(stderr, " InnoDB: Purge coordinator exiting, id %lu\n",
+		os_thread_pf(os_thread_get_curr_id()));
+#endif /* UNIV_DEBUG_THREAD_CREATION */
+
+	/* Ensure that all the worker threads quit. */
+	if (srv_n_purge_threads > 1) {
+		srv_release_threads(SRV_WORKER, srv_n_purge_threads - 1);
+	}
+
+	/* We count the number of threads in os_thread_exit(). A created
+	thread should always use that to exit and not use return() to exit. */
+	os_thread_exit(NULL);
+
+	OS_THREAD_DUMMY_RETURN;	/* Not reached, avoid compiler warning */
+}
+
+/**********************************************************************//**
+Enqueues a task to server task queue and releases a worker thread, if there
+is a suspended one. */
+UNIV_INTERN
+void
+srv_que_task_enqueue_low(
+/*=====================*/
+	que_thr_t*	thr)	/*!< in: query thread */
+{
+	ut_ad(!srv_read_only_mode);
+	/* Append to the tail of the queue under the task queue mutex;
+	srv_task_execute() takes tasks from the head. */
+	mutex_enter(&srv_sys->tasks_mutex);
+
+	UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
+
+	mutex_exit(&srv_sys->tasks_mutex);
+
+	srv_release_threads(SRV_WORKER, 1);
+}
+
+/**********************************************************************//**
+Get count of tasks in the queue. The length is read while holding the
+task queue mutex.
+@return number of tasks in queue */
+UNIV_INTERN
+ulint
+srv_get_task_queue_length(void)
+/*===========================*/
+{
+	ulint	n_tasks;
+
+	ut_ad(!srv_read_only_mode);
+
+	mutex_enter(&srv_sys->tasks_mutex);
+
+	n_tasks = UT_LIST_GET_LEN(srv_sys->tasks);
+
+	mutex_exit(&srv_sys->tasks_mutex);
+
+	return(n_tasks);
+}
+
+/**********************************************************************//**
+Wakeup the purge threads: the coordinator and, when more than one purge
+thread is configured, all of the workers. Does nothing when
+srv_force_recovery is SRV_FORCE_NO_BACKGROUND or higher. */
+UNIV_INTERN
+void
+srv_purge_wakeup(void)
+/*==================*/
+{
+	ut_ad(!srv_read_only_mode);
+
+	if (srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
+
+		srv_release_threads(SRV_PURGE, 1);
+
+		if (srv_n_purge_threads > 1) {
+			/* One of the purge threads is the coordinator. */
+			ulint	n_workers = srv_n_purge_threads - 1;
+
+			srv_release_threads(SRV_WORKER, n_workers);
+		}
+	}
+}
+
diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc
new file mode 100644
index 00000000000..4e0e03a6491
--- /dev/null
+++ b/storage/xtradb/srv/srv0start.cc
@@ -0,0 +1,3284 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2014, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 2008, Google Inc.
+Copyright (c) 2009, Percona Inc. 
+ +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +Portions of this file contain modifications contributed and copyrighted +by Percona Inc.. Those modifications are +gratefully acknowledged and are described briefly in the InnoDB +documentation. The contributions by Percona Inc. are incorporated with +their permission, and subject to the conditions contained in the file +COPYING.Percona. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA + +*****************************************************************************/ + +/********************************************************************//** +@file srv/srv0start.cc +Starts the InnoDB database server + +Created 2/16/1996 Heikki Tuuri +*************************************************************************/ + +#include "mysqld.h" +#include "pars0pars.h" +#include "row0ftsort.h" +#include "ut0mem.h" +#include "mem0mem.h" +#include "data0data.h" +#include "data0type.h" +#include "dict0dict.h" +#include "buf0buf.h" +#include "buf0dump.h" +#include "os0file.h" +#include "os0thread.h" +#include "fil0fil.h" +#include "fsp0fsp.h" +#include "rem0rec.h" +#include "mtr0mtr.h" +#include "log0log.h" +#include "log0online.h" +#include "log0recv.h" +#include "page0page.h" +#include "page0cur.h" +#include "trx0trx.h" +#include "trx0sys.h" +#include "btr0btr.h" +#include "btr0cur.h" +#include "rem0rec.h" +#include "ibuf0ibuf.h" +#include "srv0start.h" +#include "srv0srv.h" +#ifndef UNIV_HOTBACKUP +# include "trx0rseg.h" +# include "os0proc.h" +# include "sync0sync.h" +# include "buf0flu.h" +# include "buf0rea.h" +# include "dict0boot.h" +# include "dict0load.h" +# include "dict0stats_bg.h" +# include "que0que.h" +# include "usr0sess.h" +# include "lock0lock.h" +# include "trx0roll.h" +# include "trx0purge.h" +# include "lock0lock.h" +# include "pars0pars.h" +# include "btr0sea.h" +# include "rem0cmp.h" +# include "dict0crea.h" +# include "row0ins.h" +# include "row0sel.h" +# include "row0upd.h" +# include "row0row.h" +# include "row0mysql.h" +# include "btr0pcur.h" +# include "os0sync.h" +# include "zlib.h" +# include "ut0crc32.h" + +/** Log sequence number immediately after startup */ +UNIV_INTERN lsn_t srv_start_lsn; +/** Log sequence number at shutdown */ 
+UNIV_INTERN lsn_t srv_shutdown_lsn; + +#ifdef HAVE_DARWIN_THREADS +# include <sys/utsname.h> +/** TRUE if the F_FULLFSYNC option is available */ +UNIV_INTERN ibool srv_have_fullfsync = FALSE; +#endif + +/** TRUE if a raw partition is in use */ +UNIV_INTERN ibool srv_start_raw_disk_in_use = FALSE; + +/** TRUE if the server is being started, before rolling back any +incomplete transactions */ +UNIV_INTERN ibool srv_startup_is_before_trx_rollback_phase = FALSE; +/** TRUE if the server is being started */ +UNIV_INTERN ibool srv_is_being_started = FALSE; +/** TRUE if the server was successfully started */ +UNIV_INTERN ibool srv_was_started = FALSE; +/** TRUE if innobase_start_or_create_for_mysql() has been called */ +static ibool srv_start_has_been_called = FALSE; + +/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to +SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */ +UNIV_INTERN enum srv_shutdown_state srv_shutdown_state = SRV_SHUTDOWN_NONE; + +/** Files comprising the system tablespace */ +static os_file_t files[1000]; + +/** io_handler_thread parameters for thread identification */ +static ulint n[SRV_MAX_N_IO_THREADS]; +/** io_handler_thread identifiers, 32 is the maximum number of purge threads. +The extra elements at the end are allocated as follows: +SRV_MAX_N_IO_THREADS + 1: srv_master_thread +SRV_MAX_N_IO_THREADS + 2: lock_wait_timeout_thread +SRV_MAX_N_IO_THREADS + 3: srv_error_monitor_thread +SRV_MAX_N_IO_THREADS + 4: srv_monitor_thread +SRV_MAX_N_IO_THREADS + 5: srv_redo_log_follow_thread +SRV_MAX_N_IO_THREADS + 6: srv_purge_coordinator_thread +SRV_MAX_N_IO_THREADS + 7: srv_worker_thread +... +SRV_MAX_N_IO_THREADS + 7 + srv_n_purge_threads - 1: srv_worker_thread */ +static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 7 + + SRV_MAX_N_PURGE_THREADS]; + +/** We use this mutex to test the return value of pthread_mutex_trylock + on successful locking. HP-UX does NOT return 0, though Linux et al do. 
*/ +static os_fast_mutex_t srv_os_test_mutex; + +/** Name of srv_monitor_file */ +static char* srv_monitor_file_name; +#endif /* !UNIV_HOTBACKUP */ + +/** Default undo tablespace size in UNIV_PAGEs count (10MB). */ +static const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES = + ((1024 * 1024) * 10) / UNIV_PAGE_SIZE_DEF; + +/** */ +#define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD +#define SRV_MAX_N_PENDING_SYNC_IOS 100 + +#ifdef UNIV_PFS_THREAD +/* Keys to register InnoDB threads with performance schema */ +UNIV_INTERN mysql_pfs_key_t io_handler_thread_key; +UNIV_INTERN mysql_pfs_key_t srv_lock_timeout_thread_key; +UNIV_INTERN mysql_pfs_key_t srv_error_monitor_thread_key; +UNIV_INTERN mysql_pfs_key_t srv_monitor_thread_key; +UNIV_INTERN mysql_pfs_key_t srv_master_thread_key; +UNIV_INTERN mysql_pfs_key_t srv_purge_thread_key; +UNIV_INTERN mysql_pfs_key_t srv_log_tracking_thread_key; +#endif /* UNIV_PFS_THREAD */ + +/*********************************************************************//** +Convert a numeric string that optionally ends in G or M or K, to a number +containing megabytes. +@return next character in string */ +static +char* +srv_parse_megabytes( +/*================*/ + char* str, /*!< in: string containing a quantity in bytes */ + ulint* megs) /*!< out: the number in megabytes */ +{ + char* endp; + ulint size; + + size = strtoul(str, &endp, 10); + + str = endp; + + switch (*str) { + case 'G': case 'g': + size *= 1024; + /* fall through */ + case 'M': case 'm': + str++; + break; + case 'K': case 'k': + size /= 1024; + str++; + break; + default: + size /= 1024 * 1024; + break; + } + + *megs = size; + return(str); +} + +/*********************************************************************//** +Check if a file can be opened in read-write mode. +@return true if it doesn't exist or can be opened in rw mode. 
*/ +static +bool +srv_file_check_mode( +/*================*/ + const char* name) /*!< in: filename to check */ +{ + os_file_stat_t stat; + + memset(&stat, 0x0, sizeof(stat)); + + dberr_t err = os_file_get_status(name, &stat, true); + + if (err == DB_FAIL) { + + ib_logf(IB_LOG_LEVEL_ERROR, + "os_file_get_status() failed on '%s'. Can't determine " + "file permissions", name); + + return(false); + + } else if (err == DB_SUCCESS) { + + /* Note: stat.rw_perm is only valid of files */ + + if (stat.type == OS_FILE_TYPE_FILE + || stat.type == OS_FILE_TYPE_BLOCK) { + if (!stat.rw_perm) { + + ib_logf(IB_LOG_LEVEL_ERROR, + "%s can't be opened in %s mode", + name, + srv_read_only_mode + ? "read" : "read-write"); + + return(false); + } + } else { + /* Not a regular file, bail out. */ + + ib_logf(IB_LOG_LEVEL_ERROR, + "'%s' not a regular file.", name); + + return(false); + } + } else { + + /* This is OK. If the file create fails on RO media, there + is nothing we can do. */ + + ut_a(err == DB_NOT_FOUND); + } + + return(true); +} + +/*********************************************************************//** +Reads the data files and their sizes from a character string given in +the .cnf file. +@return TRUE if ok, FALSE on parse error */ +UNIV_INTERN +ibool +srv_parse_data_file_paths_and_sizes( +/*================================*/ + char* str) /*!< in/out: the data file path string */ +{ + char* input_str; + char* path; + ulint size; + ulint i = 0; + + srv_auto_extend_last_data_file = FALSE; + srv_last_file_size_max = 0; + srv_data_file_names = NULL; + srv_data_file_sizes = NULL; + srv_data_file_is_raw_partition = NULL; + + input_str = str; + + /* First calculate the number of data files and check syntax: + path:size[M | G];path:size[M | G]... . Note that a Windows path may + contain a drive name and a ':'. 
*/ + + while (*str != '\0') { + path = str; + + while ((*str != ':' && *str != '\0') + || (*str == ':' + && (*(str + 1) == '\\' || *(str + 1) == '/' + || *(str + 1) == ':'))) { + str++; + } + + if (*str == '\0') { + return(FALSE); + } + + str++; + + str = srv_parse_megabytes(str, &size); + + if (0 == strncmp(str, ":autoextend", + (sizeof ":autoextend") - 1)) { + + str += (sizeof ":autoextend") - 1; + + if (0 == strncmp(str, ":max:", + (sizeof ":max:") - 1)) { + + str += (sizeof ":max:") - 1; + + str = srv_parse_megabytes(str, &size); + } + + if (*str != '\0') { + + return(FALSE); + } + } + + if (strlen(str) >= 6 + && *str == 'n' + && *(str + 1) == 'e' + && *(str + 2) == 'w') { + str += 3; + } + + if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') { + str += 3; + } + + if (size == 0) { + return(FALSE); + } + + i++; + + if (*str == ';') { + str++; + } else if (*str != '\0') { + + return(FALSE); + } + } + + if (i == 0) { + /* If innodb_data_file_path was defined it must contain + at least one data file definition */ + + return(FALSE); + } + + srv_data_file_names = static_cast<char**>( + malloc(i * sizeof *srv_data_file_names)); + + srv_data_file_sizes = static_cast<ulint*>( + malloc(i * sizeof *srv_data_file_sizes)); + + srv_data_file_is_raw_partition = static_cast<ulint*>( + malloc(i * sizeof *srv_data_file_is_raw_partition)); + + srv_n_data_files = i; + + /* Then store the actual values to our arrays */ + + str = input_str; + i = 0; + + while (*str != '\0') { + path = str; + + /* Note that we must step over the ':' in a Windows path; + a Windows path normally looks like C:\ibdata\ibdata1:1G, but + a Windows raw partition may have a specification like + \\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */ + + while ((*str != ':' && *str != '\0') + || (*str == ':' + && (*(str + 1) == '\\' || *(str + 1) == '/' + || *(str + 1) == ':'))) { + str++; + } + + if (*str == ':') { + /* Make path a null-terminated string */ + *str = '\0'; + str++; + } + + str = 
srv_parse_megabytes(str, &size); + + srv_data_file_names[i] = path; + srv_data_file_sizes[i] = size; + + if (0 == strncmp(str, ":autoextend", + (sizeof ":autoextend") - 1)) { + + srv_auto_extend_last_data_file = TRUE; + + str += (sizeof ":autoextend") - 1; + + if (0 == strncmp(str, ":max:", + (sizeof ":max:") - 1)) { + + str += (sizeof ":max:") - 1; + + str = srv_parse_megabytes( + str, &srv_last_file_size_max); + } + + if (*str != '\0') { + + return(FALSE); + } + } + + (srv_data_file_is_raw_partition)[i] = 0; + + if (strlen(str) >= 6 + && *str == 'n' + && *(str + 1) == 'e' + && *(str + 2) == 'w') { + str += 3; + (srv_data_file_is_raw_partition)[i] = SRV_NEW_RAW; + } + + if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') { + str += 3; + + if ((srv_data_file_is_raw_partition)[i] == 0) { + (srv_data_file_is_raw_partition)[i] = SRV_OLD_RAW; + } + } + + i++; + + if (*str == ';') { + str++; + } + } + + return(TRUE); +} + +/*********************************************************************//** +Frees the memory allocated by srv_parse_data_file_paths_and_sizes() +and srv_parse_log_group_home_dirs(). */ +UNIV_INTERN +void +srv_free_paths_and_sizes(void) +/*==========================*/ +{ + free(srv_data_file_names); + srv_data_file_names = NULL; + free(srv_data_file_sizes); + srv_data_file_sizes = NULL; + free(srv_data_file_is_raw_partition); + srv_data_file_is_raw_partition = NULL; +} + +#ifndef UNIV_HOTBACKUP + +static ulint io_tid_i = 0; + +/********************************************************************//** +I/o-handler thread function. 
+@return OS_THREAD_DUMMY_RETURN */ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(io_handler_thread)( +/*==============================*/ + void* arg) /*!< in: pointer to the number of the segment in + the aio array */ +{ + ulint segment; + ulint tid_i = os_atomic_increment_ulint(&io_tid_i, 1) - 1; + + ut_ad(tid_i < srv_n_file_io_threads); + + segment = *((ulint*) arg); + + srv_io_tids[tid_i] = os_thread_get_tid(); + os_thread_set_priority(srv_io_tids[tid_i], srv_sched_priority_io); + +#ifdef UNIV_DEBUG_THREAD_CREATION + fprintf(stderr, "Io handler thread %lu starts, id %lu\n", segment, + os_thread_pf(os_thread_get_curr_id())); +#endif + +#ifdef UNIV_PFS_THREAD + pfs_register_thread(io_handler_thread_key); +#endif /* UNIV_PFS_THREAD */ + + while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) { + srv_current_thread_priority = srv_io_thread_priority; + fil_aio_wait(segment); + } + + /* We count the number of threads in os_thread_exit(). A created + thread should always use that to exit and not use return() to exit. + The thread actually never comes here because it is exited in an + os_event_wait(). */ + + os_thread_exit(NULL); + + OS_THREAD_DUMMY_RETURN; +} +#endif /* !UNIV_HOTBACKUP */ + +/*********************************************************************//** +Normalizes a directory path for Windows: converts slashes to backslashes. */ +UNIV_INTERN +void +srv_normalize_path_for_win( +/*=======================*/ + char* str __attribute__((unused))) /*!< in/out: null-terminated + character string */ +{ +#ifdef __WIN__ + for (; *str; str++) { + + if (*str == '/') { + *str = '\\'; + } + } +#endif +} + +#ifndef UNIV_HOTBACKUP +/*********************************************************************//** +Creates a log file. 
+@return DB_SUCCESS or error code */ +static __attribute__((nonnull, warn_unused_result)) +dberr_t +create_log_file( +/*============*/ + os_file_t* file, /*!< out: file handle */ + const char* name) /*!< in: log file name */ +{ + ibool ret; + + *file = os_file_create( + innodb_file_log_key, name, + OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL, + OS_LOG_FILE, &ret); + + if (!ret) { + ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create %s", name); + return(DB_ERROR); + } + + ib_logf(IB_LOG_LEVEL_INFO, + "Setting log file %s size to %lu MB", + name, (ulong) srv_log_file_size + >> (20 - UNIV_PAGE_SIZE_SHIFT)); + + ret = os_file_set_size(name, *file, + (os_offset_t) srv_log_file_size + << UNIV_PAGE_SIZE_SHIFT); + if (!ret) { + ib_logf(IB_LOG_LEVEL_ERROR, "Cannot set log file" + " %s to size %lu MB", name, (ulong) srv_log_file_size + >> (20 - UNIV_PAGE_SIZE_SHIFT)); + return(DB_ERROR); + } + + ret = os_file_close(*file); + ut_a(ret); + + return(DB_SUCCESS); +} + +/** Initial number of the first redo log file */ +#define INIT_LOG_FILE0 (SRV_N_LOG_FILES_MAX + 1) + +#ifdef DBUG_OFF +# define RECOVERY_CRASH(x) do {} while(0) +#else +# define RECOVERY_CRASH(x) do { \ + if (srv_force_recovery_crash == x) { \ + fprintf(stderr, "innodb_force_recovery_crash=%lu\n", \ + srv_force_recovery_crash); \ + fflush(stderr); \ + exit(3); \ + } \ +} while (0) +#endif + +/*********************************************************************//** +Creates all log files. 
+@return DB_SUCCESS or error code */ +static +dberr_t +create_log_files( +/*=============*/ + bool create_new_db, /*!< in: TRUE if new database is being + created */ + char* logfilename, /*!< in/out: buffer for log file name */ + size_t dirnamelen, /*!< in: length of the directory path */ + lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */ + char*& logfile0) /*!< out: name of the first log file */ +{ + if (srv_read_only_mode) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Cannot create log files in read-only mode"); + return(DB_READ_ONLY); + } + + /* We prevent system tablespace creation with existing files in + data directory. So we do not delete log files when creating new system + tablespace */ + if (!create_new_db) { + /* Remove any old log files. */ + for (unsigned i = 0; i <= INIT_LOG_FILE0; i++) { + sprintf(logfilename + dirnamelen, "ib_logfile%u", i); + + /* Ignore errors about non-existent files or files + that cannot be removed. The create_log_file() will + return an error when the file exists. */ +#ifdef __WIN__ + DeleteFile((LPCTSTR) logfilename); +#else + unlink(logfilename); +#endif + /* Crashing after deleting the first + file should be recoverable. The buffer + pool was clean, and we can simply create + all log files from the scratch. */ + RECOVERY_CRASH(6); + } + } + + ut_ad(!buf_pool_check_no_pending_io()); + + RECOVERY_CRASH(7); + + for (unsigned i = 0; i < srv_n_log_files; i++) { + sprintf(logfilename + dirnamelen, + "ib_logfile%u", i ? i : INIT_LOG_FILE0); + + dberr_t err = create_log_file(&files[i], logfilename); + + if (err != DB_SUCCESS) { + return(err); + } + } + + RECOVERY_CRASH(8); + + /* We did not create the first log file initially as + ib_logfile0, so that crash recovery cannot find it until it + has been completed and renamed. 
*/ + sprintf(logfilename + dirnamelen, "ib_logfile%u", INIT_LOG_FILE0); + + fil_space_create( + logfilename, SRV_LOG_SPACE_FIRST_ID, + fsp_flags_set_page_size(0, UNIV_PAGE_SIZE), + FIL_LOG); + ut_a(fil_validate()); + + logfile0 = fil_node_create( + logfilename, (ulint) srv_log_file_size, + SRV_LOG_SPACE_FIRST_ID, FALSE); + ut_a(logfile0); + + for (unsigned i = 1; i < srv_n_log_files; i++) { + sprintf(logfilename + dirnamelen, "ib_logfile%u", i); + + if (!fil_node_create( + logfilename, + (ulint) srv_log_file_size, + SRV_LOG_SPACE_FIRST_ID, FALSE)) { + ut_error; + } + } + +#ifdef UNIV_LOG_ARCHIVE + /* Create the file space object for archived logs. */ + fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1, + 0, FIL_LOG); +#endif + log_group_init(0, srv_n_log_files, + srv_log_file_size * UNIV_PAGE_SIZE, + SRV_LOG_SPACE_FIRST_ID, + SRV_LOG_SPACE_FIRST_ID + 1); + + fil_open_log_and_system_tablespace_files(); + + /* Create a log checkpoint. */ + mutex_enter(&log_sys->mutex); + ut_d(recv_no_log_write = FALSE); + recv_reset_logs( +#ifdef UNIV_LOG_ARCHIVE + UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no, + TRUE, +#endif + lsn); + mutex_exit(&log_sys->mutex); + + return(DB_SUCCESS); +} + +/*********************************************************************//** +Renames the first log file. */ +static +void +create_log_files_rename( +/*====================*/ + char* logfilename, /*!< in/out: buffer for log file name */ + size_t dirnamelen, /*!< in: length of the directory path */ + lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */ + char* logfile0) /*!< in/out: name of the first log file */ +{ + /* If innodb_flush_method=O_DSYNC, + we need to explicitly flush the log buffers. */ + fil_flush(SRV_LOG_SPACE_FIRST_ID); + /* Close the log files, so that we can rename + the first one. */ + fil_close_log_files(false); + + /* Rename the first log file, now that a log + checkpoint has been created. 
*/ + sprintf(logfilename + dirnamelen, "ib_logfile%u", 0); + + RECOVERY_CRASH(9); + + ib_logf(IB_LOG_LEVEL_INFO, + "Renaming log file %s to %s", logfile0, logfilename); + + mutex_enter(&log_sys->mutex); + ut_ad(strlen(logfile0) == 2 + strlen(logfilename)); + ibool success = os_file_rename( + innodb_file_log_key, logfile0, logfilename); + ut_a(success); + + RECOVERY_CRASH(10); + + /* Replace the first file with ib_logfile0. */ + strcpy(logfile0, logfilename); + mutex_exit(&log_sys->mutex); + + fil_open_log_and_system_tablespace_files(); + + ib_logf(IB_LOG_LEVEL_WARN, "New log files created, LSN=" LSN_PF, lsn); +} + +/*********************************************************************//** +Opens a log file. +@return DB_SUCCESS or error code */ +static __attribute__((nonnull, warn_unused_result)) +dberr_t +open_log_file( +/*==========*/ + os_file_t* file, /*!< out: file handle */ + const char* name, /*!< in: log file name */ + os_offset_t* size) /*!< out: file size */ +{ + ibool ret; + + *file = os_file_create(innodb_file_log_key, name, + OS_FILE_OPEN, OS_FILE_AIO, + OS_LOG_FILE, &ret); + if (!ret) { + ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name); + return(DB_ERROR); + } + + *size = os_file_get_size(*file); + + ret = os_file_close(*file); + ut_a(ret); + return(DB_SUCCESS); +} + +/*********************************************************************//** +Creates or opens database data files and closes them. 
+@return DB_SUCCESS or error code */ +static __attribute__((nonnull, warn_unused_result)) +dberr_t +open_or_create_data_files( +/*======================*/ + ibool* create_new_db, /*!< out: TRUE if new database should be + created */ +#ifdef UNIV_LOG_ARCHIVE + lsn_t* min_arch_log_no,/*!< out: min of archived log + numbers in data files */ + lsn_t* max_arch_log_no,/*!< out: max of archived log + numbers in data files */ +#endif /* UNIV_LOG_ARCHIVE */ + lsn_t* min_flushed_lsn,/*!< out: min of flushed lsn + values in data files */ + lsn_t* max_flushed_lsn,/*!< out: max of flushed lsn + values in data files */ + ulint* sum_of_new_sizes)/*!< out: sum of sizes of the + new files added */ +{ + ibool ret; + ulint i; + ibool one_opened = FALSE; + ibool one_created = FALSE; + os_offset_t size; + ulint flags; + ulint space; + ulint rounded_size_pages; + char name[10000]; + + if (srv_n_data_files >= 1000) { + + ib_logf(IB_LOG_LEVEL_ERROR, + "Can only have < 1000 data files, you have " + "defined %lu", (ulong) srv_n_data_files); + + return(DB_ERROR); + } + + *sum_of_new_sizes = 0; + + *create_new_db = FALSE; + + srv_normalize_path_for_win(srv_data_home); + + for (i = 0; i < srv_n_data_files; i++) { + ulint dirnamelen; + + srv_normalize_path_for_win(srv_data_file_names[i]); + dirnamelen = strlen(srv_data_home); + + ut_a(dirnamelen + strlen(srv_data_file_names[i]) + < (sizeof name) - 1); + + memcpy(name, srv_data_home, dirnamelen); + + /* Add a path separator if needed. */ + if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) { + name[dirnamelen++] = SRV_PATH_SEPARATOR; + } + + strcpy(name + dirnamelen, srv_data_file_names[i]); + + /* Note: It will return true if the file doesn' exist. 
*/ + + if (!srv_file_check_mode(name)) { + + return(DB_FAIL); + + } else if (srv_data_file_is_raw_partition[i] == 0) { + + /* First we try to create the file: if it already + exists, ret will get value FALSE */ + + files[i] = os_file_create( + innodb_file_data_key, name, OS_FILE_CREATE, + OS_FILE_NORMAL, OS_DATA_FILE, &ret); + + if (srv_read_only_mode) { + + if (ret) { + goto size_check; + } + + ib_logf(IB_LOG_LEVEL_ERROR, + "Opening %s failed!", name); + + return(DB_ERROR); + + } else if (!ret + && os_file_get_last_error(false) + != OS_FILE_ALREADY_EXISTS +#ifdef UNIV_AIX + /* AIX 5.1 after security patch ML7 may have + errno set to 0 here, which causes our + function to return 100; work around that + AIX problem */ + && os_file_get_last_error(false) != 100 +#endif /* UNIV_AIX */ + ) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Creating or opening %s failed!", + name); + + return(DB_ERROR); + } + + } else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) { + + ut_a(!srv_read_only_mode); + + /* The partition is opened, not created; then it is + written over */ + + srv_start_raw_disk_in_use = TRUE; + srv_created_new_raw = TRUE; + + files[i] = os_file_create( + innodb_file_data_key, name, OS_FILE_OPEN_RAW, + OS_FILE_NORMAL, OS_DATA_FILE, &ret); + + if (!ret) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Error in opening %s", name); + + return(DB_ERROR); + } + } else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) { + srv_start_raw_disk_in_use = TRUE; + + ret = FALSE; + } else { + ut_a(0); + } + + if (ret == FALSE) { + const char* check_msg; + /* We open the data file */ + + if (one_created) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Data files can only be added at " + "the end of a tablespace, but " + "data file %s existed beforehand.", + name); + return(DB_ERROR); + } + if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) { + ut_a(!srv_read_only_mode); + files[i] = os_file_create( + innodb_file_data_key, + name, OS_FILE_OPEN_RAW, + OS_FILE_NORMAL, OS_DATA_FILE, &ret); + } else if (i 
== 0) { + files[i] = os_file_create( + innodb_file_data_key, + name, OS_FILE_OPEN_RETRY, + OS_FILE_NORMAL, OS_DATA_FILE, &ret); + } else { + files[i] = os_file_create( + innodb_file_data_key, + name, OS_FILE_OPEN, OS_FILE_NORMAL, + OS_DATA_FILE, &ret); + } + + if (!ret) { + + os_file_get_last_error(true); + + ib_logf(IB_LOG_LEVEL_ERROR, + "Can't open '%s'", name); + + return(DB_ERROR); + } + + if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) { + + goto skip_size_check; + } + +size_check: + size = os_file_get_size(files[i]); + ut_a(size != (os_offset_t) -1); + /* Round size downward to megabytes */ + + rounded_size_pages = (ulint) + (size >> UNIV_PAGE_SIZE_SHIFT); + + if (i == srv_n_data_files - 1 + && srv_auto_extend_last_data_file) { + + if (srv_data_file_sizes[i] > rounded_size_pages + || (srv_last_file_size_max > 0 + && srv_last_file_size_max + < rounded_size_pages)) { + + ib_logf(IB_LOG_LEVEL_ERROR, + "auto-extending " + "data file %s is " + "of a different size " + "%lu pages (rounded " + "down to MB) than specified " + "in the .cnf file: " + "initial %lu pages, " + "max %lu (relevant if " + "non-zero) pages!", + name, + (ulong) rounded_size_pages, + (ulong) srv_data_file_sizes[i], + (ulong) + srv_last_file_size_max); + + return(DB_ERROR); + } + + srv_data_file_sizes[i] = rounded_size_pages; + } + + if (rounded_size_pages != srv_data_file_sizes[i]) { + + ib_logf(IB_LOG_LEVEL_ERROR, + "Data file %s is of a different " + "size %lu pages (rounded down to MB) " + "than specified in the .cnf file " + "%lu pages!", + name, + (ulong) rounded_size_pages, + (ulong) srv_data_file_sizes[i]); + + return(DB_ERROR); + } +skip_size_check: + + /* This is the earliest location where we can load + the double write buffer. 
*/ + if (i == 0) { + buf_dblwr_init_or_load_pages( + files[i], srv_data_file_names[i], true); + } + + bool retry = true; +check_first_page: + check_msg = fil_read_first_page( + files[i], one_opened, &flags, &space, + min_flushed_lsn, max_flushed_lsn); + + if (check_msg) { + + if (retry) { + fsp_open_info fsp; + const ulint page_no = 0; + + retry = false; + fsp.id = 0; + fsp.filepath = srv_data_file_names[i]; + fsp.file = files[i]; + + if (fil_user_tablespace_restore_page( + &fsp, page_no)) { + goto check_first_page; + } + } + + ib_logf(IB_LOG_LEVEL_ERROR, + "%s in data file %s", + check_msg, name); + return(DB_ERROR); + } + + /* The first file of the system tablespace must + have space ID = TRX_SYS_SPACE. The FSP_SPACE_ID + field in files greater than ibdata1 are unreliable. */ + ut_a(one_opened || space == TRX_SYS_SPACE); + + /* Check the flags for the first system tablespace + file only. */ + if (!one_opened + && UNIV_PAGE_SIZE + != fsp_flags_get_page_size(flags)) { + + ib_logf(IB_LOG_LEVEL_ERROR, + "Data file \"%s\" uses page size %lu," + "but the start-up parameter " + "is --innodb-page-size=%lu", + name, + fsp_flags_get_page_size(flags), + UNIV_PAGE_SIZE); + + return(DB_ERROR); + } + + one_opened = TRUE; + } else if (!srv_read_only_mode) { + /* We created the data file and now write it full of + zeros */ + + one_created = TRUE; + + if (i > 0) { + ib_logf(IB_LOG_LEVEL_INFO, + "Data file %s did not" + " exist: new to be created", + name); + } else { + ib_logf(IB_LOG_LEVEL_INFO, + "The first specified " + "data file %s did not exist: " + "a new database to be created!", + name); + + *create_new_db = TRUE; + } + + ib_logf(IB_LOG_LEVEL_INFO, + "Setting file %s size to %lu MB", + name, + (ulong) (srv_data_file_sizes[i] + >> (20 - UNIV_PAGE_SIZE_SHIFT))); + + ib_logf(IB_LOG_LEVEL_INFO, + "Database physically writes the" + " file full: wait..."); + + ret = os_file_set_size( + name, files[i], + (os_offset_t) srv_data_file_sizes[i] + << UNIV_PAGE_SIZE_SHIFT); + + if 
(!ret) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Error in creating %s: " + "probably out of disk space", + name); + + return(DB_ERROR); + } + + *sum_of_new_sizes += srv_data_file_sizes[i]; + } + + ret = os_file_close(files[i]); + ut_a(ret); + + if (i == 0) { + flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE); + fil_space_create(name, 0, flags, FIL_TABLESPACE); + } + + ut_a(fil_validate()); + + if (!fil_node_create(name, srv_data_file_sizes[i], 0, + srv_data_file_is_raw_partition[i] != 0)) { + return(DB_ERROR); + } + } + + return(DB_SUCCESS); +} + +/*********************************************************************//** +Create undo tablespace. +@return DB_SUCCESS or error code */ +static +dberr_t +srv_undo_tablespace_create( +/*=======================*/ + const char* name, /*!< in: tablespace name */ + ulint size) /*!< in: tablespace size in pages */ +{ + os_file_t fh; + ibool ret; + dberr_t err = DB_SUCCESS; + + os_file_create_subdirs_if_needed(name); + + fh = os_file_create( + innodb_file_data_key, + name, + srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE, + OS_FILE_NORMAL, OS_DATA_FILE, &ret); + + if (srv_read_only_mode && ret) { + ib_logf(IB_LOG_LEVEL_INFO, + "%s opened in read-only mode", name); + } else if (ret == FALSE) { + if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS +#ifdef UNIV_AIX + /* AIX 5.1 after security patch ML7 may have + errno set to 0 here, which causes our function + to return 100; work around that AIX problem */ + && os_file_get_last_error(false) != 100 +#endif /* UNIV_AIX */ + ) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Can't create UNDO tablespace %s", name); + } else { + ib_logf(IB_LOG_LEVEL_ERROR, + "Creating system tablespace with" + " existing undo tablespaces is not" + " supported. 
Please delete all undo" + " tablespaces before creating new" + " system tablespace."); + } + err = DB_ERROR; + } else { + ut_a(!srv_read_only_mode); + + /* We created the data file and now write it full of zeros */ + + ib_logf(IB_LOG_LEVEL_INFO, + "Data file %s did not exist: new to be created", + name); + + ib_logf(IB_LOG_LEVEL_INFO, + "Setting file %s size to %lu MB", + name, size >> (20 - UNIV_PAGE_SIZE_SHIFT)); + + ib_logf(IB_LOG_LEVEL_INFO, + "Database physically writes the file full: wait..."); + + ret = os_file_set_size(name, fh, size << UNIV_PAGE_SIZE_SHIFT); + + if (!ret) { + ib_logf(IB_LOG_LEVEL_INFO, + "Error in creating %s: probably out of " + "disk space", name); + + err = DB_ERROR; + } + + os_file_close(fh); + } + + return(err); +} + +/*********************************************************************//** +Open an undo tablespace. +@return DB_SUCCESS or error code */ +static +dberr_t +srv_undo_tablespace_open( +/*=====================*/ + const char* name, /*!< in: tablespace name */ + ulint space) /*!< in: tablespace id */ +{ + os_file_t fh; + dberr_t err = DB_ERROR; + ibool ret; + ulint flags; + + if (!srv_file_check_mode(name)) { + ib_logf(IB_LOG_LEVEL_ERROR, + "UNDO tablespaces must be %s!", + srv_read_only_mode ? "writable" : "readable"); + + return(DB_ERROR); + } + + fh = os_file_create( + innodb_file_data_key, name, + OS_FILE_OPEN_RETRY + | OS_FILE_ON_ERROR_NO_EXIT + | OS_FILE_ON_ERROR_SILENT, + OS_FILE_NORMAL, + OS_DATA_FILE, + &ret); + + /* If the file open was successful then load the tablespace. */ + + if (ret) { + os_offset_t size; + + size = os_file_get_size(fh); + ut_a(size != (os_offset_t) -1); + + ret = os_file_close(fh); + ut_a(ret); + + /* Load the tablespace into InnoDB's internal + data structures. */ + + /* We set the biggest space id to the undo tablespace + because InnoDB hasn't opened any other tablespace apart + from the system tablespace. 
*/ + + fil_set_max_space_id_if_bigger(space); + + /* Set the compressed page size to 0 (non-compressed) */ + flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE); + fil_space_create(name, space, flags, FIL_TABLESPACE); + + ut_a(fil_validate()); + + os_offset_t n_pages = size / UNIV_PAGE_SIZE; + + /* On 64 bit Windows ulint can be 32 bit and os_offset_t + is 64 bit. It is OK to cast the n_pages to ulint because + the unit has been scaled to pages and they are always + 32 bit. */ + if (fil_node_create(name, (ulint) n_pages, space, FALSE)) { + err = DB_SUCCESS; + } + } + + return(err); +} + +/******************************************************************** +Opens the configured number of undo tablespaces. +@return DB_SUCCESS or error code */ +static +dberr_t +srv_undo_tablespaces_init( +/*======================*/ + ibool create_new_db, /*!< in: TRUE if new db being + created */ + const ulint n_conf_tablespaces, /*!< in: configured undo + tablespaces */ + ulint* n_opened) /*!< out: number of UNDO + tablespaces successfully + discovered and opened */ +{ + ulint i; + dberr_t err = DB_SUCCESS; + ulint prev_space_id = 0; + ulint n_undo_tablespaces; + ulint undo_tablespace_ids[TRX_SYS_N_RSEGS + 1]; + + *n_opened = 0; + + ut_a(n_conf_tablespaces <= TRX_SYS_N_RSEGS); + + memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids)); + + /* Create the undo spaces only if we are creating a new + instance. We don't allow creating of new undo tablespaces + in an existing instance (yet). This restriction exists because + we check in several places for SYSTEM tablespaces to be less than + the min of user defined tablespace ids. Once we implement saving + the location of the undo tablespaces and their space ids this + restriction will/should be lifted. 
*/ + + for (i = 0; create_new_db && i < n_conf_tablespaces; ++i) { + char name[OS_FILE_MAX_PATH]; + + ut_snprintf( + name, sizeof(name), + "%s%cundo%03lu", + srv_undo_dir, SRV_PATH_SEPARATOR, i + 1); + + /* Undo space ids start from 1. */ + err = srv_undo_tablespace_create( + name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES); + + if (err != DB_SUCCESS) { + + ib_logf(IB_LOG_LEVEL_ERROR, + "Could not create undo tablespace '%s'.", + name); + + return(err); + } + } + + /* Get the tablespace ids of all the undo segments excluding + the system tablespace (0). If we are creating a new instance then + we build the undo_tablespace_ids ourselves since they don't + already exist. */ + + if (!create_new_db) { + n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces( + undo_tablespace_ids); + } else { + n_undo_tablespaces = n_conf_tablespaces; + + for (i = 1; i <= n_undo_tablespaces; ++i) { + undo_tablespace_ids[i - 1] = i; + } + + undo_tablespace_ids[i] = ULINT_UNDEFINED; + } + + /* Open all the undo tablespaces that are currently in use. If we + fail to open any of these it is a fatal error. The tablespace ids + should be contiguous. It is a fatal error because they are required + for recovery and are referenced by the UNDO logs (a.k.a RBS). */ + + for (i = 0; i < n_undo_tablespaces; ++i) { + char name[OS_FILE_MAX_PATH]; + + ut_snprintf( + name, sizeof(name), + "%s%cundo%03lu", + srv_undo_dir, SRV_PATH_SEPARATOR, + undo_tablespace_ids[i]); + + /* Should be no gaps in undo tablespace ids. */ + ut_a(prev_space_id + 1 == undo_tablespace_ids[i]); + + /* The system space id should not be in this array. */ + ut_a(undo_tablespace_ids[i] != 0); + ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED); + + /* Undo space ids start from 1. 
*/ + + err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]); + + if (err != DB_SUCCESS) { + + ib_logf(IB_LOG_LEVEL_ERROR, + "Unable to open undo tablespace '%s'.", name); + + return(err); + } + + prev_space_id = undo_tablespace_ids[i]; + + ++*n_opened; + } + + /* Open any extra unused undo tablespaces. These must be contiguous. + We stop at the first failure. These are undo tablespaces that are + not in use and therefore not required by recovery. We only check + that there are no gaps. */ + + for (i = prev_space_id + 1; i < TRX_SYS_N_RSEGS; ++i) { + char name[OS_FILE_MAX_PATH]; + + ut_snprintf( + name, sizeof(name), + "%s%cundo%03lu", srv_undo_dir, SRV_PATH_SEPARATOR, i); + + /* Undo space ids start from 1. */ + err = srv_undo_tablespace_open(name, i); + + if (err != DB_SUCCESS) { + break; + } + + ++n_undo_tablespaces; + + ++*n_opened; + } + + /* If the user says that there are fewer than what we find we + tolerate that discrepancy but not the inverse. Because there could + be unused undo tablespaces for future use. */ + + if (n_conf_tablespaces > n_undo_tablespaces) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Expected to open %lu undo " + "tablespaces but was able\n", + n_conf_tablespaces); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: to find only %lu undo " + "tablespaces.\n", n_undo_tablespaces); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Set the " + "innodb_undo_tablespaces parameter to " + "the\n"); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: correct value and retry. Suggested " + "value is %lu\n", n_undo_tablespaces); + + return(err != DB_SUCCESS ? 
err : DB_ERROR); + + } else if (n_undo_tablespaces > 0) { + + ib_logf(IB_LOG_LEVEL_INFO, "Opened %lu undo tablespaces", + n_undo_tablespaces); + + if (n_conf_tablespaces == 0) { + ib_logf(IB_LOG_LEVEL_WARN, + "Using the system tablespace for all UNDO " + "logging because innodb_undo_tablespaces=0"); + } + } + + if (create_new_db) { + mtr_t mtr; + + mtr_start(&mtr); + + /* The undo log tablespace */ + for (i = 1; i <= n_undo_tablespaces; ++i) { + + fsp_header_init( + i, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr); + } + + mtr_commit(&mtr); + } + + return(DB_SUCCESS); +} + +/******************************************************************** +Wait for the purge thread(s) to start up. */ +static +void +srv_start_wait_for_purge_to_start() +/*===============================*/ +{ + /* Wait for the purge coordinator and master thread to startup. */ + + purge_state_t state = trx_purge_state(); + + ut_a(state != PURGE_STATE_DISABLED); + + while (srv_shutdown_state == SRV_SHUTDOWN_NONE + && srv_force_recovery < SRV_FORCE_NO_BACKGROUND + && state == PURGE_STATE_INIT) { + + switch (state = trx_purge_state()) { + case PURGE_STATE_RUN: + case PURGE_STATE_STOP: + break; + + case PURGE_STATE_INIT: + ib_logf(IB_LOG_LEVEL_INFO, + "Waiting for purge to start"); + + os_thread_sleep(50000); + break; + + case PURGE_STATE_EXIT: + case PURGE_STATE_DISABLED: + ut_error; + } + } +} + +/*********************************************************************//** +Initializes the log tracking subsystem and starts its thread. 
*/ +static +void +init_log_online(void) +/*=================*/ +{ + if (UNIV_UNLIKELY(srv_force_recovery > 0 || srv_read_only_mode)) { + srv_track_changed_pages = FALSE; + return; + } + + if (srv_track_changed_pages) { + + log_online_read_init(); + + /* Create the thread that follows the redo log to output the + changed page bitmap */ + os_thread_create(&srv_redo_log_follow_thread, NULL, + thread_ids + 5 + SRV_MAX_N_IO_THREADS); + } +} + +/******************************************************************** +Starts InnoDB and creates a new database if database files +are not found and the user wants. +@return DB_SUCCESS or error code */ +UNIV_INTERN +dberr_t +innobase_start_or_create_for_mysql(void) +/*====================================*/ +{ + ibool create_new_db; + lsn_t min_flushed_lsn; + lsn_t max_flushed_lsn; +#ifdef UNIV_LOG_ARCHIVE + lsn_t min_arch_log_no = LSN_MAX; + lsn_t max_arch_log_no = LSN_MAX; +#endif /* UNIV_LOG_ARCHIVE */ + ulint sum_of_new_sizes; + ulint sum_of_data_file_sizes; + ulint tablespace_size_in_header; + dberr_t err; + unsigned i; + ulint srv_n_log_files_found = srv_n_log_files; + ulint io_limit; + mtr_t mtr; + ib_bh_t* ib_bh; + ulint n_recovered_trx; + char logfilename[10000]; + char* logfile0 = NULL; + size_t dirnamelen; + + if (srv_force_recovery > SRV_FORCE_NO_TRX_UNDO) { + srv_read_only_mode = true; + } + + if (srv_read_only_mode) { + ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode"); + } + +#ifdef HAVE_DARWIN_THREADS +# ifdef F_FULLFSYNC + /* This executable has been compiled on Mac OS X 10.3 or later. + Assume that F_FULLFSYNC is available at run-time. */ + srv_have_fullfsync = TRUE; +# else /* F_FULLFSYNC */ + /* This executable has been compiled on Mac OS X 10.2 + or earlier. Determine if the executable is running + on Mac OS X 10.3 or later. 
*/ + struct utsname utsname; + if (uname(&utsname)) { + ut_print_timestamp(stderr); + fputs(" InnoDB: cannot determine Mac OS X version!\n", stderr); + } else { + srv_have_fullfsync = strcmp(utsname.release, "7.") >= 0; + } + if (!srv_have_fullfsync) { + ut_print_timestamp(stderr); + fputs(" InnoDB: On Mac OS X, fsync() may be " + "broken on internal drives,\n", stderr); + ut_print_timestamp(stderr); + fputs(" InnoDB: making transactions unsafe!\n", stderr); + } +# endif /* F_FULLFSYNC */ +#endif /* HAVE_DARWIN_THREADS */ + + ib_logf(IB_LOG_LEVEL_INFO, + "Using %s to ref count buffer pool pages", +#ifdef PAGE_ATOMIC_REF_COUNT + "atomics" +#else + "mutexes" +#endif /* PAGE_ATOMIC_REF_COUNT */ + ); + + + if (sizeof(ulint) != sizeof(void*)) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: size of InnoDB's ulint is %lu, " + "but size of void*\n", (ulong) sizeof(ulint)); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: is %lu. The sizes should be the same " + "so that on a 64-bit\n", + (ulong) sizeof(void*)); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: platforms you can allocate more than 4 GB " + "of memory.\n"); + } + +#ifdef UNIV_DEBUG + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!\n"); +#endif + +#ifdef UNIV_IBUF_DEBUG + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!\n"); +# ifdef UNIV_IBUF_COUNT_DEBUG + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: !!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on " + "!!!!!!!!!\n"); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG\n"); +# endif +#endif + +#ifdef UNIV_BLOB_DEBUG + fprintf(stderr, + "InnoDB: !!!!!!!! 
UNIV_BLOB_DEBUG switched on !!!!!!!!!\n" + "InnoDB: Server restart may fail with UNIV_BLOB_DEBUG\n"); +#endif /* UNIV_BLOB_DEBUG */ + +#ifdef UNIV_SYNC_DEBUG + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!\n"); +#endif + +#ifdef UNIV_SEARCH_DEBUG + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: !!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!\n"); +#endif + +#ifdef UNIV_LOG_LSN_DEBUG + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: !!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!\n"); +#endif /* UNIV_LOG_LSN_DEBUG */ +#ifdef UNIV_MEM_DEBUG + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!\n"); +#endif + + if (srv_use_sys_malloc) { + ib_logf(IB_LOG_LEVEL_INFO, + "The InnoDB memory heap is disabled"); + } + +#if defined(COMPILER_HINTS_ENABLED) + ib_logf(IB_LOG_LEVEL_INFO, + " InnoDB: Compiler hints enabled."); +#endif /* defined(COMPILER_HINTS_ENABLED) */ + + ib_logf(IB_LOG_LEVEL_INFO, + "" IB_ATOMICS_STARTUP_MSG ""); + + ib_logf(IB_LOG_LEVEL_INFO, + "" IB_MEMORY_BARRIER_STARTUP_MSG ""); + +#ifndef HAVE_MEMORY_BARRIER +#if defined __i386__ || defined __x86_64__ || defined _M_IX86 || defined _M_X64 || defined __WIN__ +#else + ib_logf(IB_LOG_LEVEL_WARN, + "MySQL was built without a memory barrier capability on this" + " architecture, which might allow a mutex/rw_lock violation" + " under high thread concurrency. 
This may cause a hang."); +#endif /* IA32 or AMD64 */ +#endif /* HAVE_MEMORY_BARRIER */ + + ib_logf(IB_LOG_LEVEL_INFO, + "Compressed tables use zlib " ZLIB_VERSION +#ifdef UNIV_ZIP_DEBUG + " with validation" +#endif /* UNIV_ZIP_DEBUG */ + ); +#ifdef UNIV_ZIP_COPY + ib_logf(IB_LOG_LEVEL_INFO, "and extra copying"); +#endif /* UNIV_ZIP_COPY */ + + + /* Since InnoDB does not currently clean up all its internal data + structures in MySQL Embedded Server Library server_end(), we + print an error message if someone tries to start up InnoDB a + second time during the process lifetime. */ + + if (srv_start_has_been_called) { + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: Error: startup called second time " + "during the process\n"); + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: lifetime. In the MySQL Embedded " + "Server Library you\n"); + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: cannot call server_init() more " + "than once during the\n"); + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: process lifetime.\n"); + } + + srv_start_has_been_called = TRUE; + +#ifdef UNIV_DEBUG + log_do_write = TRUE; +#endif /* UNIV_DEBUG */ + /* yydebug = TRUE; */ + + srv_is_being_started = TRUE; + srv_startup_is_before_trx_rollback_phase = TRUE; + +#ifdef __WIN__ + switch (os_get_os_version()) { + case OS_WIN95: + case OS_WIN31: + case OS_WINNT: + /* On Win 95, 98, ME, Win32 subsystem for Windows 3.1, + and NT use simulated aio. In NT Windows provides async i/o, + but when run in conjunction with InnoDB Hot Backup, it seemed + to corrupt the data files. */ + + srv_use_native_aio = FALSE; + break; + + case OS_WIN2000: + case OS_WINXP: + /* On 2000 and XP, async IO is available. 
*/ + srv_use_native_aio = TRUE; + break; + + default: + /* Vista and later have both async IO and condition variables */ + srv_use_native_aio = TRUE; + srv_use_native_conditions = TRUE; + break; + } + +#elif defined(LINUX_NATIVE_AIO) + + if (srv_use_native_aio) { + ib_logf(IB_LOG_LEVEL_INFO, "Using Linux native AIO"); + } +#else + /* Currently native AIO is supported only on windows and linux + and that also when the support is compiled in. In all other + cases, we ignore the setting of innodb_use_native_aio. */ + srv_use_native_aio = FALSE; +#endif /* __WIN__ */ + + if (srv_file_flush_method_str == NULL) { + /* These are the default options */ + + srv_unix_file_flush_method = SRV_UNIX_FSYNC; + + srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; +#ifndef __WIN__ + } else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) { + srv_unix_file_flush_method = SRV_UNIX_FSYNC; + + } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) { + srv_unix_file_flush_method = SRV_UNIX_O_DSYNC; + + } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) { + srv_unix_file_flush_method = SRV_UNIX_O_DIRECT; + + } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) { + srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT; + + } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT_NO_FSYNC")) { + srv_unix_file_flush_method = SRV_UNIX_O_DIRECT_NO_FSYNC; + + } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) { + srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC; + + } else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) { + srv_unix_file_flush_method = SRV_UNIX_NOSYNC; +#else + } else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) { + srv_win_file_flush_method = SRV_WIN_IO_NORMAL; + srv_use_native_aio = FALSE; + + } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) { + srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; + srv_use_native_aio = FALSE; + + } else if (0 == 
ut_strcmp(srv_file_flush_method_str, + "async_unbuffered")) { + srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; +#endif /* __WIN__ */ + } else { + ib_logf(IB_LOG_LEVEL_ERROR, + "Unrecognized value %s for innodb_flush_method", + srv_file_flush_method_str); + return(DB_ERROR); + } + + /* Note that the call srv_boot() also changes the values of + some variables to the units used by InnoDB internally */ + + /* Set the maximum number of threads which can wait for a semaphore + inside InnoDB: this is the 'sync wait array' size, as well as the + maximum number of threads that can wait in the 'srv_conc array' for + their time to enter InnoDB. */ + +#define BUF_POOL_SIZE_THRESHOLD (1024 * 1024 * 1024) + srv_max_n_threads = 1 /* io_ibuf_thread */ + + 1 /* io_log_thread */ + + 1 /* lock_wait_timeout_thread */ + + 1 /* srv_error_monitor_thread */ + + 1 /* srv_monitor_thread */ + + 1 /* srv_master_thread */ + + 1 /* srv_redo_log_follow_thread */ + + 1 /* srv_purge_coordinator_thread */ + + 1 /* buf_dump_thread */ + + 1 /* dict_stats_thread */ + + 1 /* fts_optimize_thread */ + + 1 /* recv_writer_thread */ + + 1 /* buf_flush_page_cleaner_thread */ + + 1 /* trx_rollback_or_clean_all_recovered */ + + 128 /* added as margin, for use of + InnoDB Memcached etc. */ + + max_connections + + srv_n_read_io_threads + + srv_n_write_io_threads + + srv_n_purge_threads + /* FTS Parallel Sort */ + + fts_sort_pll_degree * FTS_NUM_AUX_INDEX + * max_connections; + + if (srv_buf_pool_size < BUF_POOL_SIZE_THRESHOLD) { + /* If buffer pool is less than 1 GB, + use only one buffer pool instance */ + srv_buf_pool_instances = 1; + } + + srv_boot(); + + ib_logf(IB_LOG_LEVEL_INFO, + "%s CPU crc32 instructions", + ut_crc32_sse2_enabled ? 
"Using" : "Not using"); + + if (!srv_read_only_mode) { + + mutex_create(srv_monitor_file_mutex_key, + &srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK); + + if (srv_innodb_status) { + + srv_monitor_file_name = static_cast<char*>( + mem_alloc( + strlen(fil_path_to_mysql_datadir) + + 20 + sizeof "/innodb_status.")); + + sprintf(srv_monitor_file_name, "%s/innodb_status.%lu", + fil_path_to_mysql_datadir, + os_proc_get_number()); + + srv_monitor_file = fopen(srv_monitor_file_name, "w+"); + + if (!srv_monitor_file) { + + ib_logf(IB_LOG_LEVEL_ERROR, + "Unable to create %s: %s", + srv_monitor_file_name, + strerror(errno)); + + return(DB_ERROR); + } + } else { + srv_monitor_file_name = NULL; + srv_monitor_file = os_file_create_tmpfile(); + + if (!srv_monitor_file) { + return(DB_ERROR); + } + } + + mutex_create(srv_dict_tmpfile_mutex_key, + &srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION); + + srv_dict_tmpfile = os_file_create_tmpfile(); + + if (!srv_dict_tmpfile) { + return(DB_ERROR); + } + + mutex_create(srv_misc_tmpfile_mutex_key, + &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH); + + srv_misc_tmpfile = os_file_create_tmpfile(); + + if (!srv_misc_tmpfile) { + return(DB_ERROR); + } + } + + /* If user has set the value of innodb_file_io_threads then + we'll emit a message telling the user that this parameter + is now deprecated. */ + if (srv_n_file_io_threads != 4) { + ib_logf(IB_LOG_LEVEL_WARN, + "innodb_file_io_threads is deprecated. Please use " + "innodb_read_io_threads and innodb_write_io_threads " + "instead"); + } + + /* Now overwrite the value on srv_n_file_io_threads */ + srv_n_file_io_threads = srv_n_read_io_threads; + + if (!srv_read_only_mode) { + /* Add the log and ibuf IO threads. 
*/ + srv_n_file_io_threads += 2; + srv_n_file_io_threads += srv_n_write_io_threads; + } else { + ib_logf(IB_LOG_LEVEL_INFO, + "Disabling background IO write threads."); + + srv_n_write_io_threads = 0; + } + + ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS); + + io_limit = 8 * SRV_N_PENDING_IOS_PER_THREAD; + + /* On Windows when using native aio the number of aio requests + that a thread can handle at a given time is limited to 32 + i.e.: SRV_N_PENDING_IOS_PER_THREAD */ +# ifdef __WIN__ + if (srv_use_native_aio) { + io_limit = SRV_N_PENDING_IOS_PER_THREAD; + } +# endif /* __WIN__ */ + + if (!os_aio_init(io_limit, + srv_n_read_io_threads, + srv_n_write_io_threads, + SRV_MAX_N_PENDING_SYNC_IOS)) { + + ib_logf(IB_LOG_LEVEL_ERROR, + "Fatal : Cannot initialize AIO sub-system"); + + return(DB_ERROR); + } + + fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files); + + double size; + char unit; + + if (srv_buf_pool_size >= 1024 * 1024 * 1024) { + size = ((double) srv_buf_pool_size) / (1024 * 1024 * 1024); + unit = 'G'; + } else { + size = ((double) srv_buf_pool_size) / (1024 * 1024); + unit = 'M'; + } + + /* Print time to initialize the buffer pool */ + ib_logf(IB_LOG_LEVEL_INFO, + "Initializing buffer pool, size = %.1f%c", size, unit); + + err = buf_pool_init(srv_buf_pool_size, (ibool) srv_buf_pool_populate, + srv_buf_pool_instances); + + if (err != DB_SUCCESS) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Cannot allocate memory for the buffer pool"); + + return(DB_ERROR); + } + + ib_logf(IB_LOG_LEVEL_INFO, + "Completed initialization of buffer pool"); + +#ifdef UNIV_DEBUG + /* We have observed deadlocks with a 5MB buffer pool but + the actual lower limit could very well be a little higher. 
*/ + + if (srv_buf_pool_size <= 5 * 1024 * 1024) { + + ib_logf(IB_LOG_LEVEL_INFO, + "Small buffer pool size (%luM), the flst_validate() " + "debug function can cause a deadlock if the " + "buffer pool fills up.", + srv_buf_pool_size / 1024 / 1024); + } +#endif /* UNIV_DEBUG */ + + fsp_init(); + log_init(); + + lock_sys_create(srv_lock_table_size); + + /* Create i/o-handler threads: */ + + for (i = 0; i < srv_n_file_io_threads; ++i) { + + n[i] = i; + + os_thread_create(io_handler_thread, n + i, thread_ids + i); + } + + if (srv_n_log_files * srv_log_file_size * UNIV_PAGE_SIZE + >= 512ULL * 1024ULL * 1024ULL * 1024ULL) { + /* log_block_convert_lsn_to_no() limits the returned block + number to 1G and given that OS_FILE_LOG_BLOCK_SIZE is 512 + bytes, then we have a limit of 512 GB. If that limit is to + be raised, then log_block_convert_lsn_to_no() must be + modified. */ + ib_logf(IB_LOG_LEVEL_ERROR, + "Combined size of log files must be < 512 GB"); + + return(DB_ERROR); + } + + if (srv_n_log_files * srv_log_file_size >= ULINT_MAX) { + /* fil_io() takes ulint as an argument and we are passing + (next_offset / UNIV_PAGE_SIZE) to it in log_group_write_buf(). + So (next_offset / UNIV_PAGE_SIZE) must be less than ULINT_MAX. + So next_offset must be < ULINT_MAX * UNIV_PAGE_SIZE. This + means that we are limited to ULINT_MAX * UNIV_PAGE_SIZE which + is 64 TB on 32 bit systems. 
*/ + fprintf(stderr, + " InnoDB: Error: combined size of log files" + " must be < %lu GB\n", + ULINT_MAX / 1073741824 * UNIV_PAGE_SIZE); + + return(DB_ERROR); + } + + sum_of_new_sizes = 0; + + for (i = 0; i < srv_n_data_files; i++) { +#ifndef __WIN__ + if (sizeof(off_t) < 5 + && srv_data_file_sizes[i] + >= (ulint) (1 << (32 - UNIV_PAGE_SIZE_SHIFT))) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: file size must be < 4 GB" + " with this MySQL binary\n"); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: and operating system combination," + " in some OS's < 2 GB\n"); + + return(DB_ERROR); + } +#endif + sum_of_new_sizes += srv_data_file_sizes[i]; + } + + if (sum_of_new_sizes < 10485760 / UNIV_PAGE_SIZE) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Tablespace size must be at least 10 MB"); + + return(DB_ERROR); + } + + recv_sys_create(); + recv_sys_init(buf_pool_get_curr_size()); + + err = open_or_create_data_files(&create_new_db, +#ifdef UNIV_LOG_ARCHIVE + &min_arch_log_no, &max_arch_log_no, +#endif /* UNIV_LOG_ARCHIVE */ + &min_flushed_lsn, &max_flushed_lsn, + &sum_of_new_sizes); + if (err == DB_FAIL) { + + ib_logf(IB_LOG_LEVEL_ERROR, + "The system tablespace must be writable!"); + + return(DB_ERROR); + + } else if (err != DB_SUCCESS) { + + ib_logf(IB_LOG_LEVEL_ERROR, + "Could not open or create the system tablespace. If " + "you tried to add new data files to the system " + "tablespace, and it failed here, you should now " + "edit innodb_data_file_path in my.cnf back to what " + "it was, and remove the new ibdata files InnoDB " + "created in this failed attempt. InnoDB only wrote " + "those files full of zeros, but did not yet use " + "them in any way. 
But be careful: do not remove " + "old data files which contain your precious data!"); + + return(err); + } + +#ifdef UNIV_LOG_ARCHIVE + srv_normalize_path_for_win(srv_arch_dir); +#endif /* UNIV_LOG_ARCHIVE */ + + dirnamelen = strlen(srv_log_group_home_dir); + ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile"); + memcpy(logfilename, srv_log_group_home_dir, dirnamelen); + + /* Add a path separator if needed. */ + if (dirnamelen && logfilename[dirnamelen - 1] != SRV_PATH_SEPARATOR) { + logfilename[dirnamelen++] = SRV_PATH_SEPARATOR; + } + + srv_log_file_size_requested = srv_log_file_size; + + if (create_new_db) { + bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL); + ut_a(success); + + min_flushed_lsn = max_flushed_lsn = log_get_lsn(); + + buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); + + err = create_log_files(create_new_db, logfilename, dirnamelen, + max_flushed_lsn, logfile0); + + if (err != DB_SUCCESS) { + return(err); + } + } else { + for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) { + os_offset_t size; + os_file_stat_t stat_info; + + sprintf(logfilename + dirnamelen, + "ib_logfile%u", i); + + err = os_file_get_status( + logfilename, &stat_info, false); + + if (err == DB_NOT_FOUND) { + if (i == 0) { + if (max_flushed_lsn + != min_flushed_lsn) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Cannot create" + " log files because" + " data files are" + " corrupt or" + " not in sync" + " with each other"); + return(DB_ERROR); + } + + if (max_flushed_lsn < (lsn_t) 1000) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Cannot create" + " log files because" + " data files are" + " corrupt or the" + " database was not" + " shut down cleanly" + " after creating" + " the data files."); + return(DB_ERROR); + } + + err = create_log_files( + create_new_db, logfilename, + dirnamelen, max_flushed_lsn, + logfile0); + + if (err != DB_SUCCESS) { + return(err); + } + + create_log_files_rename( + logfilename, dirnamelen, + max_flushed_lsn, logfile0); + + /* Suppress the message about + crash 
recovery. */ + max_flushed_lsn = min_flushed_lsn + = log_get_lsn(); + goto files_checked; + } else if (i < 2) { + /* must have at least 2 log files */ + ib_logf(IB_LOG_LEVEL_ERROR, + "Only one log file found."); + return(err); + } + + /* opened all files */ + break; + } + + if (!srv_file_check_mode(logfilename)) { + return(DB_ERROR); + } + + err = open_log_file(&files[i], logfilename, &size); + + if (err != DB_SUCCESS) { + return(err); + } + + ut_a(size != (os_offset_t) -1); + + if (size & ((1 << UNIV_PAGE_SIZE_SHIFT) - 1)) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Log file %s size " + UINT64PF " is not a multiple of" + " innodb_page_size", + logfilename, size); + return(DB_ERROR); + } + + size >>= UNIV_PAGE_SIZE_SHIFT; + + if (i == 0) { + srv_log_file_size = size; + } else if (size != srv_log_file_size) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Log file %s is" + " of different size " UINT64PF " bytes" + " than other log" + " files " UINT64PF " bytes!", + logfilename, + size << UNIV_PAGE_SIZE_SHIFT, + (os_offset_t) srv_log_file_size + << UNIV_PAGE_SIZE_SHIFT); + return(DB_ERROR); + } + } + + srv_n_log_files_found = i; + + /* Create the in-memory file space objects. */ + + sprintf(logfilename + dirnamelen, "ib_logfile%u", 0); + + fil_space_create(logfilename, + SRV_LOG_SPACE_FIRST_ID, + fsp_flags_set_page_size(0, UNIV_PAGE_SIZE), + FIL_LOG); + + ut_a(fil_validate()); + + /* srv_log_file_size is measured in pages; if page size is 16KB, + then we have a limit of 64TB on 32 bit systems */ + ut_a(srv_log_file_size <= ULINT_MAX); + + for (unsigned j = 0; j < i; j++) { + sprintf(logfilename + dirnamelen, "ib_logfile%u", j); + + if (!fil_node_create(logfilename, + (ulint) srv_log_file_size, + SRV_LOG_SPACE_FIRST_ID, FALSE)) { + return(DB_ERROR); + } + } + +#ifdef UNIV_LOG_ARCHIVE + /* Create the file space object for archived logs. Under + MySQL, no archiving ever done. 
*/ + fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1, + 0, FIL_LOG); +#endif /* UNIV_LOG_ARCHIVE */ + log_group_init(0, i, srv_log_file_size * UNIV_PAGE_SIZE, + SRV_LOG_SPACE_FIRST_ID, + SRV_LOG_SPACE_FIRST_ID + 1); + } + +files_checked: + /* Open all log files and data files in the system + tablespace: we keep them open until database + shutdown */ + + fil_open_log_and_system_tablespace_files(); + + err = srv_undo_tablespaces_init( + create_new_db, + srv_undo_tablespaces, + &srv_undo_tablespaces_open); + + /* If the force recovery is set very high then we carry on regardless + of all errors. Basically this is fingers crossed mode. */ + + if (err != DB_SUCCESS + && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) { + + return(err); + } + + /* Initialize objects used by dict stats gathering thread, which + can also be used by recovery if it tries to drop some table */ + if (!srv_read_only_mode) { + dict_stats_thread_init(); + } + + trx_sys_file_format_init(); + + trx_sys_create(); + + if (create_new_db) { + + ut_a(!srv_read_only_mode); + init_log_online(); + + mtr_start(&mtr); + + fsp_header_init(0, sum_of_new_sizes, &mtr); + + mtr_commit(&mtr); + + /* To maintain backward compatibility we create only + the first rollback segment before the double write buffer. + All the remaining rollback segments will be created later, + after the double write buffer has been created. */ + trx_sys_create_sys_pages(); + + ib_bh = trx_sys_init_at_db_start(); + n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list); + + /* The purge system needs to create the purge view and + therefore requires that the trx_sys is inited. 
*/ + + trx_purge_sys_create(srv_n_purge_threads, ib_bh); + + err = dict_create(); + + if (err != DB_SUCCESS) { + return(err); + } + + srv_startup_is_before_trx_rollback_phase = FALSE; + + bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL); + ut_a(success); + + min_flushed_lsn = max_flushed_lsn = log_get_lsn(); + + buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); + + /* Stamp the LSN to the data files. */ + fil_write_flushed_lsn_to_data_files(max_flushed_lsn, 0); + + fil_flush_file_spaces(FIL_TABLESPACE); + + create_log_files_rename(logfilename, dirnamelen, + max_flushed_lsn, logfile0); +#ifdef UNIV_LOG_ARCHIVE + } else if (srv_archive_recovery) { + + ib_logf(IB_LOG_LEVEL_INFO, + " Starting archive recovery from a backup..."); + + err = recv_recovery_from_archive_start( + min_flushed_lsn, srv_archive_recovery_limit_lsn, + min_arch_log_no); + if (err != DB_SUCCESS) { + + return(DB_ERROR); + } + /* Since ibuf init is in dict_boot, and ibuf is needed + in any disk i/o, first call dict_boot */ + + err = dict_boot(); + + if (err != DB_SUCCESS) { + return(err); + } + + ib_bh = trx_sys_init_at_db_start(); + n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list); + + /* The purge system needs to create the purge view and + therefore requires that the trx_sys is inited. */ + + trx_purge_sys_create(srv_n_purge_threads, ib_bh); + + srv_startup_is_before_trx_rollback_phase = FALSE; + + recv_recovery_from_archive_finish(); +#endif /* UNIV_LOG_ARCHIVE */ + } else { + + /* Check if we support the max format that is stamped + on the system tablespace. + Note: We are NOT allowed to make any modifications to + the TRX_SYS_PAGE_NO page before recovery because this + page also contains the max_trx_id etc. important system + variables that are required for recovery. We need to + ensure that we return the system to a state where normal + recovery is guaranteed to work. 
We do this by + invalidating the buffer cache, this will force the + reread of the page and restoration to its last known + consistent state, this is REQUIRED for the recovery + process to work. */ + err = trx_sys_file_format_max_check( + srv_max_file_format_at_startup); + + if (err != DB_SUCCESS) { + return(err); + } + + /* Invalidate the buffer pool to ensure that we reread + the page that we read above, during recovery. + Note that this is not as heavy weight as it seems. At + this point there will be only ONE page in the buf_LRU + and there must be no page in the buf_flush list. */ + buf_pool_invalidate(); + + /* We always try to do a recovery, even if the database had + been shut down normally: this is the normal startup path */ + + err = recv_recovery_from_checkpoint_start( + LOG_CHECKPOINT, LSN_MAX, + min_flushed_lsn, max_flushed_lsn); + + if (err != DB_SUCCESS) { + + return(DB_ERROR); + } + + init_log_online(); + + /* Since the insert buffer init is in dict_boot, and the + insert buffer is needed in any disk i/o, first we call + dict_boot(). Note that trx_sys_init_at_db_start() only needs + to access space 0, and the insert buffer at this stage already + works for space 0. */ + + err = dict_boot(); + + if (err != DB_SUCCESS) { + return(err); + } + + ib_bh = trx_sys_init_at_db_start(); + n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list); + + /* The purge system needs to create the purge view and + therefore requires that the trx_sys is inited. */ + + trx_purge_sys_create(srv_n_purge_threads, ib_bh); + + /* recv_recovery_from_checkpoint_finish needs trx lists which + are initialized in trx_sys_init_at_db_start(). */ + + recv_recovery_from_checkpoint_finish(); + + if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) { + /* The following call is necessary for the insert + buffer to work with multiple tablespaces. We must + know the mapping between space id's and .ibd file + names. 
+ + In a crash recovery, we check that the info in data + dictionary is consistent with what we already know + about space id's from the call of + fil_load_single_table_tablespaces(). + + In a normal startup, we create the space objects for + every table in the InnoDB data dictionary that has + an .ibd file. + + We also determine the maximum tablespace id used. */ + dict_check_t dict_check; + + if (recv_needed_recovery) { + dict_check = DICT_CHECK_ALL_LOADED; + } else if (n_recovered_trx) { + dict_check = DICT_CHECK_SOME_LOADED; + } else { + dict_check = DICT_CHECK_NONE_LOADED; + } + + dict_check_tablespaces_and_store_max_id(dict_check); + } + + if (!srv_force_recovery + && !recv_sys->found_corrupt_log + && (srv_log_file_size_requested != srv_log_file_size + || srv_n_log_files_found != srv_n_log_files)) { + /* Prepare to replace the redo log files. */ + + if (srv_read_only_mode) { + ib_logf(IB_LOG_LEVEL_ERROR, + "Cannot resize log files " + "in read-only mode."); + return(DB_READ_ONLY); + } + + /* Clean the buffer pool. */ + bool success = buf_flush_list( + ULINT_MAX, LSN_MAX, NULL); + ut_a(success); + + RECOVERY_CRASH(1); + + min_flushed_lsn = max_flushed_lsn = log_get_lsn(); + + ib_logf(IB_LOG_LEVEL_WARN, + "Resizing redo log from %u*%u to %u*%u pages" + ", LSN=" LSN_PF, + (unsigned) i, + (unsigned) srv_log_file_size, + (unsigned) srv_n_log_files, + (unsigned) srv_log_file_size_requested, + max_flushed_lsn); + + buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST); + + RECOVERY_CRASH(2); + + /* Flush the old log files. */ + log_buffer_flush_to_disk(); + /* If innodb_flush_method=O_DSYNC, + we need to explicitly flush the log buffers. */ + fil_flush(SRV_LOG_SPACE_FIRST_ID); + + ut_ad(max_flushed_lsn == log_get_lsn()); + + /* Prohibit redo log writes from any other + threads until creating a log checkpoint at the + end of create_log_files(). 
*/ + ut_d(recv_no_log_write = TRUE); + ut_ad(!buf_pool_check_no_pending_io()); + + RECOVERY_CRASH(3); + + /* Stamp the LSN to the data files. */ + fil_write_flushed_lsn_to_data_files( + max_flushed_lsn, 0); + + fil_flush_file_spaces(FIL_TABLESPACE); + + RECOVERY_CRASH(4); + + /* Close and free the redo log files, so that + we can replace them. */ + fil_close_log_files(true); + + RECOVERY_CRASH(5); + + /* Free the old log file space. */ + log_group_close_all(); + + ib_logf(IB_LOG_LEVEL_WARN, + "Starting to delete and rewrite log files."); + + srv_log_file_size = srv_log_file_size_requested; + + err = create_log_files(create_new_db, logfilename, + dirnamelen, max_flushed_lsn, + logfile0); + + if (err != DB_SUCCESS) { + return(err); + } + + /* create_log_files() can increase system lsn that is + why FIL_PAGE_FILE_FLUSH_LSN have to be updated */ + min_flushed_lsn = max_flushed_lsn = log_get_lsn(); + fil_write_flushed_lsn_to_data_files(min_flushed_lsn, 0); + fil_flush_file_spaces(FIL_TABLESPACE); + + create_log_files_rename(logfilename, dirnamelen, + log_get_lsn(), logfile0); + } + + srv_startup_is_before_trx_rollback_phase = FALSE; + recv_recovery_rollback_active(); + + /* It is possible that file_format tag has never + been set. In this case we initialize it to minimum + value. Important to note that we can do it ONLY after + we have finished the recovery process so that the + image of TRX_SYS_PAGE_NO is not stale. 
*/ + trx_sys_file_format_tag_init(); + } + + if (!create_new_db && sum_of_new_sizes > 0) { + /* New data file(s) were added */ + mtr_start(&mtr); + + fsp_header_inc_size(0, sum_of_new_sizes, &mtr); + + mtr_commit(&mtr); + + /* Immediately write the log record about increased tablespace + size to disk, so that it is durable even if mysqld would crash + quickly */ + + log_buffer_flush_to_disk(); + } + +#ifdef UNIV_LOG_ARCHIVE + /* Archiving is always off under MySQL */ + if (!srv_log_archive_on) { + ut_a(DB_SUCCESS == log_archive_noarchivelog()); + } else { + bool start_archive; + + mutex_enter(&(log_sys->mutex)); + + start_archive = FALSE; + + if (log_sys->archiving_state == LOG_ARCH_OFF) { + start_archive = TRUE; + } + + mutex_exit(&(log_sys->mutex)); + + if (start_archive) { + ut_a(DB_SUCCESS == log_archive_archivelog()); + } + } +#endif /* UNIV_LOG_ARCHIVE */ + + /* fprintf(stderr, "Max allowed record size %lu\n", + page_get_free_space_of_empty() / 2); */ + + if (buf_dblwr == NULL) { + /* Create the doublewrite buffer to a new tablespace */ + + buf_dblwr_create(); + } + + /* Here the double write buffer has already been created and so + any new rollback segments will be allocated after the double + write buffer. The default segment should already exist. + We create the new segments only if it's a new database or + the database was shutdown cleanly. */ + + /* Note: When creating the extra rollback segments during an upgrade + we violate the latching order, even if the change buffer is empty. + We make an exception in sync0sync.cc and check srv_is_being_started + for that violation. It cannot create a deadlock because we are still + running in single threaded mode essentially. Only the IO threads + should be running at this stage. */ + + ut_a(srv_undo_logs > 0); + ut_a(srv_undo_logs <= TRX_SYS_N_RSEGS); + + /* The number of rsegs that exist in InnoDB is given by status + variable srv_available_undo_logs. 
The number of rsegs to use can + be set using the dynamic global variable srv_undo_logs. */ + + srv_available_undo_logs = trx_sys_create_rsegs( + srv_undo_tablespaces, srv_undo_logs); + + if (srv_available_undo_logs == ULINT_UNDEFINED) { + /* Can only happen if server is read only. */ + ut_a(srv_read_only_mode); + srv_undo_logs = ULONG_UNDEFINED; + } + + if (!srv_read_only_mode) { + /* Create the thread which watches the timeouts + for lock waits */ + os_thread_create( + lock_wait_timeout_thread, + NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS); + + /* Create the thread which warns of long semaphore waits */ + os_thread_create( + srv_error_monitor_thread, + NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS); + + /* Create the thread which prints InnoDB monitor info */ + os_thread_create( + srv_monitor_thread, + NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS); + } + + /* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */ + err = dict_create_or_check_foreign_constraint_tables(); + if (err != DB_SUCCESS) { + return(err); + } + + /* Create the SYS_TABLESPACES system table */ + err = dict_create_or_check_sys_tablespace(); + if (err != DB_SUCCESS) { + return(err); + } + + srv_is_being_started = FALSE; + + ut_a(trx_purge_state() == PURGE_STATE_INIT); + + /* Create the master thread which does purge and other utility + operations */ + + if (!srv_read_only_mode) { + + os_thread_create( + srv_master_thread, + NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS)); + } + + if (!srv_read_only_mode + && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) { + + os_thread_create( + srv_purge_coordinator_thread, + NULL, thread_ids + 6 + SRV_MAX_N_IO_THREADS); + + ut_a(UT_ARR_SIZE(thread_ids) + > 6 + srv_n_purge_threads + SRV_MAX_N_IO_THREADS); + + /* We've already created the purge coordinator thread above. 
*/ + for (i = 1; i < srv_n_purge_threads; ++i) { + os_thread_create( + srv_worker_thread, NULL, + thread_ids + 6 + i + SRV_MAX_N_IO_THREADS); + } + + srv_start_wait_for_purge_to_start(); + + } else { + purge_sys->state = PURGE_STATE_DISABLED; + } + + if (!srv_read_only_mode) { + os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL); + } + os_thread_create(buf_flush_lru_manager_thread, NULL, NULL); + +#ifdef UNIV_DEBUG + /* buf_debug_prints = TRUE; */ +#endif /* UNIV_DEBUG */ + sum_of_data_file_sizes = 0; + + for (i = 0; i < srv_n_data_files; i++) { + sum_of_data_file_sizes += srv_data_file_sizes[i]; + } + + tablespace_size_in_header = fsp_header_get_tablespace_size(); + + if (!srv_read_only_mode + && !srv_auto_extend_last_data_file + && sum_of_data_file_sizes != tablespace_size_in_header) { + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: tablespace size" + " stored in header is %lu pages, but\n", + (ulong) tablespace_size_in_header); + ut_print_timestamp(stderr); + fprintf(stderr, + "InnoDB: the sum of data file sizes is %lu pages\n", + (ulong) sum_of_data_file_sizes); + + if (srv_force_recovery == 0 + && sum_of_data_file_sizes < tablespace_size_in_header) { + /* This is a fatal error, the tail of a tablespace is + missing */ + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Cannot start InnoDB." + " The tail of the system tablespace is\n"); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: missing. 
Have you edited" + " innodb_data_file_path in my.cnf in an\n"); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: inappropriate way, removing" + " ibdata files from there?\n"); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: You can set innodb_force_recovery=1" + " in my.cnf to force\n"); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: a startup if you are trying" + " to recover a badly corrupt database.\n"); + + return(DB_ERROR); + } + } + + if (!srv_read_only_mode + && srv_auto_extend_last_data_file + && sum_of_data_file_sizes < tablespace_size_in_header) { + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: tablespace size stored in header" + " is %lu pages, but\n", + (ulong) tablespace_size_in_header); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: the sum of data file sizes" + " is only %lu pages\n", + (ulong) sum_of_data_file_sizes); + + if (srv_force_recovery == 0) { + + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Cannot start InnoDB. The tail of" + " the system tablespace is\n"); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: missing. 
Have you edited" + " innodb_data_file_path in my.cnf in an\n"); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: inappropriate way, removing" + " ibdata files from there?\n"); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: You can set innodb_force_recovery=1" + " in my.cnf to force\n"); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: a startup if you are trying to" + " recover a badly corrupt database.\n"); + + return(DB_ERROR); + } + } + + /* Check that os_fast_mutexes work as expected */ + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &srv_os_test_mutex); + + if (0 != os_fast_mutex_trylock(&srv_os_test_mutex)) { + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Error: pthread_mutex_trylock returns" + " an unexpected value on\n"); + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: success! Cannot continue.\n"); + exit(1); + } + + os_fast_mutex_unlock(&srv_os_test_mutex); + + os_fast_mutex_lock(&srv_os_test_mutex); + + os_fast_mutex_unlock(&srv_os_test_mutex); + + os_fast_mutex_free(&srv_os_test_mutex); + + if (!srv_file_per_table && srv_pass_corrupt_table) { + fprintf(stderr, "InnoDB: Warning:" + " The option innodb_file_per_table is disabled," + " so using the option innodb_pass_corrupt_table doesn't make sense.\n"); + } + + if (srv_print_verbose_log) { + ib_logf(IB_LOG_LEVEL_INFO, + " Percona XtraDB (http://www.percona.com) %s started; " + "log sequence number " LSN_PF "", + INNODB_VERSION_STR, srv_start_lsn); + } + + if (srv_force_recovery > 0) { + ib_logf(IB_LOG_LEVEL_INFO, + "!!! innodb_force_recovery is set to %lu !!!", + (ulong) srv_force_recovery); + } + + if (srv_force_recovery == 0) { + /* In the insert buffer we may have even bigger tablespace + id's, because we may have dropped those tablespaces, but + insert buffer merge has not had time to clean the records from + the ibuf tree. 
*/ + + ibuf_update_max_tablespace_id(); + } + + if (!srv_read_only_mode) { + /* Create the buffer pool dump/load thread */ + os_thread_create(buf_dump_thread, NULL, NULL); + + /* Create the dict stats gathering thread */ + os_thread_create(dict_stats_thread, NULL, NULL); + + /* Create the thread that will optimize the FTS sub-system. */ + fts_optimize_init(); + } + + srv_was_started = TRUE; + + return(DB_SUCCESS); +} + +#if 0 +/******************************************************************** +Sync all FTS cache before shutdown */ +static +void +srv_fts_close(void) +/*===============*/ +{ + dict_table_t* table; + + for (table = UT_LIST_GET_FIRST(dict_sys->table_LRU); + table; table = UT_LIST_GET_NEXT(table_LRU, table)) { + fts_t* fts = table->fts; + + if (fts != NULL) { + fts_sync_table(table); + } + } + + for (table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU); + table; table = UT_LIST_GET_NEXT(table_LRU, table)) { + fts_t* fts = table->fts; + + if (fts != NULL) { + fts_sync_table(table); + } + } +} +#endif + +/****************************************************************//** +Shuts down the InnoDB database. +@return DB_SUCCESS or error code */ +UNIV_INTERN +dberr_t +innobase_shutdown_for_mysql(void) +/*=============================*/ +{ + ulint i; + + if (!srv_was_started) { + if (srv_is_being_started) { + ib_logf(IB_LOG_LEVEL_WARN, + "Shutting down an improperly started, " + "or created database!"); + } + + return(DB_SUCCESS); + } + + if (!srv_read_only_mode) { + /* Shutdown the FTS optimize sub system. */ + fts_optimize_start_shutdown(); + + fts_optimize_end(); + } + + /* 1. Flush the buffer pool to disk, write the current lsn to + the tablespace header(s), and copy all log data to archive. + The step 1 is the real InnoDB shutdown. The remaining steps 2 - ... + just free data structures after the shutdown. 
*/ + + logs_empty_and_mark_files_at_shutdown(); + + if (srv_conc_get_active_threads() != 0) { + ib_logf(IB_LOG_LEVEL_WARN, + "Query counter shows %ld queries still " + "inside InnoDB at shutdown", + srv_conc_get_active_threads()); + } + + /* 2. Make all threads created by InnoDB to exit */ + + srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS; + + /* All threads end up waiting for certain events. Put those events + to the signaled state. Then the threads will exit themselves after + os_event_wait(). */ + + for (i = 0; i < 1000; i++) { + /* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM + HERE OR EARLIER */ + + if (!srv_read_only_mode) { + /* a. Let the lock timeout thread exit */ + os_event_set(lock_sys->timeout_event); + + /* b. srv error monitor thread exits automatically, + no need to do anything here */ + + /* c. We wake the master thread so that it exits */ + srv_wake_master_thread(); + + /* d. Wakeup purge threads. */ + srv_purge_wakeup(); + } + + /* e. Exit the i/o threads */ + + os_aio_wake_all_threads_at_shutdown(); + + /* f. dict_stats_thread is signaled from + logs_empty_and_mark_files_at_shutdown() and should have + already quit or is quitting right now. */ + + os_mutex_enter(os_sync_mutex); + + if (os_thread_count == 0) { + /* All the threads have exited or are just exiting; + NOTE that the threads may not have completed their + exit yet. Should we use pthread_join() to make sure + they have exited? If we did, we would have to + remove the pthread_detach() from + os_thread_exit(). Now we just sleep 0.1 + seconds and hope that is enough! 
*/ + + os_mutex_exit(os_sync_mutex); + + os_thread_sleep(100000); + + break; + } + + os_mutex_exit(os_sync_mutex); + + os_thread_sleep(100000); + } + + if (i == 1000) { + ib_logf(IB_LOG_LEVEL_WARN, + "%lu threads created by InnoDB" + " had not exited at shutdown!", + (ulong) os_thread_count); + } + + if (srv_monitor_file) { + fclose(srv_monitor_file); + srv_monitor_file = 0; + if (srv_monitor_file_name) { + unlink(srv_monitor_file_name); + mem_free(srv_monitor_file_name); + } + } + + if (srv_dict_tmpfile) { + fclose(srv_dict_tmpfile); + srv_dict_tmpfile = 0; + } + + if (srv_misc_tmpfile) { + fclose(srv_misc_tmpfile); + srv_misc_tmpfile = 0; + } + + if (!srv_read_only_mode) { + dict_stats_thread_deinit(); + } + + /* This must be disabled before closing the buffer pool + and closing the data dictionary. */ + btr_search_disable(); + + ibuf_close(); + log_shutdown(); + lock_sys_close(); + trx_sys_file_format_close(); + trx_sys_close(); + + /* We don't create these mutexes in RO mode because we don't create + the temp files that the cover. */ + if (!srv_read_only_mode) { + mutex_free(&srv_monitor_file_mutex); + mutex_free(&srv_dict_tmpfile_mutex); + mutex_free(&srv_misc_tmpfile_mutex); + } + + dict_close(); + btr_search_sys_free(); + + /* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside + them */ + os_aio_free(); + que_close(); + row_mysql_close(); + srv_mon_free(); + sync_close(); + srv_free(); + fil_close(); + + /* 4. Free the os_conc_mutex and all os_events and os_mutexes */ + + os_sync_free(); + + /* 5. 
Free all allocated memory */ + + pars_lexer_close(); + log_mem_free(); + buf_pool_free(srv_buf_pool_instances); + mem_close(); + + /* ut_free_all_mem() frees all allocated memory not freed yet + in shutdown, and it will also free the ut_list_mutex, so it + should be the last one for all operation */ + ut_free_all_mem(); + + if (os_thread_count != 0 + || os_event_count != 0 + || os_mutex_count != 0 + || os_fast_mutex_count != 0) { + ib_logf(IB_LOG_LEVEL_WARN, + "Some resources were not cleaned up in shutdown: " + "threads %lu, events %lu, os_mutexes %lu, " + "os_fast_mutexes %lu", + (ulong) os_thread_count, (ulong) os_event_count, + (ulong) os_mutex_count, (ulong) os_fast_mutex_count); + } + + if (dict_foreign_err_file) { + fclose(dict_foreign_err_file); + } + + if (srv_print_verbose_log) { + ib_logf(IB_LOG_LEVEL_INFO, + "Shutdown completed; log sequence number " LSN_PF "", + srv_shutdown_lsn); + } + + srv_was_started = FALSE; + srv_start_has_been_called = FALSE; + + return(DB_SUCCESS); +} +#endif /* !UNIV_HOTBACKUP */ + + +/******************************************************************** +Signal all per-table background threads to shutdown, and wait for them to do +so. */ +UNIV_INTERN +void +srv_shutdown_table_bg_threads(void) +/*===============================*/ +{ + dict_table_t* table; + dict_table_t* first; + dict_table_t* last = NULL; + + mutex_enter(&dict_sys->mutex); + + /* Signal all threads that they should stop. */ + table = UT_LIST_GET_FIRST(dict_sys->table_LRU); + first = table; + while (table) { + dict_table_t* next; + fts_t* fts = table->fts; + + if (fts != NULL) { + fts_start_shutdown(table, fts); + } + + next = UT_LIST_GET_NEXT(table_LRU, table); + + if (!next) { + last = table; + } + + table = next; + } + + /* We must release dict_sys->mutex here; if we hold on to it in the + loop below, we will deadlock if any of the background threads try to + acquire it (for example, the FTS thread by calling que_eval_sql). 
+ + Releasing it here and going through dict_sys->table_LRU without + holding it is safe because: + + a) MySQL only starts the shutdown procedure after all client + threads have been disconnected and no new ones are accepted, so no + new tables are added or old ones dropped. + + b) Despite its name, the list is not LRU, and the order stays + fixed. + + To safeguard against the above assumptions ever changing, we store + the first and last items in the list above, and then check that + they've stayed the same below. */ + + mutex_exit(&dict_sys->mutex); + + /* Wait for the threads of each table to stop. This is not inside + the above loop, because by signaling all the threads first we can + overlap their shutting down delays. */ + table = UT_LIST_GET_FIRST(dict_sys->table_LRU); + ut_a(first == table); + while (table) { + dict_table_t* next; + fts_t* fts = table->fts; + + if (fts != NULL) { + fts_shutdown(table, fts); + } + + next = UT_LIST_GET_NEXT(table_LRU, table); + + if (table == last) { + ut_a(!next); + } + + table = next; + } +} + +/*****************************************************************//** +Get the meta-data filename from the table name. 
*/ +UNIV_INTERN +void +srv_get_meta_data_filename( +/*=======================*/ + dict_table_t* table, /*!< in: table */ + char* filename, /*!< out: filename */ + ulint max_len) /*!< in: filename max length */ +{ + ulint len; + char* path; + char* suffix; + static const ulint suffix_len = strlen(".cfg"); + + if (DICT_TF_HAS_DATA_DIR(table->flags)) { + dict_get_and_save_data_dir_path(table, false); + ut_a(table->data_dir_path); + + path = os_file_make_remote_pathname( + table->data_dir_path, table->name, "cfg"); + } else { + path = fil_make_ibd_name(table->name, false); + } + + ut_a(path); + len = ut_strlen(path); + ut_a(max_len >= len); + + suffix = path + (len - suffix_len); + if (strncmp(suffix, ".cfg", suffix_len) == 0) { + strcpy(filename, path); + } else { + ut_ad(strncmp(suffix, ".ibd", suffix_len) == 0); + + strncpy(filename, path, len - suffix_len); + suffix = filename + (len - suffix_len); + strcpy(suffix, ".cfg"); + } + + mem_free(path); + + srv_normalize_path_for_win(filename); +} |