summaryrefslogtreecommitdiff
path: root/storage/xtradb/srv
diff options
context:
space:
mode:
Diffstat (limited to 'storage/xtradb/srv')
-rw-r--r--storage/xtradb/srv/srv0conc.cc618
-rw-r--r--storage/xtradb/srv/srv0mon.cc1910
-rw-r--r--storage/xtradb/srv/srv0srv.c4251
-rw-r--r--storage/xtradb/srv/srv0srv.cc3508
-rw-r--r--storage/xtradb/srv/srv0start.c2552
-rw-r--r--storage/xtradb/srv/srv0start.cc3234
6 files changed, 9270 insertions, 6803 deletions
diff --git a/storage/xtradb/srv/srv0conc.cc b/storage/xtradb/srv/srv0conc.cc
new file mode 100644
index 00000000000..413d5c4eab2
--- /dev/null
+++ b/storage/xtradb/srv/srv0conc.cc
@@ -0,0 +1,618 @@
+/*****************************************************************************
+
+Copyright (c) 2011, 2012, Oracle and/or its affiliates. All Rights Reserved.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file srv/srv0conc.cc
+
+InnoDB concurrency manager
+
+Created 2011/04/18 Sunny Bains
+*******************************************************/
+
+#include "srv0srv.h"
+#include "sync0sync.h"
+#include "btr0types.h"
+#include "trx0trx.h"
+
+#include "mysql/plugin.h"
+
+/** Number of times a thread is allowed to enter InnoDB within the same
+SQL query after it has once got the ticket. This value is copied into
+trx->n_tickets_to_enter_innodb when a thread is admitted. */
+UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
+
+#ifdef HAVE_ATOMIC_BUILTINS
+/** Maximum sleep delay (in micro-seconds). A value of 0 disables the
+adaptive adjustment of srv_thread_sleep_delay; a value > 0 enables it and
+caps srv_thread_sleep_delay at this value. */
+UNIV_INTERN ulong srv_adaptive_max_sleep_delay = 150000;
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+/** Time, in micro-seconds, that a thread sleeps before it retries to
+enter InnoDB. With adaptive sleep enabled this value is adjusted at
+run time by the threads that wait. */
+UNIV_INTERN ulong srv_thread_sleep_delay = 10000;
+
+
+/** We are prepared for a situation that we have this many threads waiting for
+a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
+value. */
+
+UNIV_INTERN ulint srv_max_n_threads = 0;
+
+/** The following controls how many threads we let inside InnoDB concurrently:
+threads waiting for locks are not counted into the number because otherwise
+we could get a deadlock. Value of 0 will disable the concurrency check. */
+
+UNIV_INTERN ulong srv_thread_concurrency = 0;
+
+#ifndef HAVE_ATOMIC_BUILTINS
+
+/** This mutex protects srv_conc data structures */
+static os_fast_mutex_t srv_conc_mutex;
+
+/** Concurrency list node */
+typedef UT_LIST_NODE_T(struct srv_conc_slot_t) srv_conc_node_t;
+
+/** Slot for a thread waiting in the concurrency control queue. */
+struct srv_conc_slot_t{
+ os_event_t event; /*!< event the queued thread waits on;
+ it is set by the thread that releases
+ this slot's waiter */
+ ibool reserved; /*!< TRUE while the slot is in use by
+ a queued thread */
+ ibool wait_ended; /*!< TRUE when another thread has
+ already set the event and the thread
+ in this slot is free to proceed; but
+ reserved may still be TRUE at that
+ point */
+ srv_conc_node_t srv_conc_queue; /*!< queue node */
+};
+
+/** Queue of threads waiting to get in */
+typedef UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue_t;
+
+/** The wait queue itself; new waiters are appended at the end and the
+queue is searched from the front when a thread is released. */
+static srv_conc_queue_t srv_conc_queue;
+
+/** Array of wait slots (OS_THREAD_MAX_N entries, allocated in
+srv_conc_init()) */
+static srv_conc_slot_t* srv_conc_slots;
+
+#if defined(UNIV_PFS_MUTEX)
+/* Key to register srv_conc_mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t srv_conc_mutex_key;
+#endif /* UNIV_PFS_MUTEX */
+
+#endif /* !HAVE_ATOMIC_BUILTINS */
+
+/** Variables tracking the active and waiting threads. */
+struct srv_conc_t {
+ /** Filler sized so that, together with two word-sized counters, the
+ struct occupies 64 bytes. NOTE(review): presumably this is meant to
+ keep the counters on their own cache line - confirm. */
+ char pad[64 - (sizeof(ulint) + sizeof(lint))];
+
+ /** Number of transactions that have declared_to_be_inside_innodb set.
+ It used to be a non-error for this value to drop below zero temporarily.
+ This is no longer true. We'll, however, keep the lint datatype to add
+ assertions to catch any corner cases that we may have missed. */
+
+ volatile lint n_active;
+
+ /** Number of OS threads waiting for permission to enter InnoDB
+ (queued in the FIFO, or sleeping before a retry) */
+ volatile lint n_waiting;
+};
+
+/* Control variables for tracking concurrency. */
+static srv_conc_t srv_conc;
+
+/*********************************************************************//**
+Set up the concurrency manager. Without atomic builtins this creates the
+srv_conc mutex, the wait queue and the array of wait slots, giving every
+slot its own event. With atomic builtins there is nothing to set up. */
+void
+srv_conc_init(void)
+/*===============*/
+{
+#ifndef HAVE_ATOMIC_BUILTINS
+	/* Mutex and queue that guard and order the waiting threads. */
+	os_fast_mutex_init(srv_conc_mutex_key, &srv_conc_mutex);
+	UT_LIST_INIT(srv_conc_queue);
+
+	/* One wait slot per possible OS thread. */
+	srv_conc_slots = static_cast<srv_conc_slot_t*>(
+		mem_zalloc(OS_THREAD_MAX_N * sizeof(*srv_conc_slots)));
+
+	for (ulint slot_no = 0; slot_no < OS_THREAD_MAX_N; ++slot_no) {
+		srv_conc_slot_t&	wait_slot = srv_conc_slots[slot_no];
+
+		/* Every slot needs a usable event; abort otherwise. */
+		wait_slot.event = os_event_create();
+		ut_a(wait_slot.event);
+	}
+#endif /* !HAVE_ATOMIC_BUILTINS */
+}
+
+/*********************************************************************//**
+Tear down the concurrency manager. Without atomic builtins this destroys
+the srv_conc mutex and releases the wait slot array; with atomic builtins
+it does nothing. */
+void
+srv_conc_free(void)
+/*===============*/
+{
+#ifndef HAVE_ATOMIC_BUILTINS
+	os_fast_mutex_free(&srv_conc_mutex);
+
+	/* Release the slot array and forget the stale pointer. */
+	mem_free(srv_conc_slots);
+	srv_conc_slots = NULL;
+#endif /* !HAVE_ATOMIC_BUILTINS */
+}
+
+#ifdef HAVE_ATOMIC_BUILTINS
+/*********************************************************************//**
+Mark the transaction as being inside InnoDB and hand it a fresh batch of
+srv_n_free_tickets_to_enter tickets. */
+static
+void
+srv_enter_innodb_with_tickets(
+/*==========================*/
+	trx_t*	trx)	/*!< in/out: transaction that wants
+			to enter InnoDB */
+{
+	/* The thread now counts as being inside InnoDB ... */
+	trx->declared_to_be_inside_innodb = TRUE;
+
+	/* ... and gets a full set of tickets. */
+	trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter;
+}
+
+/*********************************************************************//**
+Handle the scheduling of a user thread that wants to enter InnoDB. Setting
+srv_adaptive_max_sleep_delay > 0 switches the adaptive sleep calibration to
+ON. When set, we want to wait in the queue for as little time as possible.
+However, very short waits will result in a lot of context switches and that
+is also not desirable. When threads need to sleep multiple times we increment
+srv_thread_sleep_delay by one. When a thread gets a slot and there are no
+other threads waiting in the queue, we try and reduce the wait as much as we
+can. Currently we reduce it by half each time. If the thread only had to
+wait for one turn before it was able to enter InnoDB (and the delay is
+above 20) we decrement it by one. This is to try and keep the sleep time
+stable around the "optimum" sleep time.
+
+The function loops until the thread has been admitted; on return
+trx->declared_to_be_inside_innodb is TRUE and the thread holds
+srv_n_free_tickets_to_enter tickets. */
+static
+void
+srv_conc_enter_innodb_with_atomics(
+/*===============================*/
+ trx_t* trx) /*!< in/out: transaction that wants
+ to enter InnoDB */
+{
+ /* Number of times this thread has slept in the loop below. */
+ ulint n_sleeps = 0;
+ /* TRUE once thd_wait_begin() has been called and srv_conc.n_waiting
+ has been incremented on behalf of this thread. */
+ ibool notified_mysql = FALSE;
+
+ ut_a(!trx->declared_to_be_inside_innodb);
+
+ for (;;) {
+ ulint sleep_in_us;
+
+ /* Cheap unsynchronised check first; the atomic increment below
+ decides whether the slot was really obtained. */
+ if (srv_conc.n_active < (lint) srv_thread_concurrency) {
+ ulint n_active;
+
+ /* Check if there are any free tickets. */
+ n_active = os_atomic_increment_lint(
+ &srv_conc.n_active, 1);
+
+ if (n_active <= srv_thread_concurrency) {
+
+ srv_enter_innodb_with_tickets(trx);
+
+ /* Undo the waiting bookkeeping done before the
+ first sleep. */
+ if (notified_mysql) {
+
+ (void) os_atomic_decrement_lint(
+ &srv_conc.n_waiting, 1);
+
+ thd_wait_end(trx->mysql_thd);
+ }
+
+ /* Adaptive calibration of the shared sleep
+ delay; the updates are not atomic. */
+ if (srv_adaptive_max_sleep_delay > 0) {
+ if (srv_thread_sleep_delay > 20
+ && n_sleeps == 1) {
+
+ --srv_thread_sleep_delay;
+ }
+
+ if (srv_conc.n_waiting == 0) {
+ srv_thread_sleep_delay >>= 1;
+ }
+ }
+
+ return;
+ }
+
+ /* Since there were no free seats, we relinquish
+ the overbooked ticket. */
+
+ (void) os_atomic_decrement_lint(
+ &srv_conc.n_active, 1);
+ }
+
+ /* First failed attempt: register as a waiter, once. */
+ if (!notified_mysql) {
+ (void) os_atomic_increment_lint(
+ &srv_conc.n_waiting, 1);
+
+ /* Release possible search system latch this
+ thread has */
+
+ if (trx->has_search_latch) {
+ trx_search_latch_release_if_reserved(trx);
+ }
+
+ thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK);
+
+ notified_mysql = TRUE;
+ }
+
+ trx->op_info = "sleeping before entering InnoDB";
+
+ sleep_in_us = srv_thread_sleep_delay;
+
+ /* Guard against overflow when adaptive sleep delay is on:
+ clamp both the local and the shared delay to the maximum. */
+
+ if (srv_adaptive_max_sleep_delay > 0
+ && sleep_in_us > srv_adaptive_max_sleep_delay) {
+
+ sleep_in_us = srv_adaptive_max_sleep_delay;
+ srv_thread_sleep_delay = sleep_in_us;
+ }
+
+ os_thread_sleep(sleep_in_us);
+ /* Account the time slept to the transaction's wait timer. */
+ trx->innodb_que_wait_timer += sleep_in_us;
+
+ trx->op_info = "";
+
+ ++n_sleeps;
+
+ /* A thread that had to sleep more than once lengthens the
+ shared delay by one. */
+ if (srv_adaptive_max_sleep_delay > 0 && n_sleeps > 1) {
+ ++srv_thread_sleep_delay;
+ }
+ }
+}
+
+/*********************************************************************//**
+Record that a user thread is leaving InnoDB: its tickets are dropped, its
+"inside InnoDB" flag is cleared and the active thread count is atomically
+decremented. */
+static
+void
+srv_conc_exit_innodb_with_atomics(
+/*==============================*/
+	trx_t*	trx)	/*!< in/out: transaction */
+{
+	/* Reset the per-transaction state first ... */
+	trx->n_tickets_to_enter_innodb = 0;
+	trx->declared_to_be_inside_innodb = FALSE;
+
+	/* ... then give the slot back. */
+	(void) os_atomic_decrement_lint(&srv_conc.n_active, 1);
+}
+#else
+/*********************************************************************//**
+Record that a user thread is leaving InnoDB. If this frees up room below
+srv_thread_concurrency, the first queued thread that has not yet been
+released is woken up and counted as active on its behalf. */
+static
+void
+srv_conc_exit_innodb_without_atomics(
+/*=================================*/
+	trx_t*	trx)	/*!< in/out: transaction */
+{
+	srv_conc_slot_t*	waiter = NULL;
+
+	os_fast_mutex_lock(&srv_conc_mutex);
+
+	ut_ad(srv_conc.n_active > 0);
+	srv_conc.n_active--;
+	trx->declared_to_be_inside_innodb = FALSE;
+	trx->n_tickets_to_enter_innodb = 0;
+
+	if (srv_conc.n_active < (lint) srv_thread_concurrency) {
+		/* Skip over queued threads that somebody else has
+		already released. */
+		waiter = UT_LIST_GET_FIRST(srv_conc_queue);
+
+		while (waiter != NULL && waiter->wait_ended == TRUE) {
+			waiter = UT_LIST_GET_NEXT(srv_conc_queue, waiter);
+		}
+
+		if (waiter != NULL) {
+			waiter->wait_ended = TRUE;
+
+			/* The released thread is counted as active
+			right away, on its behalf. */
+			srv_conc.n_active++;
+		}
+	}
+
+	os_fast_mutex_unlock(&srv_conc_mutex);
+
+	/* Wake the chosen thread only after the mutex is released. */
+	if (waiter != NULL) {
+		os_event_set(waiter->event);
+	}
+}
+
+/*********************************************************************//**
+Handle the scheduling of a user thread that wants to enter InnoDB when
+atomic builtins are not available. All bookkeeping is done under
+srv_conc_mutex. The thread is admitted at once if fewer than
+srv_thread_concurrency threads are active. Otherwise it may first sleep
+once for srv_thread_sleep_delay microseconds (only if it holds no search
+latch and no locks) and retry; if there is still no room it takes a wait
+slot, joins the queue and blocks until an exiting thread releases it. If
+no wait slot is free the thread is admitted anyway, with zero tickets.
+If the transaction is already declared to be inside InnoDB an error is
+printed to stderr and the function returns without changing anything. */
+static
+void
+srv_conc_enter_innodb_without_atomics(
+/*==================================*/
+	trx_t*	trx)	/*!< in/out: transaction that wants
+			to enter InnoDB */
+{
+	ulint			i;
+	srv_conc_slot_t*	slot = NULL;
+	ibool			has_slept = FALSE;
+	ib_uint64_t		start_time = 0L;
+	ib_uint64_t		finish_time = 0L;
+	ulint			sec;
+	ulint			ms;
+
+	os_fast_mutex_lock(&srv_conc_mutex);
+retry:
+	/* srv_conc_mutex is held whenever control reaches this label. */
+	if (UNIV_UNLIKELY(trx->declared_to_be_inside_innodb)) {
+		os_fast_mutex_unlock(&srv_conc_mutex);
+		ut_print_timestamp(stderr);
+		fputs(" InnoDB: Error: trying to declare trx"
+		      " to enter InnoDB, but\n"
+		      "InnoDB: it already is declared.\n", stderr);
+		trx_print(stderr, trx, 0);
+		putc('\n', stderr);
+		return;
+	}
+
+	ut_ad(srv_conc.n_active >= 0);
+
+	if (srv_conc.n_active < (lint) srv_thread_concurrency) {
+
+		/* There is room: admit the thread immediately. */
+		srv_conc.n_active++;
+		trx->declared_to_be_inside_innodb = TRUE;
+		trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter;
+
+		os_fast_mutex_unlock(&srv_conc_mutex);
+
+		return;
+	}
+
+	/* If the transaction is not holding resources, let it sleep
+	for srv_thread_sleep_delay microseconds, and try again then */
+
+	if (!has_slept && !trx->has_search_latch
+	    && NULL == UT_LIST_GET_FIRST(trx->lock.trx_locks)) {
+
+		has_slept = TRUE; /* We let it sleep only once to avoid
+				  starvation */
+
+		srv_conc.n_waiting++;
+
+		os_fast_mutex_unlock(&srv_conc_mutex);
+
+		trx->op_info = "sleeping before joining InnoDB queue";
+
+		/* Peter Zaitsev suggested that we take the sleep away
+		altogether. But the sleep may be good in pathological
+		situations of lots of thread switches. Simply put some
+		threads aside for a while to reduce the number of thread
+		switches. */
+
+		/* Read the global delay once, so that the time slept and
+		the time added to the wait timer are the same value. The
+		original code referenced an undeclared sleep_in_us here,
+		which did not compile in this configuration. */
+		const ulint	sleep_in_us = srv_thread_sleep_delay;
+
+		if (sleep_in_us > 0) {
+			os_thread_sleep(sleep_in_us);
+			trx->innodb_que_wait_timer += sleep_in_us;
+		}
+
+		trx->op_info = "";
+
+		os_fast_mutex_lock(&srv_conc_mutex);
+
+		srv_conc.n_waiting--;
+
+		goto retry;
+	}
+
+	/* Too many threads inside: put the current thread to a queue */
+
+	for (i = 0; i < OS_THREAD_MAX_N; i++) {
+		slot = srv_conc_slots + i;
+
+		if (!slot->reserved) {
+
+			break;
+		}
+	}
+
+	if (i == OS_THREAD_MAX_N) {
+		/* Could not find a free wait slot, we must let the
+		thread enter */
+
+		srv_conc.n_active++;
+		trx->declared_to_be_inside_innodb = TRUE;
+		trx->n_tickets_to_enter_innodb = 0;
+
+		os_fast_mutex_unlock(&srv_conc_mutex);
+
+		return;
+	}
+
+	/* Release possible search system latch this thread has */
+	if (trx->has_search_latch) {
+		trx_search_latch_release_if_reserved(trx);
+	}
+
+	/* Add to the queue */
+	slot->reserved = TRUE;
+	slot->wait_ended = FALSE;
+
+	UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);
+
+	/* Reset the event while still holding the mutex, so that a
+	release that happens after the unlock below is not lost. */
+	os_event_reset(slot->event);
+
+	srv_conc.n_waiting++;
+
+	os_fast_mutex_unlock(&srv_conc_mutex);
+
+	/* Go to wait for the event; when a thread leaves InnoDB it will
+	release this thread */
+
+	ut_ad(!trx->has_search_latch);
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
+#endif /* UNIV_SYNC_DEBUG */
+
+	/* Time the wait only when statistics are collected for this
+	transaction; start_time == 0 means "not timed". The value in ms
+	is combined with sec as a count of microseconds. */
+	if (UNIV_UNLIKELY(trx->take_stats)) {
+		ut_usectime(&sec, &ms);
+		start_time = (ib_uint64_t)sec * 1000000 + ms;
+	} else {
+		start_time = 0;
+	}
+
+	trx->op_info = "waiting in InnoDB queue";
+
+	thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK);
+
+	os_event_wait(slot->event);
+	thd_wait_end(trx->mysql_thd);
+
+	trx->op_info = "";
+
+	if (UNIV_UNLIKELY(start_time != 0)) {
+		ut_usectime(&sec, &ms);
+		finish_time = (ib_uint64_t)sec * 1000000 + ms;
+		trx->innodb_que_wait_timer += (ulint)(finish_time - start_time);
+	}
+
+	os_fast_mutex_lock(&srv_conc_mutex);
+
+	srv_conc.n_waiting--;
+
+	/* NOTE that the thread which released this thread already
+	incremented the thread counter on behalf of this thread */
+
+	slot->reserved = FALSE;
+
+	UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);
+
+	trx->declared_to_be_inside_innodb = TRUE;
+	trx->n_tickets_to_enter_innodb = srv_n_free_tickets_to_enter;
+
+	os_fast_mutex_unlock(&srv_conc_mutex);
+}
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+/*********************************************************************//**
+Entry point for a thread that wants to enter InnoDB: makes the thread wait
+while too many threads (>= srv_thread_concurrency) are already inside.
+Dispatches to the implementation that matches the build (with or without
+atomic builtins). */
+UNIV_INTERN
+void
+srv_conc_enter_innodb(
+/*==================*/
+	trx_t*	trx)	/*!< in: transaction object associated with the
+			thread */
+{
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
+#endif /* UNIV_SYNC_DEBUG */
+
+#ifndef HAVE_ATOMIC_BUILTINS
+	srv_conc_enter_innodb_without_atomics(trx);
+#else
+	srv_conc_enter_innodb_with_atomics(trx);
+#endif /* !HAVE_ATOMIC_BUILTINS */
+}
+
+/*********************************************************************//**
+Lets a thread into InnoDB unconditionally, whatever the number of threads
+already inside. This must be called when a thread ends a lock wait. Does
+nothing when the concurrency check is disabled (srv_thread_concurrency
+is 0); otherwise the thread is counted as active and given one ticket. */
+UNIV_INTERN
+void
+srv_conc_force_enter_innodb(
+/*========================*/
+	trx_t*	trx)	/*!< in: transaction object associated with the
+			thread */
+{
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
+#endif /* UNIV_SYNC_DEBUG */
+
+	if (srv_thread_concurrency == 0) {
+		/* Concurrency control is switched off. */
+		return;
+	}
+
+	ut_ad(srv_conc.n_active >= 0);
+
+	/* Count the thread as active without checking the limit. */
+#ifndef HAVE_ATOMIC_BUILTINS
+	os_fast_mutex_lock(&srv_conc_mutex);
+	++srv_conc.n_active;
+	os_fast_mutex_unlock(&srv_conc_mutex);
+#else
+	(void) os_atomic_increment_lint(&srv_conc.n_active, 1);
+#endif /* !HAVE_ATOMIC_BUILTINS */
+
+	trx->n_tickets_to_enter_innodb = 1;
+	trx->declared_to_be_inside_innodb = TRUE;
+}
+
+/*********************************************************************//**
+Takes a thread out of InnoDB. This must be called when a thread exits
+InnoDB in a lock wait or at the end of an SQL statement. Nothing is done
+for replication slave threads, nor for a transaction that is not declared
+to be inside InnoDB. */
+UNIV_INTERN
+void
+srv_conc_force_exit_innodb(
+/*=======================*/
+	trx_t*	trx)	/*!< in: transaction object associated with the
+			thread */
+{
+	/* Replication slave threads are left alone. */
+	if (trx->mysql_thd != NULL
+	    && thd_is_replication_slave_thread(trx->mysql_thd)) {
+
+		return;
+	}
+
+	/* Nothing to undo if the thread never declared itself inside. */
+	if (trx->declared_to_be_inside_innodb == FALSE) {
+
+		return;
+	}
+
+#ifndef HAVE_ATOMIC_BUILTINS
+	srv_conc_exit_innodb_without_atomics(trx);
+#else
+	srv_conc_exit_innodb_with_atomics(trx);
+#endif /* !HAVE_ATOMIC_BUILTINS */
+
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
+#endif /* UNIV_SYNC_DEBUG */
+}
+
+/*********************************************************************//**
+Report how many threads are currently counted as waiting to enter InnoDB.
+@return current value of srv_conc.n_waiting */
+UNIV_INTERN
+ulint
+srv_conc_get_waiting_threads(void)
+/*==============================*/
+{
+	const ulint	n_waiting = srv_conc.n_waiting;
+
+	return(n_waiting);
+}
+
+/*********************************************************************//**
+Report how many threads are currently counted as active inside InnoDB.
+@return current value of srv_conc.n_active */
+UNIV_INTERN
+ulint
+srv_conc_get_active_threads(void)
+/*==============================*/
+{
+	const ulint	n_active = srv_conc.n_active;
+
+	return(n_active);
+}
+
diff --git a/storage/xtradb/srv/srv0mon.cc b/storage/xtradb/srv/srv0mon.cc
new file mode 100644
index 00000000000..d98315ae9a2
--- /dev/null
+++ b/storage/xtradb/srv/srv0mon.cc
@@ -0,0 +1,1910 @@
+/*****************************************************************************
+
+Copyright (c) 2010, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2012, Facebook Inc.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file srv/srv0mon.cc
+Database monitor counter interfaces
+
+Created 12/9/2009 Jimmy Yang
+*******************************************************/
+
+#ifndef UNIV_HOTBACKUP
+#include "os0file.h"
+#include "mach0data.h"
+#include "srv0mon.h"
+#include "srv0srv.h"
+#include "buf0buf.h"
+#include "trx0sys.h"
+#include "trx0rseg.h"
+#include "lock0lock.h"
+#include "ibuf0ibuf.h"
+#ifdef UNIV_NONINL
+#include "srv0mon.ic"
+#endif
+
+/* Macro to standardize the counter names for counters in the
+"monitor_buf_page" module as they have very structured defines.
+It expands to one monitor_info_t initializer whose name is
+"buffer_page_<op>_<name>", whose module is "buffer_page_io" and whose
+monitor id is MONITOR_<code>_<op_code>. The arguments name, description
+and op must be string literals, because they are joined to the fixed
+parts by string literal concatenation. The literals and the macro
+parameters are separated by spaces on purpose: without them a C++11
+compiler reads "literal"op as a user-defined literal suffix. */
+#define MONITOR_BUF_PAGE(name, description, code, op, op_code) \
+	{"buffer_page_" op "_" name, "buffer_page_io", \
+	"Number of " description " Pages " op, \
+	MONITOR_GROUP_MODULE, MONITOR_DEFAULT_START, \
+	MONITOR_##code##_##op_code}
+
+/* Counter definition for pages of the given kind that were read. */
+#define MONITOR_BUF_PAGE_READ(name, description, code) \
+	MONITOR_BUF_PAGE(name, description, code, "read", PAGE_READ)
+
+/* Counter definition for pages of the given kind that were written. */
+#define MONITOR_BUF_PAGE_WRITTEN(name, description, code) \
+	MONITOR_BUF_PAGE(name, description, code, "written", PAGE_WRITTEN)
+
+
+/** This array defines basic static information of monitor counters,
+including each monitor's name, module it belongs to, a short
+description and its property/type and corresponding monitor_id.
+Please note: If you add a monitor here, please add its corresponding
+monitor_id to "enum monitor_id_value" structure in srv0mon.h file. */
+
+static monitor_info_t innodb_counter_info[] =
+{
+ /* A dummy item to mark the module start, this is
+ to accommodate the default value (0) set for the
+ global variables with the control system. */
+ {"module_start", "module_start", "module_start",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_DEFAULT_START},
+
+ /* ========== Counters for Server Metadata ========== */
+ {"module_metadata", "metadata", "Server Metadata",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_METADATA},
+
+ {"metadata_table_handles_opened", "metadata",
+ "Number of table handles opened",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TABLE_OPEN},
+
+ {"metadata_table_handles_closed", "metadata",
+ "Number of table handles closed",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TABLE_CLOSE},
+
+ {"metadata_table_reference_count", "metadata",
+ "Table reference counter",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TABLE_REFERENCE},
+
+ {"metadata_mem_pool_size", "metadata",
+ "Size of a memory pool InnoDB uses to store data dictionary"
+ " and internal data structures in bytes",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_META_MEM_POOL},
+
+ /* ========== Counters for Lock Module ========== */
+ {"module_lock", "lock", "Lock Module",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_LOCK},
+
+ {"lock_deadlocks", "lock", "Number of deadlocks",
+ MONITOR_DEFAULT_ON,
+ MONITOR_DEFAULT_START, MONITOR_DEADLOCK},
+
+ {"lock_timeouts", "lock", "Number of lock timeouts",
+ MONITOR_DEFAULT_ON,
+ MONITOR_DEFAULT_START, MONITOR_TIMEOUT},
+
+ {"lock_rec_lock_waits", "lock",
+ "Number of times enqueued into record lock wait queue",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_LOCKREC_WAIT},
+
+ {"lock_table_lock_waits", "lock",
+ "Number of times enqueued into table lock wait queue",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TABLELOCK_WAIT},
+
+ {"lock_rec_lock_requests", "lock",
+ "Number of record locks requested",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_NUM_RECLOCK_REQ},
+
+ {"lock_rec_lock_created", "lock", "Number of record locks created",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_RECLOCK_CREATED},
+
+ {"lock_rec_lock_removed", "lock",
+ "Number of record locks removed from the lock queue",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_RECLOCK_REMOVED},
+
+ {"lock_rec_locks", "lock",
+ "Current number of record locks on tables",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_NUM_RECLOCK},
+
+ {"lock_table_lock_created", "lock", "Number of table locks created",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TABLELOCK_CREATED},
+
+ {"lock_table_lock_removed", "lock",
+ "Number of table locks removed from the lock queue",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TABLELOCK_REMOVED},
+
+ {"lock_table_locks", "lock",
+ "Current number of table locks on tables",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_NUM_TABLELOCK},
+
+ {"lock_row_lock_current_waits", "lock",
+ "Number of row locks currently being waited for"
+ " (innodb_row_lock_current_waits)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT},
+
+ {"lock_row_lock_time", "lock",
+ "Time spent in acquiring row locks, in milliseconds"
+ " (innodb_row_lock_time)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_WAIT_TIME},
+
+ {"lock_row_lock_time_max", "lock",
+ "The maximum time to acquire a row lock, in milliseconds"
+ " (innodb_row_lock_time_max)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_MAX_WAIT_TIME},
+
+ {"lock_row_lock_waits", "lock",
+ "Number of times a row lock had to be waited for"
+ " (innodb_row_lock_waits)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_ROW_LOCK_WAIT},
+
+ {"lock_row_lock_time_avg", "lock",
+ "The average time to acquire a row lock, in milliseconds"
+ " (innodb_row_lock_time_avg)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LOCK_AVG_WAIT_TIME},
+
+ /* ========== Counters for Buffer Manager and I/O ========== */
+ {"module_buffer", "buffer", "Buffer Manager Module",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_BUFFER},
+
+ {"buffer_pool_size", "server",
+ "Server buffer pool size (all buffer pools) in bytes",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUFFER_POOL_SIZE},
+
+ {"buffer_pool_reads", "buffer",
+ "Number of reads directly from disk (innodb_buffer_pool_reads)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READS},
+
+ {"buffer_pool_read_requests", "buffer",
+ "Number of logical read requests (innodb_buffer_pool_read_requests)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_REQUESTS},
+
+ {"buffer_pool_write_requests", "buffer",
+ "Number of write requests (innodb_buffer_pool_write_requests)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_WRITE_REQUEST},
+
+ {"buffer_pool_wait_free", "buffer",
+ "Number of times waited for free buffer"
+ " (innodb_buffer_pool_wait_free)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_WAIT_FREE},
+
+ {"buffer_pool_read_ahead", "buffer",
+ "Number of pages read as read ahead (innodb_buffer_pool_read_ahead)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_AHEAD},
+
+ {"buffer_pool_read_ahead_evicted", "buffer",
+ "Read-ahead pages evicted without being accessed"
+ " (innodb_buffer_pool_read_ahead_evicted)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED},
+
+ {"buffer_pool_pages_total", "buffer",
+ "Total buffer pool size in pages (innodb_buffer_pool_pages_total)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGE_TOTAL},
+
+ {"buffer_pool_pages_misc", "buffer",
+ "Buffer pages for misc use such as row locks or the adaptive"
+ " hash index (innodb_buffer_pool_pages_misc)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGE_MISC},
+
+ {"buffer_pool_pages_data", "buffer",
+ "Buffer pages containing data (innodb_buffer_pool_pages_data)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_DATA},
+
+ {"buffer_pool_bytes_data", "buffer",
+ "Buffer bytes containing data (innodb_buffer_pool_bytes_data)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DATA},
+
+ {"buffer_pool_pages_dirty", "buffer",
+ "Buffer pages currently dirty (innodb_buffer_pool_pages_dirty)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_DIRTY},
+
+ {"buffer_pool_bytes_dirty", "buffer",
+ "Buffer bytes currently dirty (innodb_buffer_pool_bytes_dirty)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_BYTES_DIRTY},
+
+ {"buffer_pool_pages_free", "buffer",
+ "Buffer pages currently free (innodb_buffer_pool_pages_free)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_POOL_PAGES_FREE},
+
+ {"buffer_pages_created", "buffer",
+ "Number of pages created (innodb_pages_created)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_CREATED},
+
+ {"buffer_pages_written", "buffer",
+ "Number of pages written (innodb_pages_written)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_WRITTEN},
+
+ {"buffer_pages_read", "buffer",
+ "Number of pages read (innodb_pages_read)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_READ},
+
+ {"buffer_data_reads", "buffer",
+ "Amount of data read in bytes (innodb_data_reads)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BYTE_READ},
+
+ {"buffer_data_written", "buffer",
+ "Amount of data written in bytes (innodb_data_written)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BYTE_WRITTEN},
+
+ /* Cumulative counter for scanning in flush batches */
+ {"buffer_flush_batch_scanned", "buffer",
+ "Total pages scanned as part of flush batch",
+ MONITOR_SET_OWNER,
+ MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL,
+ MONITOR_FLUSH_BATCH_SCANNED},
+
+ {"buffer_flush_batch_num_scan", "buffer",
+ "Number of times buffer flush list flush is called",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_SCANNED,
+ MONITOR_FLUSH_BATCH_SCANNED_NUM_CALL},
+
+ {"buffer_flush_batch_scanned_per_call", "buffer",
+ "Pages scanned per flush batch scan",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_SCANNED,
+ MONITOR_FLUSH_BATCH_SCANNED_PER_CALL},
+
+ {"buffer_flush_batch_rescan", "buffer",
+ "Number of times rescan of flush list forced",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_HP_RESCAN},
+
+ /* Cumulative counter for pages flushed in flush batches */
+ {"buffer_flush_batch_total_pages", "buffer",
+ "Total pages flushed as part of flush batch",
+ MONITOR_SET_OWNER, MONITOR_FLUSH_BATCH_COUNT,
+ MONITOR_FLUSH_BATCH_TOTAL_PAGE},
+
+ {"buffer_flush_batches", "buffer",
+ "Number of flush batches",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_TOTAL_PAGE,
+ MONITOR_FLUSH_BATCH_COUNT},
+
+ {"buffer_flush_batch_pages", "buffer",
+ "Pages queued as a flush batch",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_BATCH_TOTAL_PAGE,
+ MONITOR_FLUSH_BATCH_PAGES},
+
+ /* Cumulative counter for flush batches because of neighbor */
+ {"buffer_flush_neighbor_total_pages", "buffer",
+ "Total neighbors flushed as part of neighbor flush",
+ MONITOR_SET_OWNER, MONITOR_FLUSH_NEIGHBOR_COUNT,
+ MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE},
+
+ {"buffer_flush_neighbor", "buffer",
+ "Number of times neighbors flushing is invoked",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
+ MONITOR_FLUSH_NEIGHBOR_COUNT},
+
+ {"buffer_flush_neighbor_pages", "buffer",
+ "Pages queued as a neighbor batch",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_NEIGHBOR_TOTAL_PAGE,
+ MONITOR_FLUSH_NEIGHBOR_PAGES},
+
+ {"buffer_flush_n_to_flush_requested", "buffer",
+ "Number of pages requested for flushing.",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_N_TO_FLUSH_REQUESTED},
+
+ {"buffer_flush_avg_page_rate", "buffer",
+ "Average number of pages at which flushing is happening",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_AVG_PAGE_RATE},
+
+ {"buffer_flush_lsn_avg_rate", "buffer",
+ "Average redo generation rate",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_LSN_AVG_RATE},
+
+ {"buffer_flush_pct_for_dirty", "buffer",
+ "Percent of IO capacity used to avoid max dirty page limit",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_PCT_FOR_DIRTY},
+
+ {"buffer_flush_pct_for_lsn", "buffer",
+ "Percent of IO capacity used to avoid reusable redo space limit",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_PCT_FOR_LSN},
+
+ {"buffer_flush_sync_waits", "buffer",
+ "Number of times a wait happens due to sync flushing",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_FLUSH_SYNC_WAITS},
+
+ /* Cumulative counter for flush batches for adaptive flushing */
+ {"buffer_flush_adaptive_total_pages", "buffer",
+ "Total pages flushed as part of adaptive flushing",
+ MONITOR_SET_OWNER, MONITOR_FLUSH_ADAPTIVE_COUNT,
+ MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE},
+
+ {"buffer_flush_adaptive", "buffer",
+ "Number of adaptive batches",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
+ MONITOR_FLUSH_ADAPTIVE_COUNT},
+
+ {"buffer_flush_adaptive_pages", "buffer",
+ "Pages queued as an adaptive batch",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_ADAPTIVE_TOTAL_PAGE,
+ MONITOR_FLUSH_ADAPTIVE_PAGES},
+
+ /* Cumulative counter for flush batches because of sync */
+ {"buffer_flush_sync_total_pages", "buffer",
+ "Total pages flushed as part of sync batches",
+ MONITOR_SET_OWNER, MONITOR_FLUSH_SYNC_COUNT,
+ MONITOR_FLUSH_SYNC_TOTAL_PAGE},
+
+ {"buffer_flush_sync", "buffer",
+ "Number of sync batches",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_SYNC_TOTAL_PAGE,
+ MONITOR_FLUSH_SYNC_COUNT},
+
+ {"buffer_flush_sync_pages", "buffer",
+ "Pages queued as a sync batch",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_SYNC_TOTAL_PAGE,
+ MONITOR_FLUSH_SYNC_PAGES},
+
+ /* Cumulative counter for flush batches because of background */
+ {"buffer_flush_background_total_pages", "buffer",
+ "Total pages flushed as part of background batches",
+ MONITOR_SET_OWNER, MONITOR_FLUSH_BACKGROUND_COUNT,
+ MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE},
+
+ {"buffer_flush_background", "buffer",
+ "Number of background batches",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
+ MONITOR_FLUSH_BACKGROUND_COUNT},
+
+ {"buffer_flush_background_pages", "buffer",
+ "Pages queued as a background batch",
+ MONITOR_SET_MEMBER, MONITOR_FLUSH_BACKGROUND_TOTAL_PAGE,
+ MONITOR_FLUSH_BACKGROUND_PAGES},
+
+ /* Cumulative counter for LRU batch scan */
+ {"buffer_LRU_batch_scanned", "buffer",
+ "Total pages scanned as part of LRU batch",
+ MONITOR_SET_OWNER, MONITOR_LRU_BATCH_SCANNED_NUM_CALL,
+ MONITOR_LRU_BATCH_SCANNED},
+
+ {"buffer_LRU_batch_num_scan", "buffer",
+ "Number of times LRU batch is called",
+ MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_SCANNED,
+ MONITOR_LRU_BATCH_SCANNED_NUM_CALL},
+
+ {"buffer_LRU_batch_scanned_per_call", "buffer",
+ "Pages scanned per LRU batch call",
+ MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_SCANNED,
+ MONITOR_LRU_BATCH_SCANNED_PER_CALL},
+
+ /* Cumulative counter for LRU batch pages flushed */
+ {"buffer_LRU_batch_total_pages", "buffer",
+ "Total pages flushed as part of LRU batches",
+ MONITOR_SET_OWNER, MONITOR_LRU_BATCH_COUNT,
+ MONITOR_LRU_BATCH_TOTAL_PAGE},
+
+ {"buffer_LRU_batches", "buffer",
+ "Number of LRU batches",
+ MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_TOTAL_PAGE,
+ MONITOR_LRU_BATCH_COUNT},
+
+ {"buffer_LRU_batch_pages", "buffer",
+ "Pages queued as an LRU batch",
+ MONITOR_SET_MEMBER, MONITOR_LRU_BATCH_TOTAL_PAGE,
+ MONITOR_LRU_BATCH_PAGES},
+
+ /* Cumulative counter for single page LRU scans */
+ {"buffer_LRU_single_flush_scanned", "buffer",
+ "Total pages scanned as part of single page LRU flush",
+ MONITOR_SET_OWNER,
+ MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL,
+ MONITOR_LRU_SINGLE_FLUSH_SCANNED},
+
+ {"buffer_LRU_single_flush_num_scan", "buffer",
+ "Number of times single page LRU flush is called",
+ MONITOR_SET_MEMBER, MONITOR_LRU_SINGLE_FLUSH_SCANNED,
+ MONITOR_LRU_SINGLE_FLUSH_SCANNED_NUM_CALL},
+
+ {"buffer_LRU_single_flush_scanned_per_call", "buffer",
+ "Page scanned per single LRU flush",
+ MONITOR_SET_MEMBER, MONITOR_LRU_SINGLE_FLUSH_SCANNED,
+ MONITOR_LRU_SINGLE_FLUSH_SCANNED_PER_CALL},
+
+ {"buffer_LRU_single_flush_failure_count", "Buffer",
+ "Number of times attempt to flush a single page from LRU failed",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_LRU_SINGLE_FLUSH_FAILURE_COUNT},
+
+ {"buffer_LRU_get_free_search", "Buffer",
+ "Number of searches performed for a clean page",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_LRU_GET_FREE_SEARCH},
+
+ /* Cumulative counter for LRU search scans */
+ {"buffer_LRU_search_scanned", "buffer",
+ "Total pages scanned as part of LRU search",
+ MONITOR_SET_OWNER,
+ MONITOR_LRU_SEARCH_SCANNED_NUM_CALL,
+ MONITOR_LRU_SEARCH_SCANNED},
+
+ {"buffer_LRU_search_num_scan", "buffer",
+ "Number of times LRU search is performed",
+ MONITOR_SET_MEMBER, MONITOR_LRU_SEARCH_SCANNED,
+ MONITOR_LRU_SEARCH_SCANNED_NUM_CALL},
+
+ {"buffer_LRU_search_scanned_per_call", "buffer",
+ "Page scanned per single LRU search",
+ MONITOR_SET_MEMBER, MONITOR_LRU_SEARCH_SCANNED,
+ MONITOR_LRU_SEARCH_SCANNED_PER_CALL},
+
+ /* Cumulative counter for LRU unzip search scans */
+ {"buffer_LRU_unzip_search_scanned", "buffer",
+ "Total pages scanned as part of LRU unzip search",
+ MONITOR_SET_OWNER,
+ MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL,
+ MONITOR_LRU_UNZIP_SEARCH_SCANNED},
+
+ {"buffer_LRU_unzip_search_num_scan", "buffer",
+ "Number of times LRU unzip search is performed",
+ MONITOR_SET_MEMBER, MONITOR_LRU_UNZIP_SEARCH_SCANNED,
+ MONITOR_LRU_UNZIP_SEARCH_SCANNED_NUM_CALL},
+
+ {"buffer_LRU_unzip_search_scanned_per_call", "buffer",
+ "Page scanned per single LRU unzip search",
+ MONITOR_SET_MEMBER, MONITOR_LRU_UNZIP_SEARCH_SCANNED,
+ MONITOR_LRU_UNZIP_SEARCH_SCANNED_PER_CALL},
+
+ /* ========== Counters for Buffer Page I/O ========== */
+ {"module_buffer_page", "buffer_page_io", "Buffer Page I/O Module",
+ static_cast<monitor_type_t>(
+ MONITOR_MODULE | MONITOR_GROUP_MODULE),
+ MONITOR_DEFAULT_START, MONITOR_MODULE_BUF_PAGE},
+
+ MONITOR_BUF_PAGE_READ("index_leaf","Index Leaf", INDEX_LEAF),
+
+ MONITOR_BUF_PAGE_READ("index_non_leaf","Index Non-leaf",
+ INDEX_NON_LEAF),
+
+ MONITOR_BUF_PAGE_READ("index_ibuf_leaf", "Insert Buffer Index Leaf",
+ INDEX_IBUF_LEAF),
+
+ MONITOR_BUF_PAGE_READ("index_ibuf_non_leaf",
+ "Insert Buffer Index Non-Leaf",
+ INDEX_IBUF_NON_LEAF),
+
+ MONITOR_BUF_PAGE_READ("undo_log", "Undo Log", UNDO_LOG),
+
+ MONITOR_BUF_PAGE_READ("index_inode", "Index Inode", INODE),
+
+ MONITOR_BUF_PAGE_READ("ibuf_free_list", "Insert Buffer Free List",
+ IBUF_FREELIST),
+
+ MONITOR_BUF_PAGE_READ("ibuf_bitmap", "Insert Buffer Bitmap",
+ IBUF_BITMAP),
+
+ MONITOR_BUF_PAGE_READ("system_page", "System", SYSTEM),
+
+ MONITOR_BUF_PAGE_READ("trx_system", "Transaction System", TRX_SYSTEM),
+
+ MONITOR_BUF_PAGE_READ("fsp_hdr", "File Space Header", FSP_HDR),
+
+ MONITOR_BUF_PAGE_READ("xdes", "Extent Descriptor", XDES),
+
+ MONITOR_BUF_PAGE_READ("blob", "Uncompressed BLOB", BLOB),
+
+ MONITOR_BUF_PAGE_READ("zblob", "First Compressed BLOB", ZBLOB),
+
+ MONITOR_BUF_PAGE_READ("zblob2", "Subsequent Compressed BLOB", ZBLOB2),
+
+ MONITOR_BUF_PAGE_READ("other", "other/unknown (old version of InnoDB)",
+ OTHER),
+
+ MONITOR_BUF_PAGE_WRITTEN("index_leaf","Index Leaf", INDEX_LEAF),
+
+ MONITOR_BUF_PAGE_WRITTEN("index_non_leaf","Index Non-leaf",
+ INDEX_NON_LEAF),
+
+ MONITOR_BUF_PAGE_WRITTEN("index_ibuf_leaf", "Insert Buffer Index Leaf",
+ INDEX_IBUF_LEAF),
+
+ MONITOR_BUF_PAGE_WRITTEN("index_ibuf_non_leaf",
+ "Insert Buffer Index Non-Leaf",
+ INDEX_IBUF_NON_LEAF),
+
+ MONITOR_BUF_PAGE_WRITTEN("undo_log", "Undo Log", UNDO_LOG),
+
+ MONITOR_BUF_PAGE_WRITTEN("index_inode", "Index Inode", INODE),
+
+ MONITOR_BUF_PAGE_WRITTEN("ibuf_free_list", "Insert Buffer Free List",
+ IBUF_FREELIST),
+
+ MONITOR_BUF_PAGE_WRITTEN("ibuf_bitmap", "Insert Buffer Bitmap",
+ IBUF_BITMAP),
+
+ MONITOR_BUF_PAGE_WRITTEN("system_page", "System", SYSTEM),
+
+ MONITOR_BUF_PAGE_WRITTEN("trx_system", "Transaction System",
+ TRX_SYSTEM),
+
+ MONITOR_BUF_PAGE_WRITTEN("fsp_hdr", "File Space Header", FSP_HDR),
+
+ MONITOR_BUF_PAGE_WRITTEN("xdes", "Extent Descriptor", XDES),
+
+ MONITOR_BUF_PAGE_WRITTEN("blob", "Uncompressed BLOB", BLOB),
+
+ MONITOR_BUF_PAGE_WRITTEN("zblob", "First Compressed BLOB", ZBLOB),
+
+ MONITOR_BUF_PAGE_WRITTEN("zblob2", "Subsequent Compressed BLOB",
+ ZBLOB2),
+
+ MONITOR_BUF_PAGE_WRITTEN("other", "other/unknown (old version InnoDB)",
+ OTHER),
+
+ /* ========== Counters for OS level operations ========== */
+ {"module_os", "os", "OS Level Operation",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_OS},
+
+ {"os_data_reads", "os",
+ "Number of reads initiated (innodb_data_reads)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FILE_READ},
+
+ {"os_data_writes", "os",
+ "Number of writes initiated (innodb_data_writes)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FILE_WRITE},
+
+ {"os_data_fsyncs", "os",
+ "Number of fsync() calls (innodb_data_fsyncs)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_OS_FSYNC},
+
+ {"os_pending_reads", "os", "Number of reads pending",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OS_PENDING_READS},
+
+ {"os_pending_writes", "os", "Number of writes pending",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OS_PENDING_WRITES},
+
+ {"os_log_bytes_written", "os",
+ "Bytes of log written (innodb_os_log_written)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_WRITTEN},
+
+ {"os_log_fsyncs", "os",
+ "Number of fsync log writes (innodb_os_log_fsyncs)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_FSYNC},
+
+ {"os_log_pending_fsyncs", "os",
+ "Number of pending fsync write (innodb_os_log_pending_fsyncs)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_PENDING_FSYNC},
+
+ {"os_log_pending_writes", "os",
+ "Number of pending log file writes (innodb_os_log_pending_writes)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_OS_LOG_PENDING_WRITES},
+
+ /* ========== Counters for Transaction Module ========== */
+ {"module_trx", "transaction", "Transaction Manager",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_TRX},
+
+ {"trx_rw_commits", "transaction", "Number of read-write transactions "
+ "committed",
+ MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_RW_COMMIT},
+
+ {"trx_ro_commits", "transaction", "Number of read-only transactions "
+ "committed",
+ MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_RO_COMMIT},
+
+ {"trx_nl_ro_commits", "transaction", "Number of non-locking "
+ "auto-commit read-only transactions committed",
+ MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_TRX_NL_RO_COMMIT},
+
+ {"trx_commits_insert_update", "transaction",
+ "Number of transactions committed with inserts and updates",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TRX_COMMIT_UNDO},
+
+ {"trx_rollbacks", "transaction",
+ "Number of transactions rolled back",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK},
+
+ {"trx_rollbacks_savepoint", "transaction",
+ "Number of transactions rolled back to savepoint",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK_SAVEPOINT},
+
+ {"trx_rollback_active", "transaction",
+ "Number of resurrected active transactions rolled back",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TRX_ROLLBACK_ACTIVE},
+
+ {"trx_active_transactions", "transaction",
+ "Number of active transactions",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_TRX_ACTIVE},
+
+ {"trx_rseg_history_len", "transaction",
+ "Length of the TRX_RSEG_HISTORY list",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_RSEG_HISTORY_LEN},
+
+ {"trx_undo_slots_used", "transaction", "Number of undo slots used",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_NUM_UNDO_SLOT_USED},
+
+ {"trx_undo_slots_cached", "transaction",
+ "Number of undo slots cached",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_NUM_UNDO_SLOT_CACHED},
+
+ {"trx_rseg_current_size", "transaction",
+ "Current rollback segment size in pages",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_RSEG_CUR_SIZE},
+
+ /* ========== Counters for Purge Module ========== */
+ {"module_purge", "purge", "Purge Module",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_PURGE},
+
+ {"purge_del_mark_records", "purge",
+ "Number of delete-marked rows purged",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_N_DEL_ROW_PURGE},
+
+ {"purge_upd_exist_or_extern_records", "purge",
+ "Number of purges on updates of existing records and "
+ " updates on delete marked record with externally stored field",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_N_UPD_EXIST_EXTERN},
+
+ {"purge_invoked", "purge",
+ "Number of times purge was invoked",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PURGE_INVOKED},
+
+ {"purge_undo_log_pages", "purge",
+ "Number of undo log pages handled by the purge",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PURGE_N_PAGE_HANDLED},
+
+ {"purge_dml_delay_usec", "purge",
+ "Microseconds DML to be delayed due to purge lagging",
+ MONITOR_DISPLAY_CURRENT,
+ MONITOR_DEFAULT_START, MONITOR_DML_PURGE_DELAY},
+
+ {"purge_stop_count", "purge",
+ "Number of times purge was stopped",
+ MONITOR_DISPLAY_CURRENT,
+ MONITOR_DEFAULT_START, MONITOR_PURGE_STOP_COUNT},
+
+ {"purge_resume_count", "purge",
+ "Number of times purge was resumed",
+ MONITOR_DISPLAY_CURRENT,
+ MONITOR_DEFAULT_START, MONITOR_PURGE_RESUME_COUNT},
+
+ /* ========== Counters for Recovery Module ========== */
+ {"module_log", "recovery", "Recovery Module",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_RECOVERY},
+
+ {"log_checkpoints", "recovery", "Number of checkpoints",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_NUM_CHECKPOINT},
+
+ {"log_lsn_last_flush", "recovery", "LSN of Last flush",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_FLUSHDISK},
+
+ {"log_lsn_last_checkpoint", "recovery", "LSN at last checkpoint",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_CHECKPOINT},
+
+ {"log_lsn_current", "recovery", "Current LSN value",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LSN_CURRENT},
+
+ {"log_lsn_checkpoint_age", "recovery",
+ "Current LSN value minus LSN at last checkpoint",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_LSN_CHECKPOINT_AGE},
+
+ {"log_lsn_buf_pool_oldest", "recovery",
+ "The oldest modified block LSN in the buffer pool",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_BUF_OLDEST_LSN},
+
+ {"log_max_modified_age_async", "recovery",
+ "Maximum LSN difference; when exceeded, start asynchronous preflush",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_MAX_AGE_ASYNC},
+
+ {"log_max_modified_age_sync", "recovery",
+ "Maximum LSN difference; when exceeded, start synchronous preflush",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_MAX_AGE_SYNC},
+
+ {"log_pending_log_writes", "recovery", "Pending log writes",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PENDING_LOG_WRITE},
+
+ {"log_pending_checkpoint_writes", "recovery", "Pending checkpoints",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PENDING_CHECKPOINT_WRITE},
+
+ {"log_num_log_io", "recovery", "Number of log I/Os",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_LOG_IO},
+
+ {"log_waits", "recovery",
+ "Number of log waits due to small log buffer (innodb_log_waits)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WAITS},
+
+ {"log_write_requests", "recovery",
+ "Number of log write requests (innodb_log_write_requests)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WRITE_REQUEST},
+
+ {"log_writes", "recovery",
+ "Number of log writes (innodb_log_writes)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_LOG_WRITES},
+
+ /* ========== Counters for Page Compression ========== */
+ {"module_compress", "compression", "Page Compression Info",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_PAGE},
+
+ {"compress_pages_compressed", "compression",
+ "Number of pages compressed", MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PAGE_COMPRESS},
+
+ {"compress_pages_decompressed", "compression",
+ "Number of pages decompressed",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PAGE_DECOMPRESS},
+
+ {"compression_pad_increments", "compression",
+ "Number of times padding is incremented to avoid compression failures",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PAD_INCREMENTS},
+
+ {"compression_pad_decrements", "compression",
+ "Number of times padding is decremented due to good compressibility",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PAD_DECREMENTS},
+
+ /* ========== Counters for Index ========== */
+ {"module_index", "index", "Index Manager",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_INDEX},
+
+ {"index_splits", "index", "Number of index splits",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_INDEX_SPLIT},
+
+ {"index_merges", "index", "Number of index merges",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_INDEX_MERGE},
+
+ /* ========== Counters for Adaptive Hash Index ========== */
+ {"module_adaptive_hash", "adaptive_hash_index", "Adpative Hash Index",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_ADAPTIVE_HASH},
+
+ {"adaptive_hash_searches", "adaptive_hash_index",
+ "Number of successful searches using Adaptive Hash Index",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_ADAPTIVE_HASH_SEARCH},
+
+ {"adaptive_hash_searches_btree", "adaptive_hash_index",
+ "Number of searches using B-tree on an index search",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE},
+
+ {"adaptive_hash_pages_added", "adaptive_hash_index",
+ "Number of index pages on which the Adaptive Hash Index is built",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_PAGE_ADDED},
+
+ {"adaptive_hash_pages_removed", "adaptive_hash_index",
+ "Number of index pages whose corresponding Adaptive Hash Index"
+ " entries were removed",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_PAGE_REMOVED},
+
+ {"adaptive_hash_rows_added", "adaptive_hash_index",
+ "Number of Adaptive Hash Index rows added",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_ADDED},
+
+ {"adaptive_hash_rows_removed", "adaptive_hash_index",
+ "Number of Adaptive Hash Index rows removed",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_REMOVED},
+
+ {"adaptive_hash_rows_deleted_no_hash_entry", "adaptive_hash_index",
+ "Number of rows deleted that did not have corresponding Adaptive Hash"
+ " Index entries",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_REMOVE_NOT_FOUND},
+
+ {"adaptive_hash_rows_updated", "adaptive_hash_index",
+ "Number of Adaptive Hash Index rows updated",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ADAPTIVE_HASH_ROW_UPDATED},
+
+ /* ========== Counters for tablespace ========== */
+ {"module_file", "file_system", "Tablespace and File System Manager",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_FIL_SYSTEM},
+
+ {"file_num_open_files", "file_system",
+ "Number of files currently open (innodb_num_open_files)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DISPLAY_CURRENT | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_N_FILE_OPENED},
+
+ /* ========== Counters for Change Buffer ========== */
+ {"module_ibuf_system", "change_buffer", "InnoDB Change Buffer",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_IBUF_SYSTEM},
+
+ {"ibuf_merges_insert", "change_buffer",
+ "Number of inserted records merged by change buffering",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_INSERT},
+
+ {"ibuf_merges_delete_mark", "change_buffer",
+ "Number of deleted records merged by change buffering",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DELETE},
+
+ {"ibuf_merges_delete", "change_buffer",
+ "Number of purge records merged by change buffering",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_PURGE},
+
+ {"ibuf_merges_discard_insert", "change_buffer",
+ "Number of insert merged operations discarded",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT},
+
+ {"ibuf_merges_discard_delete_mark", "change_buffer",
+ "Number of deleted merged operations discarded",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE},
+
+ {"ibuf_merges_discard_delete", "change_buffer",
+ "Number of purge merged operations discarded",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE},
+
+ {"ibuf_merges", "change_buffer", "Number of change buffer merges",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_MERGES},
+
+ {"ibuf_size", "change_buffer", "Change buffer size in pages",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_IBUF_SIZE},
+
+ /* ========== Counters for server operations ========== */
+ {"module_innodb", "innodb",
+ "Counter for general InnoDB server wide operations and properties",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_SERVER},
+
+ {"innodb_master_thread_sleeps", "server",
+ "Number of times (seconds) master thread sleeps",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_MASTER_THREAD_SLEEP},
+
+ {"innodb_activity_count", "server", "Current server activity count",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_SERVER_ACTIVITY},
+
+ {"innodb_master_active_loops", "server",
+ "Number of times master thread performs its tasks when"
+ " server is active",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_MASTER_ACTIVE_LOOPS},
+
+ {"innodb_master_idle_loops", "server",
+ "Number of times master thread performs its tasks when server is idle",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_MASTER_IDLE_LOOPS},
+
+ {"innodb_background_drop_table_usec", "server",
+ "Time (in microseconds) spent to process drop table list",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND},
+
+ {"innodb_ibuf_merge_usec", "server",
+ "Time (in microseconds) spent to process change buffer merge",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_SRV_IBUF_MERGE_MICROSECOND},
+
+ {"innodb_log_flush_usec", "server",
+ "Time (in microseconds) spent to flush log records",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_SRV_LOG_FLUSH_MICROSECOND},
+
+ {"innodb_mem_validate_usec", "server",
+ "Time (in microseconds) spent to do memory validation",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_SRV_MEM_VALIDATE_MICROSECOND},
+
+ {"innodb_master_purge_usec", "server",
+ "Time (in microseconds) spent by master thread to purge records",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_SRV_PURGE_MICROSECOND},
+
+ {"innodb_dict_lru_usec", "server",
+ "Time (in microseconds) spent to process DICT LRU list",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_SRV_DICT_LRU_MICROSECOND},
+
+ {"innodb_checkpoint_usec", "server",
+ "Time (in microseconds) spent by master thread to do checkpoint",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_SRV_CHECKPOINT_MICROSECOND},
+
+ {"innodb_dblwr_writes", "server",
+ "Number of doublewrite operations that have been performed"
+ " (innodb_dblwr_writes)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_DBLWR_WRITES},
+
+ {"innodb_dblwr_pages_written", "server",
+ "Number of pages that have been written for doublewrite operations"
+ " (innodb_dblwr_pages_written)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN},
+
+ {"innodb_page_size", "server",
+ "InnoDB page size in bytes (innodb_page_size)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON | MONITOR_DISPLAY_CURRENT),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_SRV_PAGE_SIZE},
+
+ {"innodb_rwlock_s_spin_waits", "server",
+ "Number of rwlock spin waits due to shared latch request",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_S_SPIN_WAITS},
+
+ {"innodb_rwlock_x_spin_waits", "server",
+ "Number of rwlock spin waits due to exclusive latch request",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_SPIN_WAITS},
+
+ {"innodb_rwlock_s_spin_rounds", "server",
+ "Number of rwlock spin loop rounds due to shared latch request",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS},
+
+ {"innodb_rwlock_x_spin_rounds", "server",
+ "Number of rwlock spin loop rounds due to exclusive latch request",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS},
+
+ {"innodb_rwlock_s_os_waits", "server",
+ "Number of OS waits due to shared latch request",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_S_OS_WAITS},
+
+ {"innodb_rwlock_x_os_waits", "server",
+ "Number of OS waits due to exclusive latch request",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_RWLOCK_X_OS_WAITS},
+
+ /* ========== Counters for DML operations ========== */
+ {"module_dml", "dml", "Statistics for DMLs",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_DML_STATS},
+
+ {"dml_reads", "dml", "Number of rows read",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_READ},
+
+ {"dml_inserts", "dml", "Number of rows inserted",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_INSERTED},
+
+ {"dml_deletes", "dml", "Number of rows deleted",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_DELETED},
+
+ {"dml_updates", "dml", "Number of rows updated",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OLVD_ROW_UPDTATED},
+
+ /* ========== Counters for DDL operations ========== */
+ {"module_ddl", "ddl", "Statistics for DDLs",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_DDL_STATS},
+
+ {"ddl_background_drop_indexes", "ddl",
+ "Number of indexes waiting to be dropped after failed index creation",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_BACKGROUND_DROP_INDEX},
+
+ {"ddl_background_drop_tables", "ddl",
+ "Number of tables in background drop table list",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_BACKGROUND_DROP_TABLE},
+
+ {"ddl_online_create_index", "ddl",
+ "Number of indexes being created online",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ONLINE_CREATE_INDEX},
+
+ {"ddl_pending_alter_table", "ddl",
+ "Number of ALTER TABLE, CREATE INDEX, DROP INDEX in progress",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_PENDING_ALTER_TABLE},
+
+ /* ===== Counters for ICP (Index Condition Pushdown) Module ===== */
+ {"module_icp", "icp", "Index Condition Pushdown",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_MODULE_ICP},
+
+ {"icp_attempts", "icp",
+ "Number of attempts for index push-down condition checks",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ICP_ATTEMPTS},
+
+ {"icp_no_match", "icp", "Index push-down condition does not match",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ICP_NO_MATCH},
+
+ {"icp_out_of_range", "icp", "Index push-down condition out of range",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ICP_OUT_OF_RANGE},
+
+ {"icp_match", "icp", "Index push-down condition matches",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_ICP_MATCH},
+
+ /* ========== To turn on/off reset all counters ========== */
+ {"all", "All Counters", "Turn on/off and reset all counters",
+ MONITOR_MODULE,
+ MONITOR_DEFAULT_START, MONITOR_ALL_COUNTER}
+};
+
+/* The "innodb_counter_value" array stores actual counter values.
+It has NUM_MONITOR elements, which is also the size that
+innodb_counter_info is asserted to have, so presumably both arrays are
+indexed by the same monitor id (TODO confirm against the accessors). */
+UNIV_INTERN monitor_value_t innodb_counter_value[NUM_MONITOR];
+
+/* monitor_set_tbl is used to record and determine whether a monitor
+has been turned on/off. The array length is NUM_MONITOR divided by
+NUM_BITS_ULINT, rounded up, i.e. enough ulint words to hold one flag
+per monitor if NUM_BITS_ULINT is the number of bits in a ulint
+(assumption -- TODO confirm). */
+UNIV_INTERN ulint monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT
+ - 1) / NUM_BITS_ULINT];
+
+#ifndef HAVE_ATOMIC_BUILTINS_64
+/** Mutex protecting atomic operations on platforms that lack
+built-in operations for atomic memory access. This definition is only
+compiled when HAVE_ATOMIC_BUILTINS_64 is not defined. The mutex is
+created in srv_mon_create() and freed in srv_mon_free(). */
+ib_mutex_t monitor_mutex;
+
+/** Key to register monitor_mutex with performance schema. It is the
+key passed to mutex_create() in srv_mon_create(). */
+UNIV_INTERN mysql_pfs_key_t monitor_mutex_key;
+
+/****************************************************************//**
+Initialize the monitor subsystem. The only work done here is creating
+monitor_mutex through mutex_create(), passing monitor_mutex_key and
+SYNC_ANY_LATCH. This function is only compiled when
+HAVE_ATOMIC_BUILTINS_64 is not defined. */
+UNIV_INTERN
+void
+srv_mon_create(void)
+/*================*/
+{
+ mutex_create(monitor_mutex_key, &monitor_mutex, SYNC_ANY_LATCH);
+}
+/****************************************************************//**
+Close the monitor subsystem. The only work done here is releasing
+monitor_mutex through mutex_free(); it is the counterpart of
+srv_mon_create(). This function is only compiled when
+HAVE_ATOMIC_BUILTINS_64 is not defined. */
+UNIV_INTERN
+void
+srv_mon_free(void)
+/*==============*/
+{
+ mutex_free(&monitor_mutex);
+}
+#endif /* !HAVE_ATOMIC_BUILTINS_64 */
+
+/****************************************************************//**
+Look up the descriptor of a monitor counter by its id. The id is used
+directly as an index into the innodb_counter_info array, after being
+asserted to be below NUM_MONITOR.
+@return pointer to the matching monitor_info_t, or NULL if the id is
+not below NUM_MONITOR (NOTE(review): presumably unreachable whenever
+the ut_a() check aborts first -- confirm) */
+UNIV_INTERN
+monitor_info_t*
+srv_mon_get_info(
+/*=============*/
+	monitor_id_t	monitor_id)	/*!< in: id indexing into the
+					innodb_counter_info array */
+{
+	ut_a(monitor_id < NUM_MONITOR);
+
+	/* Fallback range check kept from the original code: an id that
+	is out of range yields NULL instead of an out-of-bounds access. */
+	if (!(monitor_id < NUM_MONITOR)) {
+		return(NULL);
+	}
+
+	return(&innodb_counter_info[monitor_id]);
+}
+
+/****************************************************************//**
+Look up the name of a monitor counter by its id. The id is used
+directly as an index into the innodb_counter_info array, after being
+asserted to be below NUM_MONITOR, and the monitor_name member of that
+entry is returned.
+@return the monitor name, or NULL if the id is not below NUM_MONITOR
+(NOTE(review): presumably unreachable whenever the ut_a() check aborts
+first -- confirm) */
+UNIV_INTERN
+const char*
+srv_mon_get_name(
+/*=============*/
+	monitor_id_t	monitor_id)	/*!< in: id indexing into the
+					innodb_counter_info array */
+{
+	const char*	name = NULL;
+
+	ut_a(monitor_id < NUM_MONITOR);
+
+	/* Only read the table when the id is in range; otherwise the
+	NULL default is returned, as in the original code. */
+	if (monitor_id < NUM_MONITOR) {
+		name = innodb_counter_info[monitor_id].monitor_name;
+	}
+
+	return(name);
+}
+
+/****************************************************************//**
+Turn on/off, reset monitor counters in a module. If module_id
+is MONITOR_ALL_COUNTER then turn on all monitor counters. A monitor
+is not turned on again if it has already been turned on. */
+UNIV_INTERN
+void
+srv_mon_set_module_control(
+/*=======================*/
+ monitor_id_t module_id, /*!< in: Module ID as in
+ monitor_counter_id. If it is
+ set to MONITOR_ALL_COUNTER, this means
+ we shall turn on all the counters */
+ mon_option_t set_option) /*!< in: Turn on/off reset the
+ counter */
+{
+ ulint ix;
+ ulint start_id;
+ ibool set_current_module = FALSE;
+
+ ut_a(module_id <= NUM_MONITOR);
+ ut_a(UT_ARR_SIZE(innodb_counter_info) == NUM_MONITOR);
+
+ /* The module_id must be an ID of MONITOR_MODULE type */
+ ut_a(innodb_counter_info[module_id].monitor_type & MONITOR_MODULE);
+
+ /* start with the first monitor in the module. If module_id
+ is MONITOR_ALL_COUNTER, this means we need to turn on all
+ monitor counters. */
+ if (module_id == MONITOR_ALL_COUNTER) {
+ start_id = 1;
+ } else if (innodb_counter_info[module_id].monitor_type
+ & MONITOR_GROUP_MODULE) {
+ /* Counters in this module are set as a group together
+ and cannot be turned on/off individually. Need to set
+ the on/off bit in the module counter */
+ start_id = module_id;
+ set_current_module = TRUE;
+
+ } else {
+ start_id = module_id + 1;
+ }
+
+ for (ix = start_id; ix < NUM_MONITOR; ix++) {
+ /* if we hit the next module counter, we will
+ continue if we want to turn on all monitor counters,
+ and break if just turn on the counters in the
+ current module. */
+ if (innodb_counter_info[ix].monitor_type & MONITOR_MODULE) {
+
+ if (set_current_module) {
+ /* Continue to set on/off bit on current
+ module */
+ set_current_module = FALSE;
+ } else if (module_id == MONITOR_ALL_COUNTER) {
+ continue;
+ } else {
+ /* Hitting the next module, stop */
+ break;
+ }
+ }
+
+ /* Cannot turn on a monitor already been turned on. User
+ should be aware some counters are already on before
+ turn them on again (which could reset counter value) */
+ if (MONITOR_IS_ON(ix) && (set_option == MONITOR_TURN_ON)) {
+ fprintf(stderr, "Monitor '%s' is already enabled.\n",
+ srv_mon_get_name((monitor_id_t) ix));
+ continue;
+ }
+
+ /* For some existing counters (server status variables),
+ we will get its counter value at the start/stop time
+ to calculate the actual value during the time. */
+ if (innodb_counter_info[ix].monitor_type & MONITOR_EXISTING) {
+ srv_mon_process_existing_counter(
+ static_cast<monitor_id_t>(ix), set_option);
+ }
+
+ /* Currently support 4 operations on the monitor counters:
+ turn on, turn off, reset and reset all operations. */
+ switch (set_option) {
+ case MONITOR_TURN_ON:
+ MONITOR_ON(ix);
+ MONITOR_INIT(ix);
+ MONITOR_SET_START(ix);
+ break;
+
+ case MONITOR_TURN_OFF:
+ MONITOR_OFF(ix);
+ MONITOR_SET_OFF(ix);
+ break;
+
+ case MONITOR_RESET_VALUE:
+ srv_mon_reset(static_cast<monitor_id_t>(ix));
+ break;
+
+ case MONITOR_RESET_ALL_VALUE:
+ srv_mon_reset_all(static_cast<monitor_id_t>(ix));
+ break;
+
+ default:
+ ut_error;
+ }
+ }
+}
+
+/****************************************************************//**
+Sum up curr_size over every rollback segment present in trx_sys
+@return total size in pages (an estimate, see the comment below) */
+static
+ulint
+srv_mon_get_rseg_size(void)
+/*=======================*/
+{
+	ulint	n_pages = 0;
+
+	/* rseg_array is a static array, so it can be walked without
+	mutex protection. The result is only an estimate of the total
+	rollback segment size, so rseg->mutex is deliberately not
+	acquired, which avoids mutex contention. */
+	for (ulint slot = 0; slot < TRX_SYS_N_RSEGS; ++slot) {
+		const trx_rseg_t*	rseg = trx_sys->rseg_array[slot];
+
+		if (rseg == NULL) {
+			continue;
+		}
+		n_pages += rseg->curr_size;
+	}
+
+	return(n_pages);
+}
+
+/****************************************************************//**
+This function consolidates some existing server counters used
+by "system status variables". These existing system variables do not have
+mechanism to start/stop and reset the counters, so we simulate these
+controls by remembering the corresponding counter values when the
+corresponding monitors are turned on/off/reset, and do appropriate
+mathematics to deduce the actual value. Please also refer to
+srv_export_innodb_status() for related global counters used by
+the existing status variables.*/
+UNIV_INTERN
+void
+srv_mon_process_existing_counter(
+/*=============================*/
+ monitor_id_t monitor_id, /*!< in: the monitor's ID as in
+ monitor_counter_id */
+ mon_option_t set_option) /*!< in: Turn on/off reset the
+ counter */
+{
+ mon_type_t value;
+ monitor_info_t* monitor_info;
+ ibool update_min = FALSE;
+ buf_pool_stat_t stat;
+ buf_pools_list_size_t buf_pools_list_size;
+ ulint LRU_len;
+ ulint free_len;
+ ulint flush_list_len;
+
+ monitor_info = srv_mon_get_info(monitor_id);
+
+ ut_a(monitor_info->monitor_type & MONITOR_EXISTING);
+ ut_a(monitor_id < NUM_MONITOR);
+
+ /* Get the value from corresponding global variable */
+ switch (monitor_id) {
+ case MONITOR_OVLD_META_MEM_POOL:
+ value = srv_mem_pool_size;
+ break;
+
+ /* export_vars.innodb_buffer_pool_reads. Num Reads from
+ disk (page not in buffer) */
+ case MONITOR_OVLD_BUF_POOL_READS:
+ value = srv_stats.buf_pool_reads;
+ break;
+
+ /* innodb_buffer_pool_read_requests, the number of logical
+ read requests */
+ case MONITOR_OVLD_BUF_POOL_READ_REQUESTS:
+ buf_get_total_stat(&stat);
+ value = stat.n_page_gets;
+ break;
+
+ /* innodb_buffer_pool_write_requests, the number of
+ write requests */
+ case MONITOR_OVLD_BUF_POOL_WRITE_REQUEST:
+ value = srv_stats.buf_pool_write_requests;
+ break;
+
+ /* innodb_buffer_pool_wait_free */
+ case MONITOR_OVLD_BUF_POOL_WAIT_FREE:
+ value = srv_stats.buf_pool_wait_free;
+ break;
+
+ /* innodb_buffer_pool_read_ahead */
+ case MONITOR_OVLD_BUF_POOL_READ_AHEAD:
+ buf_get_total_stat(&stat);
+ value = stat.n_ra_pages_read;
+ break;
+
+ /* innodb_buffer_pool_read_ahead_evicted */
+ case MONITOR_OVLD_BUF_POOL_READ_AHEAD_EVICTED:
+ buf_get_total_stat(&stat);
+ value = stat.n_ra_pages_evicted;
+ break;
+
+ /* innodb_buffer_pool_pages_total */
+ case MONITOR_OVLD_BUF_POOL_PAGE_TOTAL:
+ value = buf_pool_get_n_pages();
+ break;
+
+ /* innodb_buffer_pool_pages_misc */
+ case MONITOR_OVLD_BUF_POOL_PAGE_MISC:
+ buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
+ value = buf_pool_get_n_pages() - LRU_len - free_len;
+ break;
+
+ /* innodb_buffer_pool_pages_data */
+ case MONITOR_OVLD_BUF_POOL_PAGES_DATA:
+ buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
+ value = LRU_len;
+ break;
+
+ /* innodb_buffer_pool_bytes_data */
+ case MONITOR_OVLD_BUF_POOL_BYTES_DATA:
+ buf_get_total_list_size_in_bytes(&buf_pools_list_size);
+ value = buf_pools_list_size.LRU_bytes
+ + buf_pools_list_size.unzip_LRU_bytes;
+ break;
+
+ /* innodb_buffer_pool_pages_dirty */
+ case MONITOR_OVLD_BUF_POOL_PAGES_DIRTY:
+ buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
+ value = flush_list_len;
+ break;
+
+ /* innodb_buffer_pool_bytes_dirty */
+ case MONITOR_OVLD_BUF_POOL_BYTES_DIRTY:
+ buf_get_total_list_size_in_bytes(&buf_pools_list_size);
+ value = buf_pools_list_size.flush_list_bytes;
+ break;
+
+ /* innodb_buffer_pool_pages_free */
+ case MONITOR_OVLD_BUF_POOL_PAGES_FREE:
+ buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
+ value = free_len;
+ break;
+
+ /* innodb_pages_created, the number of pages created */
+ case MONITOR_OVLD_PAGE_CREATED:
+ buf_get_total_stat(&stat);
+ value = stat.n_pages_created;
+ break;
+
+ /* innodb_pages_written, the number of pages written */
+ case MONITOR_OVLD_PAGES_WRITTEN:
+ buf_get_total_stat(&stat);
+ value = stat.n_pages_written;
+ break;
+
+ /* innodb_pages_read */
+ case MONITOR_OVLD_PAGES_READ:
+ buf_get_total_stat(&stat);
+ value = stat.n_pages_read;
+ break;
+
+ /* innodb_data_read, the amount of data read (srv_stats.data_read) */
+ case MONITOR_OVLD_BYTE_READ:
+ value = srv_stats.data_read;
+ break;
+
+ /* innodb_data_written, the amount of data written. */
+ case MONITOR_OVLD_BYTE_WRITTEN:
+ value = srv_stats.data_written;
+ break;
+
+ /* innodb_data_reads, the total number of data reads. */
+ case MONITOR_OVLD_OS_FILE_READ:
+ value = os_n_file_reads;
+ break;
+
+ /* innodb_data_writes, the total number of data writes*/
+ case MONITOR_OVLD_OS_FILE_WRITE:
+ value = os_n_file_writes;
+ break;
+
+ /* innodb_data_fsyncs, number of fsync() operations so far. */
+ case MONITOR_OVLD_OS_FSYNC:
+ value = os_n_fsyncs;
+ break;
+
+ /* innodb_os_log_written */
+ case MONITOR_OVLD_OS_LOG_WRITTEN:
+ value = (mon_type_t) srv_stats.os_log_written;
+ break;
+
+ /* innodb_os_log_fsyncs */
+ case MONITOR_OVLD_OS_LOG_FSYNC:
+ value = fil_n_log_flushes;
+ break;
+
+ /* innodb_os_log_pending_fsyncs */
+ case MONITOR_OVLD_OS_LOG_PENDING_FSYNC:
+ value = fil_n_pending_log_flushes;
+ update_min = TRUE;
+ break;
+
+ /* innodb_os_log_pending_writes */
+ case MONITOR_OVLD_OS_LOG_PENDING_WRITES:
+ value = srv_stats.os_log_pending_writes;
+ update_min = TRUE;
+ break;
+
+ /* innodb_log_waits */
+ case MONITOR_OVLD_LOG_WAITS:
+ value = srv_stats.log_waits;
+ break;
+
+ /* innodb_log_write_requests */
+ case MONITOR_OVLD_LOG_WRITE_REQUEST:
+ value = srv_stats.log_write_requests;
+ break;
+
+ /* innodb_log_writes */
+ case MONITOR_OVLD_LOG_WRITES:
+ value = srv_stats.log_writes;
+ break;
+
+ /* innodb_dblwr_writes */
+ case MONITOR_OVLD_SRV_DBLWR_WRITES:
+ value = srv_stats.dblwr_writes;
+ break;
+
+ /* innodb_dblwr_pages_written */
+ case MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN:
+ value = srv_stats.dblwr_pages_written;
+ break;
+
+ /* innodb_page_size */
+ case MONITOR_OVLD_SRV_PAGE_SIZE:
+ value = UNIV_PAGE_SIZE;
+ break;
+
+ case MONITOR_OVLD_RWLOCK_S_SPIN_WAITS:
+ value = rw_lock_stats.rw_s_spin_wait_count;
+ break;
+
+ case MONITOR_OVLD_RWLOCK_X_SPIN_WAITS:
+ value = rw_lock_stats.rw_x_spin_wait_count;
+ break;
+
+ case MONITOR_OVLD_RWLOCK_S_SPIN_ROUNDS:
+ value = rw_lock_stats.rw_s_spin_round_count;
+ break;
+
+ case MONITOR_OVLD_RWLOCK_X_SPIN_ROUNDS:
+ value = rw_lock_stats.rw_x_spin_round_count;
+ break;
+
+ case MONITOR_OVLD_RWLOCK_S_OS_WAITS:
+ value = rw_lock_stats.rw_s_os_wait_count;
+ break;
+
+ case MONITOR_OVLD_RWLOCK_X_OS_WAITS:
+ value = rw_lock_stats.rw_x_os_wait_count;
+ break;
+
+ case MONITOR_OVLD_BUFFER_POOL_SIZE:
+ value = srv_buf_pool_size;
+ break;
+
+ /* innodb_rows_read */
+ case MONITOR_OLVD_ROW_READ:
+ value = srv_stats.n_rows_read;
+ break;
+
+ /* innodb_rows_inserted */
+ case MONITOR_OLVD_ROW_INSERTED:
+ value = srv_stats.n_rows_inserted;
+ break;
+
+ /* innodb_rows_deleted */
+ case MONITOR_OLVD_ROW_DELETED:
+ value = srv_stats.n_rows_deleted;
+ break;
+
+ /* innodb_rows_updated */
+ case MONITOR_OLVD_ROW_UPDTATED:
+ value = srv_stats.n_rows_updated;
+ break;
+
+ /* innodb_row_lock_current_waits */
+ case MONITOR_OVLD_ROW_LOCK_CURRENT_WAIT:
+ value = srv_stats.n_lock_wait_current_count;
+ break;
+
+ /* innodb_row_lock_time */
+ case MONITOR_OVLD_LOCK_WAIT_TIME:
+ value = srv_stats.n_lock_wait_time / 1000;
+ break;
+
+ /* innodb_row_lock_time_max */
+ case MONITOR_OVLD_LOCK_MAX_WAIT_TIME:
+ value = lock_sys->n_lock_max_wait_time / 1000;
+ break;
+
+ /* innodb_row_lock_time_avg */
+ case MONITOR_OVLD_LOCK_AVG_WAIT_TIME:
+ if (srv_stats.n_lock_wait_count > 0) {
+ value = srv_stats.n_lock_wait_time / 1000
+ / srv_stats.n_lock_wait_count;
+ } else {
+ value = 0;
+ }
+ break;
+
+ /* innodb_row_lock_waits */
+ case MONITOR_OVLD_ROW_LOCK_WAIT:
+ value = srv_stats.n_lock_wait_count;
+ break;
+
+ case MONITOR_RSEG_HISTORY_LEN:
+ value = trx_sys->rseg_history_len;
+ break;
+
+ case MONITOR_RSEG_CUR_SIZE:
+ value = srv_mon_get_rseg_size();
+ break;
+
+ case MONITOR_OVLD_N_FILE_OPENED:
+ value = fil_n_file_opened;
+ break;
+
+ case MONITOR_OVLD_IBUF_MERGE_INSERT:
+ value = ibuf->n_merged_ops[IBUF_OP_INSERT];
+ break;
+
+ case MONITOR_OVLD_IBUF_MERGE_DELETE:
+ value = ibuf->n_merged_ops[IBUF_OP_DELETE_MARK];
+ break;
+
+ case MONITOR_OVLD_IBUF_MERGE_PURGE:
+ value = ibuf->n_merged_ops[IBUF_OP_DELETE];
+ break;
+
+ case MONITOR_OVLD_IBUF_MERGE_DISCARD_INSERT:
+ value = ibuf->n_discarded_ops[IBUF_OP_INSERT];
+ break;
+
+ case MONITOR_OVLD_IBUF_MERGE_DISCARD_DELETE:
+ value = ibuf->n_discarded_ops[IBUF_OP_DELETE_MARK];
+ break;
+
+ case MONITOR_OVLD_IBUF_MERGE_DISCARD_PURGE:
+ value = ibuf->n_discarded_ops[IBUF_OP_DELETE];
+ break;
+
+ case MONITOR_OVLD_IBUF_MERGES:
+ value = ibuf->n_merges;
+ break;
+
+ case MONITOR_OVLD_IBUF_SIZE:
+ value = ibuf->size;
+ break;
+
+ case MONITOR_OVLD_SERVER_ACTIVITY:
+ value = srv_get_activity_count();
+ break;
+
+ case MONITOR_OVLD_LSN_FLUSHDISK:
+ value = (mon_type_t) log_sys->flushed_to_disk_lsn;
+ break;
+
+ case MONITOR_OVLD_LSN_CURRENT:
+ value = (mon_type_t) log_sys->lsn;
+ break;
+
+ case MONITOR_OVLD_BUF_OLDEST_LSN:
+ value = (mon_type_t) buf_pool_get_oldest_modification();
+ break;
+
+ case MONITOR_OVLD_LSN_CHECKPOINT:
+ value = (mon_type_t) log_sys->last_checkpoint_lsn;
+ break;
+
+ case MONITOR_OVLD_MAX_AGE_ASYNC:
+ value = log_sys->max_modified_age_async;
+ break;
+
+ case MONITOR_OVLD_MAX_AGE_SYNC:
+ value = log_sys->max_modified_age_sync;
+ break;
+
+ case MONITOR_OVLD_ADAPTIVE_HASH_SEARCH:
+ value = btr_cur_n_sea;
+ break;
+
+ case MONITOR_OVLD_ADAPTIVE_HASH_SEARCH_BTREE:
+ value = btr_cur_n_non_sea;
+ break;
+
+ default:
+ ut_error;
+ }
+
+ switch (set_option) {
+ case MONITOR_TURN_ON:
+ /* Save the initial counter value in mon_start_value
+ field */
+ MONITOR_SAVE_START(monitor_id, value);
+ return;
+
+ case MONITOR_TURN_OFF:
+ /* Save the counter value to mon_last_value when we
+ turn off the monitor but not yet reset. Note the
+ counter has not yet been set to off in the bitmap
+ table for normal turn off. We need to check the
+ counter status (on/off) to avoid resetting the value
+ for a counter that is already off */
+ if (MONITOR_IS_ON(monitor_id)) {
+ srv_mon_process_existing_counter(monitor_id,
+ MONITOR_GET_VALUE);
+ MONITOR_SAVE_LAST(monitor_id);
+ }
+ return;
+
+ case MONITOR_GET_VALUE:
+ if (MONITOR_IS_ON(monitor_id)) {
+
+ /* If MONITOR_DISPLAY_CURRENT bit is on, we
+ only record the current value, rather than
+ incremental value over a period. Most of
+ this type of counters are resource related
+ counters such as number of buffer pages etc. */
+ if (monitor_info->monitor_type
+ & MONITOR_DISPLAY_CURRENT) {
+ MONITOR_SET(monitor_id, value);
+ } else {
+ /* Most status counters are monotonically
+ increasing, no need to update their
+ minimum values. Only do so
+ if "update_min" set to TRUE */
+ MONITOR_SET_DIFF(monitor_id, value);
+
+ if (update_min
+ && (MONITOR_VALUE(monitor_id)
+ < MONITOR_MIN_VALUE(monitor_id))) {
+ MONITOR_MIN_VALUE(monitor_id) =
+ MONITOR_VALUE(monitor_id);
+ }
+ }
+ }
+ return;
+
+ case MONITOR_RESET_VALUE:
+ if (!MONITOR_IS_ON(monitor_id)) {
+ MONITOR_LAST_VALUE(monitor_id) = 0;
+ }
+ return;
+
+ /* Nothing special for reset all operation for these existing
+ counters */
+ case MONITOR_RESET_ALL_VALUE:
+ return;
+ }
+}
+
+/*************************************************************//**
+Reset a monitor: the current monitor value becomes part of a new
+baseline, which is kept in MONITOR_VALUE_RESET(monitor), and the
+current value and its max/min trackers start over. */
+UNIV_INTERN
+void
+srv_mon_reset(
+/*==========*/
+	monitor_id_t	monitor)	/*!< in: monitor id */
+{
+	const ibool	was_enabled = MONITOR_IS_ON(monitor);
+
+	/* Temporarily turn off the counter for the resetting
+	operation */
+	if (was_enabled) {
+		MONITOR_OFF(monitor);
+	}
+
+	/* Before resetting the current monitor value, first
+	calculate and set the max/min value since monitor
+	start */
+	srv_mon_calc_max_since_start(monitor);
+	srv_mon_calc_min_since_start(monitor);
+
+	if (!(innodb_counter_info[monitor].monitor_type
+	      & MONITOR_DISPLAY_CURRENT)) {
+		/* Incremental counter: add the current value to the
+		baseline that is remembered across resets */
+		MONITOR_VALUE_RESET(monitor) = MONITOR_VALUE_RESET(monitor)
+			+ MONITOR_VALUE(monitor);
+	} else {
+		/* Monitors with the MONITOR_DISPLAY_CURRENT bit are not
+		incremental, so there is no reset value to remember */
+		MONITOR_VALUE_RESET(monitor) = 0;
+	}
+
+	/* Reset the counter value and its max/min trackers */
+	MONITOR_VALUE(monitor) = 0;
+	MONITOR_MAX_VALUE(monitor) = MAX_RESERVED;
+	MONITOR_MIN_VALUE(monitor) = MIN_RESERVED;
+
+	MONITOR_FIELD((monitor), mon_reset_time) = time(NULL);
+
+	/* Turn the counter back on if it was on when we were called */
+	if (was_enabled) {
+		MONITOR_ON(monitor);
+	}
+}
+
+/*************************************************************//**
+Turn on every monitor counter whose type has MONITOR_DEFAULT_ON set. */
+UNIV_INTERN
+void
+srv_mon_default_on(void)
+/*====================*/
+{
+	for (ulint id = 0; id < NUM_MONITOR; id++) {
+		if (!(innodb_counter_info[id].monitor_type
+		      & MONITOR_DEFAULT_ON)) {
+			continue;
+		}
+
+		/* This counter is marked default on: enable it */
+		MONITOR_ON(id);
+		MONITOR_INIT(id);
+		MONITOR_SET_START(id);
+	}
+}
+#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/srv/srv0srv.c b/storage/xtradb/srv/srv0srv.c
deleted file mode 100644
index 478fc0505bc..00000000000
--- a/storage/xtradb/srv/srv0srv.c
+++ /dev/null
@@ -1,4251 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2008, 2009 Google Inc.
-Copyright (c) 2009, Percona Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file srv/srv0srv.c
-The database server main program
-
-NOTE: SQL Server 7 uses something which the documentation
-calls user mode scheduled threads (UMS threads). One such
-thread is usually allocated per processor. Win32
-documentation does not know any UMS threads, which suggests
-that the concept is internal to SQL Server 7. It may mean that
-SQL Server 7 does all the scheduling of threads itself, even
-in i/o waits. We should maybe modify InnoDB to use the same
-technique, because thread switches within NT may be too slow.
-
-SQL Server 7 also mentions fibers, which are cooperatively
-scheduled threads. They can boost performance by 5 %,
-according to the Delaney and Soukup's book.
-
-Windows 2000 will have something called thread pooling
-(see msdn website), which we could possibly use.
-
-Another possibility could be to use some very fast user space
-thread library. This might confuse NT though.
-
-Created 10/8/1995 Heikki Tuuri
-*******************************************************/
-
-/* Dummy comment */
-#include "m_string.h" /* for my_sys.h */
-#include "my_sys.h" /* DEBUG_SYNC_C */
-#include "srv0srv.h"
-
-#include "ut0mem.h"
-#include "ut0ut.h"
-#include "os0proc.h"
-#include "mem0mem.h"
-#include "mem0pool.h"
-#include "sync0sync.h"
-#include "que0que.h"
-#include "log0online.h"
-#include "log0recv.h"
-#include "pars0pars.h"
-#include "usr0sess.h"
-#include "lock0lock.h"
-#include "trx0purge.h"
-#include "ibuf0ibuf.h"
-#include "buf0flu.h"
-#include "buf0lru.h"
-#include "btr0sea.h"
-#include "dict0load.h"
-#include "dict0boot.h"
-#include "srv0start.h"
-#include "row0mysql.h"
-#include "ha_prototypes.h"
-#include "trx0i_s.h"
-#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
-#include "read0read.h"
-#include "mysql/plugin.h"
-#include "mysql/service_thd_wait.h"
-
-/* prototypes of new functions added to ha_innodb.cc for kill_idle_transaction */
-ibool innobase_thd_is_idle(const void* thd);
-ib_int64_t innobase_thd_get_start_time(const void* thd);
-void innobase_thd_kill(ulong thd_id);
-ulong innobase_thd_get_thread_id(const void* thd);
-
-/* prototypes for new functions added to ha_innodb.cc */
-ibool innobase_get_slow_log();
-
-/* The following counter is incremented whenever there is some user activity
-in the server */
-UNIV_INTERN ulint srv_activity_count = 0;
-
-/* The following is the maximum allowed duration of a lock wait. */
-UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;
-
-/**/
-UNIV_INTERN long long srv_kill_idle_transaction = 0;
-
-/* How much data manipulation language (DML) statements need to be delayed,
-in microseconds, in order to reduce the lagging of the purge thread. */
-UNIV_INTERN ulint srv_dml_needed_delay = 0;
-
-UNIV_INTERN ibool srv_lock_timeout_active = FALSE;
-UNIV_INTERN ibool srv_monitor_active = FALSE;
-UNIV_INTERN ibool srv_error_monitor_active = FALSE;
-
-UNIV_INTERN const char* srv_main_thread_op_info = "";
-
-/** Prefix used by MySQL to indicate pre-5.1 table name encoding */
-UNIV_INTERN const char srv_mysql50_table_name_prefix[9] = "#mysql50#";
-
-/* Server parameters which are read from the initfile */
-
-/* The following three are dir paths which are catenated before file
-names, where the file name itself may also contain a path */
-
-UNIV_INTERN char* srv_data_home = NULL;
-#ifdef UNIV_LOG_ARCHIVE
-UNIV_INTERN char* srv_arch_dir = NULL;
-#endif /* UNIV_LOG_ARCHIVE */
-
-/** store to its own file each table created by an user; data
-dictionary tables are in the system tablespace 0 */
-UNIV_INTERN my_bool srv_file_per_table;
-/** The file format to use on new *.ibd files. */
-UNIV_INTERN ulint srv_file_format = 0;
-/** Whether to check file format during startup. A value of
-DICT_TF_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
-set it to the highest format we support. */
-UNIV_INTERN ulint srv_max_file_format_at_startup = DICT_TF_FORMAT_MAX;
-
-#if DICT_TF_FORMAT_51
-# error "DICT_TF_FORMAT_51 must be 0!"
-#endif
-/** Place locks to records only i.e. do not use next-key locking except
-on duplicate key checking and foreign key checking */
-UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
-
-/* If this flag is TRUE, then we will use the native aio of the
-OS (provided we compiled Innobase with it in), otherwise we will
-use simulated aio we build below with threads.
-Currently we support native aio on windows and linux */
-UNIV_INTERN my_bool srv_use_native_aio = TRUE;
-
-#ifdef __WIN__
-/* Windows native condition variables. We use runtime loading / function
-pointers, because they are not available on Windows Server 2003 and
-Windows XP/2000.
-
-We use condition for events on Windows if possible, even if os_event
-resembles Windows kernel event object well API-wise. The reason is
-performance, kernel objects are heavyweights and WaitForSingleObject() is a
-performance killer causing calling thread to context switch. Besides, Innodb
-is preallocating large number (often millions) of os_events. With kernel event
-objects it takes a big chunk out of non-paged pool, which is better suited
-for tasks like IO than for storing idle event objects. */
-UNIV_INTERN ibool srv_use_native_conditions = FALSE;
-#endif /* __WIN__ */
-
-UNIV_INTERN ulint srv_n_data_files = 0;
-UNIV_INTERN char** srv_data_file_names = NULL;
-/* size in database pages */
-UNIV_INTERN ulint* srv_data_file_sizes = NULL;
-
-UNIV_INTERN char* srv_doublewrite_file = NULL;
-
-UNIV_INTERN ibool srv_recovery_stats = FALSE;
-
-UNIV_INTERN my_bool srv_track_changed_pages = FALSE;
-
-UNIV_INTERN ib_uint64_t srv_max_bitmap_file_size = 100 * 1024 * 1024;
-
-UNIV_INTERN ulonglong srv_max_changed_pages = 0;
-
-/** When TRUE, fake change transcations take S rather than X row locks.
- When FALSE, row locks are not taken at all. */
-UNIV_INTERN my_bool srv_fake_changes_locks = TRUE;
-
-/* if TRUE, then we auto-extend the last data file */
-UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
-/* if != 0, this tells the max size auto-extending may increase the
-last data file size */
-UNIV_INTERN ulint srv_last_file_size_max = 0;
-/* If the last data file is auto-extended, we add this
-many pages to it at a time */
-UNIV_INTERN ulong srv_auto_extend_increment = 8;
-UNIV_INTERN ulint* srv_data_file_is_raw_partition = NULL;
-
-/* If the following is TRUE we do not allow inserts etc. This protects
-the user from forgetting the 'newraw' keyword to my.cnf */
-
-UNIV_INTERN ibool srv_created_new_raw = FALSE;
-
-UNIV_INTERN char** srv_log_group_home_dirs = NULL;
-
-UNIV_INTERN ulint srv_n_log_groups = ULINT_MAX;
-UNIV_INTERN ulint srv_n_log_files = ULINT_MAX;
-/* size in database pages */
-UNIV_INTERN ulint srv_log_file_size = ULINT_MAX;
-/* size in database pages */
-UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
-//UNIV_INTERN ulong srv_flush_log_at_trx_commit = 1;
-UNIV_INTERN char srv_use_global_flush_log_at_trx_commit = TRUE;
-
-/* Try to flush dirty pages so as to avoid IO bursts at
-the checkpoints. */
-UNIV_INTERN char srv_adaptive_flushing = TRUE;
-
-UNIV_INTERN ulong srv_show_locks_held = 10;
-UNIV_INTERN ulong srv_show_verbose_locks = 0;
-
-/** Maximum number of times allowed to conditionally acquire
-mutex before switching to blocking wait on the mutex */
-#define MAX_MUTEX_NOWAIT 20
-
-/** Check whether the number of failed nonblocking mutex
-acquisition attempts exceeds maximum allowed value. If so,
-srv_printf_innodb_monitor() will request mutex acquisition
-with mutex_enter(), which will wait until it gets the mutex. */
-#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
-
-/** The sort order table of the MySQL latin1_swedish_ci character set
-collation */
-UNIV_INTERN const byte* srv_latin1_ordering;
-
-/* use os/external memory allocator */
-UNIV_INTERN my_bool srv_use_sys_malloc = TRUE;
-/* requested size in kilobytes */
-UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX;
-/* force virtual page preallocation (prefault) */
-UNIV_INTERN my_bool srv_buf_pool_populate = FALSE;
-/* requested number of buffer pool instances */
-UNIV_INTERN ulint srv_buf_pool_instances = 1;
-/* previously requested size */
-UNIV_INTERN ulint srv_buf_pool_old_size;
-/* current size in kilobytes */
-UNIV_INTERN ulint srv_buf_pool_curr_size = 0;
-/* size in bytes */
-UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
-UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
-
-/* This parameter is deprecated. Use srv_n_io_[read|write]_threads
-instead. */
-UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
-UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX;
-UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX;
-
-/* Switch to enable random read ahead. */
-UNIV_INTERN my_bool srv_random_read_ahead = FALSE;
-
-/* The universal page size of the database */
-UNIV_INTERN ulint srv_page_size_shift = 0;
-UNIV_INTERN ulint srv_page_size = 0;
-
-/* The log block size */
-UNIV_INTERN ulint srv_log_block_size = 0;
-
-/* User settable value of the number of pages that must be present
-in the buffer cache and accessed sequentially for InnoDB to trigger a
-readahead request. */
-UNIV_INTERN ulong srv_read_ahead_threshold = 56;
-
-#ifdef UNIV_LOG_ARCHIVE
-UNIV_INTERN ibool srv_log_archive_on = FALSE;
-UNIV_INTERN ibool srv_archive_recovery = 0;
-UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn;
-#endif /* UNIV_LOG_ARCHIVE */
-
-/* This parameter is used to throttle the number of insert buffers that are
-merged in a batch. By increasing this parameter on a faster disk you can
-possibly reduce the number of I/O operations performed to complete the
-merge operation. The value of this parameter is used as is by the
-background loop when the system is idle (low load), on a busy system
-the parameter is scaled down by a factor of 4, this is to avoid putting
-a heavier load on the I/O sub system. */
-
-UNIV_INTERN ulong srv_insert_buffer_batch_size = 20;
-
-UNIV_INTERN char* srv_file_flush_method_str = NULL;
-UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
-UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
-
-UNIV_INTERN ulint srv_max_n_open_files = 300;
-
-/* Number of IO operations per second the server can do */
-UNIV_INTERN ulong srv_io_capacity = 200;
-
-/* The InnoDB main thread tries to keep the ratio of modified pages
-in the buffer pool to all database pages in the buffer pool smaller than
-the following number. But it is not guaranteed that the value stays below
-that during a time of heavy update/insert activity. */
-
-UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;
-
-/* the number of purge threads to use from the worker pool (currently 0 or 1).*/
-UNIV_INTERN ulong srv_n_purge_threads = 0;
-
-/* the number of pages to purge in one batch */
-UNIV_INTERN ulong srv_purge_batch_size = 20;
-
-/* the number of rollback segments to use */
-UNIV_INTERN ulong srv_rollback_segments = TRX_SYS_N_RSEGS;
-
-/* Internal setting for "innodb_stats_method". Decides how InnoDB treats
-NULL value when collecting statistics. By default, it is set to
-SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */
-UNIV_INTERN ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
-
-/** Time in seconds between automatic buffer pool dumps */
-UNIV_INTERN uint srv_auto_lru_dump = 0;
-
-/** Whether startup should be blocked until buffer pool is fully restored */
-UNIV_INTERN ibool srv_blocking_lru_restore;
-
-/* structure to pass status variables to MySQL */
-UNIV_INTERN export_struc export_vars;
-
-/* If the following is != 0 we do not allow inserts etc. This protects
-the user from forgetting the innodb_force_recovery keyword to my.cnf */
-
-UNIV_INTERN ulint srv_force_recovery = 0;
-/*-----------------------*/
-/* We are prepared for a situation that we have this many threads waiting for
-a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
-value. */
-
-UNIV_INTERN ulint srv_max_n_threads = 0;
-
-/* The following controls how many threads we let inside InnoDB concurrently:
-threads waiting for locks are not counted into the number because otherwise
-we could get a deadlock. MySQL creates a thread for each user session, and
-semaphore contention and convoy problems can occur withput this restriction.
-Value 10 should be good if there are less than 4 processors + 4 disks in the
-computer. Bigger computers need bigger values. Value 0 will disable the
-concurrency check. */
-
-UNIV_INTERN ibool srv_thread_concurrency_timer_based = FALSE;
-UNIV_INTERN ulong srv_thread_concurrency = 0;
-
-/* this mutex protects srv_conc data structures */
-UNIV_INTERN os_fast_mutex_t srv_conc_mutex;
-/* number of transactions that have declared_to_be_inside_innodb set.
-It used to be a non-error for this value to drop below zero temporarily.
-This is no longer true. We'll, however, keep the lint datatype to add
-assertions to catch any corner cases that we may have missed. */
-UNIV_INTERN lint srv_conc_n_threads = 0;
-/* number of OS threads waiting in the FIFO for a permission to enter
-InnoDB */
-UNIV_INTERN ulint srv_conc_n_waiting_threads = 0;
-
-/* print all user-level transactions deadlocks to mysqld stderr */
-UNIV_INTERN my_bool srv_print_all_deadlocks = FALSE;
-
-/* Produce a stacktrace on long semaphore wait */
-UNIV_INTERN my_bool srv_use_stacktrace = FALSE;
-
-typedef struct srv_conc_slot_struct srv_conc_slot_t; /* one wait slot of the srv_conc_slots array */
-struct srv_conc_slot_struct{
- os_event_t event; /*!< event to wait on; created once
- per slot in srv_init() */
- ibool reserved; /*!< TRUE if slot
- reserved; set to FALSE
- in srv_init() */
- ibool wait_ended; /*!< TRUE when another
- thread has already set
- the event and the
- thread in this slot is
- free to proceed; but
- reserved may still be
- TRUE at that point */
- UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /*!< queue node; links the slot
- into the srv_conc_queue list */
-};
-
-/* queue of threads waiting to get in */
-UNIV_INTERN UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue;
-/* array of wait slots */
-UNIV_INTERN srv_conc_slot_t* srv_conc_slots;
-
-/* Number of times a thread is allowed to enter InnoDB within the same
-SQL query after it has once got the ticket at srv_conc_enter_innodb */
-#define SRV_FREE_TICKETS_TO_ENTER srv_n_free_tickets_to_enter
-#define SRV_THREAD_SLEEP_DELAY srv_thread_sleep_delay
-/*-----------------------*/
-/* If the following is set to 1 then we do not run purge and insert buffer
-merge to completion before shutdown. If it is set to 2, do not even flush the
-buffer pool to data files at the shutdown: we effectively 'crash'
-InnoDB (but lose no committed transactions). */
-UNIV_INTERN ulint srv_fast_shutdown = 0;
-
-/* Generate an innodb_status.<pid> file */
-UNIV_INTERN ibool srv_innodb_status = FALSE;
-
-/* When estimating number of different key values in an index, sample
-this many index pages */
-UNIV_INTERN unsigned long long srv_stats_sample_pages = 8;
-UNIV_INTERN ulint srv_stats_auto_update = 1;
-UNIV_INTERN ulint srv_stats_update_need_lock = 1;
-UNIV_INTERN ibool srv_use_sys_stats_table = FALSE;
-#ifdef UNIV_DEBUG
-UNIV_INTERN ulong srv_sys_stats_root_page = 0;
-#endif
-
-UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
-UNIV_INTERN ibool srv_use_atomic_writes = FALSE;
-#ifdef HAVE_POSIX_FALLOCATE
-UNIV_INTERN ibool srv_use_posix_fallocate = FALSE;
-#endif
-
-UNIV_INTERN ibool srv_use_checksums = TRUE;
-UNIV_INTERN ibool srv_fast_checksum = FALSE;
-
-UNIV_INTERN ulong srv_replication_delay = 0;
-
-UNIV_INTERN long long srv_ibuf_max_size = 0;
-UNIV_INTERN ulong srv_ibuf_active_contract = 0; /* 0:disable 1:enable */
-UNIV_INTERN ulong srv_ibuf_accel_rate = 100;
-#define PCT_IBUF_IO(pct) ((ulint) (srv_io_capacity * srv_ibuf_accel_rate * ((double) pct / 10000.0)))
-
-UNIV_INTERN ulint srv_checkpoint_age_target = 0;
-UNIV_INTERN ulong srv_flush_neighbor_pages = 1; /* 0:disable 1:area 2:contiguous */
-
-UNIV_INTERN ulint srv_deprecated_enable_unsafe_group_commit = 0;
-UNIV_INTERN ulong srv_read_ahead = 3; /* 1: random 2: linear 3: Both */
-UNIV_INTERN ulong srv_adaptive_flushing_method = 0; /* 0: native 1: estimate 2: keep_average */
-
-UNIV_INTERN ulong srv_expand_import = 0; /* 0:disable 1:enable */
-UNIV_INTERN ulong srv_pass_corrupt_table = 0; /* 0:disable 1:enable */
-
-UNIV_INTERN ulint srv_dict_size_limit = 0;
-/*-------------------------------------------*/
-UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
-UNIV_INTERN ulong srv_n_free_tickets_to_enter = 500;
-UNIV_INTERN ulong srv_thread_sleep_delay = 10000;
-UNIV_INTERN ulong srv_spin_wait_delay = 6;
-UNIV_INTERN ibool srv_priority_boost = TRUE;
-
-#ifdef UNIV_DEBUG
-UNIV_INTERN ibool srv_print_thread_releases = FALSE;
-UNIV_INTERN ibool srv_print_lock_waits = FALSE;
-UNIV_INTERN ibool srv_print_buf_io = FALSE;
-UNIV_INTERN ibool srv_print_log_io = FALSE;
-UNIV_INTERN ibool srv_print_latch_waits = FALSE;
-#endif /* UNIV_DEBUG */
-
-static ulint srv_n_rows_inserted_old = 0;
-static ulint srv_n_rows_updated_old = 0;
-static ulint srv_n_rows_deleted_old = 0;
-static ulint srv_n_rows_read_old = 0;
-
-/* Ensure counters are on separate cache lines */
-
-#define CACHE_LINE_SIZE 64
-#define CACHE_ALIGNED __attribute__ ((aligned (CACHE_LINE_SIZE)))
-
-UNIV_INTERN byte
-counters_pad_start[CACHE_LINE_SIZE] __attribute__((unused)) = {0};
-
-UNIV_INTERN ulint srv_n_rows_inserted CACHE_ALIGNED = 0;
-UNIV_INTERN ulint srv_n_rows_updated CACHE_ALIGNED = 0;
-UNIV_INTERN ulint srv_n_rows_deleted CACHE_ALIGNED = 0;
-UNIV_INTERN ulint srv_n_rows_read CACHE_ALIGNED = 0;
-
-UNIV_INTERN ulint srv_read_views_memory CACHE_ALIGNED = 0;
-UNIV_INTERN ulint srv_descriptors_memory CACHE_ALIGNED = 0;
-
-UNIV_INTERN ulint srv_n_lock_deadlock_count CACHE_ALIGNED = 0;
-UNIV_INTERN ulint srv_n_lock_wait_count CACHE_ALIGNED = 0;
-UNIV_INTERN ulint srv_n_lock_wait_current_count CACHE_ALIGNED = 0;
-UNIV_INTERN ib_int64_t srv_n_lock_wait_time CACHE_ALIGNED = 0;
-UNIV_INTERN ulint srv_n_lock_max_wait_time CACHE_ALIGNED = 0;
-
-UNIV_INTERN ulint srv_truncated_status_writes CACHE_ALIGNED = 0;
-
-/* variable counts amount of data read in total (in bytes) */
-UNIV_INTERN ulint srv_data_read CACHE_ALIGNED = 0;
-
-/* here we count the amount of data written in total (in bytes) */
-UNIV_INTERN ulint srv_data_written CACHE_ALIGNED = 0;
-
-/* the number of the log write requests done */
-UNIV_INTERN ulint srv_log_write_requests CACHE_ALIGNED = 0;
-
-/* the number of physical writes to the log performed */
-UNIV_INTERN ulint srv_log_writes CACHE_ALIGNED = 0;
-
-/* amount of data written to the log files in bytes */
-UNIV_INTERN ulint srv_os_log_written CACHE_ALIGNED = 0;
-
-/* amount of writes being done to the log files */
-UNIV_INTERN ulint srv_os_log_pending_writes CACHE_ALIGNED = 0;
-
-/* we increase this counter when we don't have enough space in the
-log buffer and have to flush it */
-UNIV_INTERN ulint srv_log_waits CACHE_ALIGNED = 0;
-
-/* this variable counts the number of times the doublewrite buffer
-was flushed */
-UNIV_INTERN ulint srv_dblwr_writes CACHE_ALIGNED = 0;
-
-/* here we store the number of pages that have been flushed to the
-doublewrite buffer */
-UNIV_INTERN ulint srv_dblwr_pages_written CACHE_ALIGNED = 0;
-
-/* in this variable we store the number of write requests issued */
-UNIV_INTERN ulint srv_buf_pool_write_requests CACHE_ALIGNED = 0;
-
-/* here we store the number of times when we had to wait for a free page
-in the buffer pool. It happens when the buffer pool is full and we need
-to make a flush, in order to be able to read or create a page. */
-UNIV_INTERN ulint srv_buf_pool_wait_free CACHE_ALIGNED = 0;
-
-/** Number of buffer pool reads that led to the
-reading of a disk page */
-UNIV_INTERN ulint srv_buf_pool_reads CACHE_ALIGNED = 0;
-
-/* variable to count the number of pages that were written from buffer
-pool to the disk */
-UNIV_INTERN ulint srv_buf_pool_flushed CACHE_ALIGNED = 0;
-
-/* variable to count the number of LRU flushed pages */
-UNIV_INTERN ulint buf_lru_flush_page_count CACHE_ALIGNED = 0;
-
-UNIV_INTERN byte
-counters_pad_end[CACHE_LINE_SIZE] __attribute__((unused)) = {0};
-
-/*
- Set the following to 0 if you want InnoDB to write messages on
- stderr on startup/shutdown
-*/
-UNIV_INTERN ibool srv_print_verbose_log = TRUE;
-UNIV_INTERN ibool srv_print_innodb_monitor = FALSE;
-UNIV_INTERN ibool srv_print_innodb_lock_monitor = FALSE;
-UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE;
-UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE;
-
-/* Array of English strings describing the current state of an
-i/o handler thread */
-
-UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
-UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
-
-UNIV_INTERN time_t srv_last_monitor_time;
-
-UNIV_INTERN mutex_t srv_innodb_monitor_mutex;
-
-/* Mutex for locking srv_monitor_file */
-UNIV_INTERN mutex_t srv_monitor_file_mutex;
-
-#ifdef UNIV_PFS_MUTEX
-/* Key to register kernel_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t kernel_mutex_key;
-/* Key to register srv_innodb_monitor_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_innodb_monitor_mutex_key;
-/* Key to register srv_monitor_file_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key;
-/* Key to register srv_dict_tmpfile_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
-/* Key to register srv_misc_tmpfile_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
-#endif /* UNIV_PFS_MUTEX */
-
-/* Temporary file for innodb monitor output */
-UNIV_INTERN FILE* srv_monitor_file;
-/* Mutex for locking srv_dict_tmpfile.
-This mutex has a very high rank; threads reserving it should not
-be holding any InnoDB latches. */
-UNIV_INTERN mutex_t srv_dict_tmpfile_mutex;
-/* Temporary file for output from the data dictionary */
-UNIV_INTERN FILE* srv_dict_tmpfile;
-/* Mutex for locking srv_misc_tmpfile.
-This mutex has a very low rank; threads reserving it should not
-acquire any further latches or sleep before releasing this one. */
-UNIV_INTERN mutex_t srv_misc_tmpfile_mutex;
-/* Temporary file for miscellaneous diagnostic output */
-UNIV_INTERN FILE* srv_misc_tmpfile;
-
-UNIV_INTERN ulint srv_main_thread_process_no = 0;
-UNIV_INTERN ulint srv_main_thread_id = 0;
-
-/* The following count work done by srv_master_thread. */
-
-/* Iterations by the 'once per second' loop. */
-static ulint srv_main_1_second_loops = 0;
-/* Calls to sleep by the 'once per second' loop. */
-static ulint srv_main_sleeps = 0;
-/* Iterations by the 'once per 10 seconds' loop. */
-static ulint srv_main_10_second_loops = 0;
-/* Iterations of the loop bounded by the 'background_loop' label. */
-static ulint srv_main_background_loops = 0;
-/* Iterations of the loop bounded by the 'flush_loop' label. */
-static ulint srv_main_flush_loops = 0;
-/* Log writes involving flush. */
-static ulint srv_log_writes_and_flush = 0;
-
-/* This is only ever touched by the master thread. It records the
-time when the last flush of log file has happened. The master
-thread ensures that we flush the log files at least once per
-second. */
-static time_t srv_last_log_flush_time;
-
-/* The master thread performs various tasks based on the current
-state of IO activity and the level of IO utilization in past
-intervals. The following macros define thresholds for these conditions. */
-#define SRV_PEND_IO_THRESHOLD (PCT_IO(3))
-#define SRV_RECENT_IO_ACTIVITY (PCT_IO(5))
-#define SRV_PAST_IO_ACTIVITY (PCT_IO(200))
-
-/*
- IMPLEMENTATION OF THE SERVER MAIN PROGRAM
- =========================================
-
-There is the following analogue between this database
-server and an operating system kernel:
-
-DB concept equivalent OS concept
----------- ---------------------
-transaction -- process;
-
-query thread -- thread;
-
-lock -- semaphore;
-
-transaction set to
-the rollback state -- kill signal delivered to a process;
-
-kernel -- kernel;
-
-query thread execution:
-(a) without kernel mutex
-reserved -- process executing in user mode;
-(b) with kernel mutex reserved
- -- process executing in kernel mode;
-
-The server is controlled by a master thread which runs at
-a priority higher than normal, that is, higher than user threads.
-It sleeps most of the time, and wakes up, say, every 300 milliseconds,
-to check whether there is anything happening in the server which
-requires intervention of the master thread. Such situations may be,
-for example, when flushing of dirty blocks is needed in the buffer
-pool or old version of database rows have to be cleaned away.
-
-The threads which we call user threads serve the queries of
-the clients and input from the console of the server.
-They run at normal priority. The server may have several
-communications endpoints. A dedicated set of user threads waits
-at each of these endpoints ready to receive a client request.
-Each request is taken by a single user thread, which then starts
-processing and, when the result is ready, sends it to the client
-and returns to wait at the same endpoint the thread started from.
-
-So, we do not have dedicated communication threads listening at
-the endpoints and dealing the jobs to dedicated worker threads.
-Our architecture saves one thread switch per request, compared
-to the solution with dedicated communication threads
-which amounts to 15 microseconds on 100 MHz Pentium
-running NT. If the client
-is communicating over a network, this saving is negligible, but
-if the client resides in the same machine, maybe in an SMP machine
-on a different processor from the server thread, the saving
-can be important as the threads can communicate over shared
-memory with an overhead of a few microseconds.
-
-We may later implement a dedicated communication thread solution
-for those endpoints which communicate over a network.
-
-Our solution with user threads has two problems: for each endpoint
-there has to be a number of listening threads. If there are many
-communication endpoints, it may be difficult to set the right number
-of concurrent threads in the system, as many of the threads
-may always be waiting at less busy endpoints. Another problem
-is queuing of the messages, as the server internally does not
-offer any queue for jobs.
-
-Another group of user threads is intended for splitting the
-queries and processing them in parallel. Let us call these
-parallel communication threads. These threads are waiting for
-parallelized tasks, suspended on event semaphores.
-
-A single user thread waits for input from the console,
-like a command to shut the database.
-
-Utility threads are a different group of threads which takes
-care of the buffer pool flushing and other, mainly background
-operations, in the server.
-Some of these utility threads always run at a lower than normal
-priority, so that they are always in background. Some of them
-may dynamically boost their priority by the pri_adjust function,
-even to higher than normal priority, if their task becomes urgent.
-The running of utilities is controlled by high- and low-water marks
-of urgency. The urgency may be measured by the number of dirty blocks
-in the buffer pool, in the case of the flush thread, for example.
-When the high-water mark is exceeded, a utility starts running, until
-the urgency drops under the low-water mark. Then the utility thread
-suspends itself to wait for an event. The master thread is
-responsible for signaling this event when the utility thread is
-again needed.
-
-For each individual type of utility, some threads always remain
-at lower than normal priority. This is because pri_adjust is implemented
-so that the threads at normal or higher priority control their
-share of running time by calling sleep. Thus, if the load of the
-system suddenly drops, these threads cannot necessarily utilize
-the system fully. The background priority threads make up for this,
-starting to run when the load drops.
-
-When there is no activity in the system, also the master thread
-suspends itself to wait for an event making
-the server totally silent. The responsibility to signal this
-event is on the user thread which again receives a message
-from a client.
-
-There is still one complication in our server design. If a
-background utility thread obtains a resource (e.g., mutex) needed by a user
-thread, and there is also some other user activity in the system,
-the user thread may have to wait indefinitely long for the
-resource, as the OS does not schedule a background thread if
-there is some other runnable user thread. This problem is called
-priority inversion in real-time programming.
-
-One solution to the priority inversion problem would be to
-keep record of which thread owns which resource and
-in the above case boost the priority of the background thread
-so that it will be scheduled and it can release the resource.
-This solution is called priority inheritance in real-time programming.
-A drawback of this solution is that the overhead of acquiring a mutex
-increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because
-the thread has to call os_thread_get_curr_id.
-This may be compared to 0.5 microsecond overhead for a mutex lock-unlock
-pair. Note that the thread
-cannot store the information in the resource, say mutex, itself,
-because competing threads could wipe out the information if it is
-stored before acquiring the mutex, and if it is stored afterwards,
-the information is outdated for the time of one machine instruction,
-at least. (To be precise, the information could be stored to
-lock_word in mutex if the machine supports atomic swap.)
-
-The above solution with priority inheritance may become actual in the
-future, but at the moment we plan to implement a more coarse solution,
-which could be called a global priority inheritance. If a thread
-has to wait for a long time, say 300 milliseconds, for a resource,
-we just guess that it may be waiting for a resource owned by a background
-thread, and boost the priority of all runnable background threads
-to the normal level. The background threads then themselves adjust
-their fixed priority back to background after releasing all resources
-they had (or, at some fixed points in their program code).
-
-What is the performance of the global priority inheritance solution?
-We may weigh the length of the wait time 300 milliseconds, during
-which the system processes some other thread
-to the cost of boosting the priority of each runnable background
-thread, rescheduling it, and lowering the priority again.
-On 100 MHz Pentium + NT this overhead may be of the order 100
-microseconds per thread. So, if the number of runnable background
-threads is not very big, say < 100, the cost is tolerable.
-Utility threads probably will access resources used by
-user threads not very often, so collisions of user threads
-to preempted utility threads should not happen very often.
-
-The thread table contains
-information of the current status of each thread existing in the system,
-and also the event semaphores used in suspending the master thread
-and utility and parallel communication threads when they have nothing to do.
-The thread table can be seen as an analogue to the process table
-in a traditional Unix implementation.
-
-The thread table is also used in the global priority inheritance
-scheme. This brings in one additional complication: threads accessing
-the thread table must have at least normal fixed priority,
-because the priority inheritance solution does not work if a background
-thread is preempted while possessing the mutex protecting the thread table.
-So, if a thread accesses the thread table, its priority has to be
-boosted at least to normal. This priority requirement can be seen similar to
-the privileged mode used when processing the kernel calls in traditional
-Unix.*/
-
-/* Thread slot in the thread table. Slots of this layout are used both for
-the server thread table (srv_sys->threads) and for srv_mysql_table; both
-arrays are allocated zero-filled in srv_init(). */
-struct srv_slot_struct{
- unsigned type:1; /*!< thread type, stored in a single
- bit as an enum srv_thread_type value;
- srv_thread_type_validate() accepts only
- SRV_WORKER and SRV_MASTER */
- unsigned in_use:1; /*!< TRUE if this slot is in use */
- unsigned suspended:1; /*!< TRUE if the thread is waiting
- for the event of this slot */
- ib_time_t suspend_time; /*!< time when the thread was
- suspended */
- os_event_t event; /*!< event used in suspending the
- thread when it has nothing to do;
- created in srv_init() */
- que_thr_t* thr; /*!< suspended query thread (only
- used for MySQL threads) */
-};
-
-/* Table for MySQL threads where they will be suspended to wait for locks */
-UNIV_INTERN srv_slot_t* srv_mysql_table = NULL;
-
-UNIV_INTERN os_event_t srv_timeout_event;
-
-UNIV_INTERN os_event_t srv_monitor_event;
-
-UNIV_INTERN os_event_t srv_error_event;
-
-UNIV_INTERN os_event_t srv_lock_timeout_thread_event;
-
-UNIV_INTERN os_event_t srv_shutdown_event;
-
-UNIV_INTERN os_event_t srv_checkpoint_completed_event;
-
-UNIV_INTERN os_event_t srv_redo_log_thread_finished_event;
-
-UNIV_INTERN srv_sys_t* srv_sys = NULL;
-
-/* padding to prevent other memory update hotspots from residing on
-the same memory cache line */
-UNIV_INTERN byte srv_pad1[64];
-/* mutex protecting the server, trx structs, query threads, and lock table */
-UNIV_INTERN mutex_t* kernel_mutex_temp;
-/* padding to prevent other memory update hotspots from residing on
-the same memory cache line */
-UNIV_INTERN byte srv_pad2[64];
-
-#if 0
-/* The following three values measure the urgency of the jobs of
-buffer, version, and insert threads. They may vary from 0 - 1000.
-The server mutex protects all these variables. The low-water values
-tell that the server can acquiesce the utility when the value
-drops below this low-water mark. */
-
-static ulint srv_meter[SRV_MASTER + 1];
-static ulint srv_meter_low_water[SRV_MASTER + 1];
-static ulint srv_meter_high_water[SRV_MASTER + 1];
-static ulint srv_meter_high_water2[SRV_MASTER + 1];
-static ulint srv_meter_foreground[SRV_MASTER + 1];
-#endif
-
-/* The following values give info about the activity going on in
-the database. They are protected by the server mutex. The arrays
-are indexed by the type of the thread. */
-
-UNIV_INTERN ulint srv_n_threads_active[SRV_MASTER + 1];
-UNIV_INTERN ulint srv_n_threads[SRV_MASTER + 1];
-
-/*********************************************************************//**
-Asynchronous purge thread.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-srv_purge_thread(
-/*=============*/
- void* arg __attribute__((unused))); /*!< in: a dummy parameter
- required by os_thread_create */
-
-/***********************************************************************
-Prints counters for work done by srv_master_thread: the iterations of the
-1-second, 10-second, background and flush loops, the number of sleeps of
-the 1-second loop, and the number of log writes that involved a flush.
-The counters are ulint values; each is cast to ulong so that the argument
-type matches the %lu conversion even where ulint is not 'unsigned long'
-(the same casting convention is used by the other fprintf calls in this
-file). */
-static
-void
-srv_print_master_thread_info(
-/*=========================*/
-	FILE  *file)	/* in: output stream */
-{
-	fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, "
-		"%lu 10_second, %lu background, %lu flush\n",
-		(ulong) srv_main_1_second_loops,
-		(ulong) srv_main_sleeps,
-		(ulong) srv_main_10_second_loops,
-		(ulong) srv_main_background_loops,
-		(ulong) srv_main_flush_loops);
-	fprintf(file, "srv_master_thread log flush and writes: %lu\n",
-		(ulong) srv_log_writes_and_flush);
-}
-
-/*********************************************************************//**
-Records a description of what an i/o handler thread is currently doing.
-The segment number is asserted to be below SRV_MAX_N_IO_THREADS. Only the
-pointer is stored; the string itself is not copied. */
-UNIV_INTERN
-void
-srv_set_io_thread_op_info(
-/*======================*/
-	ulint		i,	/*!< in: the 'segment' of the i/o thread */
-	const char*	str)	/*!< in: constant char string describing the
-				state */
-{
-	const char**	info_entry;
-
-	ut_a(i < SRV_MAX_N_IO_THREADS);
-
-	info_entry = &srv_io_thread_op_info[i];
-	*info_entry = str;
-}
-
-/*********************************************************************//**
-Returns the address of the slot at the given position of the server
-thread table (srv_sys->threads). The caller must own the kernel mutex
-(checked in debug builds); the index is always asserted to be below
-OS_THREAD_MAX_N.
-@return pointer to the slot */
-static
-srv_slot_t*
-srv_table_get_nth_slot(
-/*===================*/
-	ulint	index)		/*!< in: index of the slot */
-{
-	srv_slot_t*	nth_slot;
-
-	ut_ad(mutex_own(&kernel_mutex));
-	ut_a(index < OS_THREAD_MAX_N);
-
-	nth_slot = &srv_sys->threads[index];
-
-	return(nth_slot);
-}
-
-/*********************************************************************//**
-Counts the threads in the system by adding up the per-type counters in
-srv_n_threads[]. The kernel mutex is held while the counters are read.
-@return sum of srv_n_threads[] */
-UNIV_INTERN
-ulint
-srv_get_n_threads(void)
-/*===================*/
-{
-	ulint	total = 0;
-	ulint	type_no = 0;
-
-	mutex_enter(&kernel_mutex);
-
-	while (type_no < SRV_MASTER + 1) {
-		total += srv_n_threads[type_no];
-		type_no++;
-	}
-
-	mutex_exit(&kernel_mutex);
-
-	return(total);
-}
-
-#ifdef UNIV_DEBUG
-/*********************************************************************//**
-Checks that a thread type is one of the values a thread table slot may
-hold (SRV_WORKER or SRV_MASTER). Any other value triggers ut_error.
-@return TRUE if ok */
-static
-ibool
-srv_thread_type_validate(
-/*=====================*/
-	enum srv_thread_type	type)	/*!< in: thread type */
-{
-	if (type == SRV_WORKER || type == SRV_MASTER) {
-
-		return(TRUE);
-	}
-
-	/* Not a known thread type */
-	ut_error;
-
-	return(FALSE);
-}
-#endif /* UNIV_DEBUG */
-
-/*********************************************************************//**
-Reads the thread type stored in a thread table slot. In debug builds the
-value is also checked with srv_thread_type_validate().
-@return thread type */
-static
-enum srv_thread_type
-srv_slot_get_type(
-/*==============*/
-	const srv_slot_t*	slot)	/*!< in: thread slot */
-{
-	enum srv_thread_type	slot_type;
-
-	slot_type = (enum srv_thread_type) slot->type;
-
-	ut_ad(srv_thread_type_validate(slot_type));
-
-	return(slot_type);
-}
-
-/*********************************************************************//**
-Reserves the first unused slot of the thread table for the current thread
-and marks it as in use, not suspended, and of the given type.
-NOTE! The server mutex has to be reserved by the caller!
-If every slot is in use, the scan runs past the end of the table and the
-index assertion in srv_table_get_nth_slot() fails.
-@return reserved slot */
-static
-srv_slot_t*
-srv_table_reserve_slot(
-/*===================*/
-	enum srv_thread_type	type)	/*!< in: type of the thread */
-{
-	srv_slot_t*	free_slot;
-	ulint		slot_no;
-
-	ut_ad(srv_thread_type_validate(type));
-	ut_ad(mutex_own(&kernel_mutex));
-
-	/* Linear scan for the first slot that is not in use */
-	for (slot_no = 0; ; slot_no++) {
-		free_slot = srv_table_get_nth_slot(slot_no);
-
-		if (!free_slot->in_use) {
-
-			break;
-		}
-	}
-
-	free_slot->in_use = TRUE;
-	free_slot->suspended = FALSE;
-	free_slot->type = type;
-
-	/* The type must survive the round trip through the bit-field */
-	ut_ad(srv_slot_get_type(free_slot) == type);
-
-	return(free_slot);
-}
-
-/*********************************************************************//**
-Marks the calling thread's slot as suspended, decrements the count of
-active threads of the slot's type, and resets the slot's event. The
-function itself does not wait on the event.
-NOTE! The server mutex has to be reserved by the caller! */
-static
-void
-srv_suspend_thread(
-/*===============*/
-	srv_slot_t*	slot)	/*!< in/out: thread slot */
-{
-	enum srv_thread_type	slot_type;
-
-	ut_ad(mutex_own(&kernel_mutex));
-	ut_ad(slot->in_use);
-	ut_ad(!slot->suspended);
-
-	if (srv_print_thread_releases) {
-		ulong	slot_no = (ulong) (slot - srv_sys->threads);
-
-		fprintf(stderr,
-			"Suspending thread %lu to slot %lu\n",
-			(ulong) os_thread_get_curr_id(), slot_no);
-	}
-
-	slot_type = srv_slot_get_type(slot);
-
-	slot->suspended = TRUE;
-
-	ut_ad(srv_n_threads_active[slot_type] > 0);
-	srv_n_threads_active[slot_type] -= 1;
-
-	os_event_reset(slot->event);
-}
-
-/*********************************************************************//**
-Wakes up suspended threads of the given type: walks the thread table in
-slot order and, for each matching suspended slot, clears the suspended
-flag, counts the thread as active again and sets the slot's event. The
-walk stops as soon as n threads have been released.
-NOTE! The server mutex has to be reserved by the caller!
-@return number of threads released: this may be less than n if not
-enough threads were suspended at the moment */
-UNIV_INTERN
-ulint
-srv_release_threads(
-/*================*/
-	enum srv_thread_type	type,	/*!< in: thread type */
-	ulint			n)	/*!< in: number of threads to release */
-{
-	ulint	n_released = 0;
-	ulint	slot_no;
-
-	ut_ad(srv_thread_type_validate(type));
-	ut_ad(n > 0);
-	ut_ad(mutex_own(&kernel_mutex));
-
-	for (slot_no = 0; slot_no < OS_THREAD_MAX_N; slot_no++) {
-		srv_slot_t*	slot = srv_table_get_nth_slot(slot_no);
-
-		/* Skip slots that are free, running, or of another type */
-		if (!slot->in_use || !slot->suspended
-		    || srv_slot_get_type(slot) != type) {
-
-			continue;
-		}
-
-		slot->suspended = FALSE;
-		srv_n_threads_active[type]++;
-		os_event_set(slot->event);
-
-		if (srv_print_thread_releases) {
-			fprintf(stderr,
-				"Releasing thread type %lu"
-				" from slot %lu\n",
-				(ulong) type, (ulong) slot_no);
-		}
-
-		n_released++;
-
-		if (n_released == n) {
-
-			break;
-		}
-	}
-
-	return(n_released);
-}
-
-/*********************************************************************//**
-Looks for a slot in the thread table that is in use by a thread of the
-given type and reports the first one found. This works because we
-currently have only 1 thread of each type. The kernel mutex is held for
-the duration of the search.
-@return slot number or ULINT_UNDEFINED if not found*/
-UNIV_INTERN
-ulint
-srv_thread_has_reserved_slot(
-/*=========================*/
-	enum srv_thread_type	type)	/*!< in: thread type to check */
-{
-	ulint	found_no = ULINT_UNDEFINED;
-	ulint	slot_no = 0;
-
-	ut_ad(srv_thread_type_validate(type));
-	mutex_enter(&kernel_mutex);
-
-	while (slot_no < OS_THREAD_MAX_N) {
-		srv_slot_t*	slot = srv_table_get_nth_slot(slot_no);
-
-		if (slot->in_use && slot->type == type) {
-			found_no = slot_no;
-
-			break;
-		}
-
-		slot_no++;
-	}
-
-	mutex_exit(&kernel_mutex);
-
-	return(found_no);
-}
-
-/*********************************************************************//**
-Initializes the server: allocates srv_sys and the storage pointed to by
-kernel_mutex_temp, creates the kernel and InnoDB monitor mutexes, allocates
-the two OS_THREAD_MAX_N-sized thread slot tables (srv_sys->threads and
-srv_mysql_table) with one event per slot, creates the global server events,
-zeroes the per-type thread counters, initializes the task list, the dummy
-infimum/supremum indexes, the concurrency-control mutex, queue and wait
-slots, and finally the INFORMATION SCHEMA cache.
-Only the slot events and the conc slot events are checked with ut_a(); the
-results of the other allocations and os_event_create() calls are used
-unchecked here. The wait_ended field of the conc slots is not set here. */
-UNIV_INTERN
-void
-srv_init(void)
-/*==========*/
-{
- srv_conc_slot_t* conc_slot;
- srv_slot_t* slot;
- ulint i;
-
- srv_sys = mem_alloc(sizeof(srv_sys_t));
-
- kernel_mutex_temp = mem_alloc(sizeof(mutex_t)); /* NOTE(review): kernel_mutex is presumably a macro for this object -- confirm */
- mutex_create(kernel_mutex_key, &kernel_mutex, SYNC_KERNEL);
-
- mutex_create(srv_innodb_monitor_mutex_key,
- &srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
-
- srv_sys->threads = mem_zalloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)); /* server thread table */
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- slot = srv_sys->threads + i;
- slot->event = os_event_create(NULL);
- ut_a(slot->event);
- }
-
- srv_mysql_table = mem_zalloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)); /* slots for MySQL threads waiting for locks */
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- slot = srv_mysql_table + i;
- slot->event = os_event_create(NULL);
- ut_a(slot->event);
- }
-
- srv_error_event = os_event_create(NULL);
-
- srv_timeout_event = os_event_create(NULL);
-
- srv_monitor_event = os_event_create(NULL);
-
- srv_lock_timeout_thread_event = os_event_create(NULL);
- srv_shutdown_event = os_event_create(NULL);
-
- srv_checkpoint_completed_event = os_event_create(NULL);
- srv_redo_log_thread_finished_event = os_event_create(NULL);
-
- for (i = 0; i < SRV_MASTER + 1; i++) { /* one counter pair per thread type */
- srv_n_threads_active[i] = 0;
- srv_n_threads[i] = 0;
-#if 0
- srv_meter[i] = 30;
- srv_meter_low_water[i] = 50;
- srv_meter_high_water[i] = 100;
- srv_meter_high_water2[i] = 200;
- srv_meter_foreground[i] = 250;
-#endif
- }
-
- UT_LIST_INIT(srv_sys->tasks);
-
- /* Create dummy indexes for infimum and supremum records */
-
- dict_ind_init();
-
- /* Init the server concurrency restriction data structures */
-
- os_fast_mutex_init(&srv_conc_mutex);
-
- UT_LIST_INIT(srv_conc_queue);
-
- srv_conc_slots = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t));
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
- conc_slot = srv_conc_slots + i;
- conc_slot->reserved = FALSE; /* every wait slot starts out free */
- conc_slot->event = os_event_create(NULL);
- ut_a(conc_slot->event);
- }
-
- /* Initialize some INFORMATION SCHEMA internal structures */
- trx_i_s_cache_init(trx_i_s_cache);
-}
-
-/*********************************************************************//**
-Frees the data structures created in srv_init(): the concurrency mutex and
-wait-slot array, the server thread table and srv_sys, the kernel mutex
-storage, the MySQL thread slot table, and the INFORMATION SCHEMA cache.
-The freed global pointers are reset to NULL.
-NOTE(review): the events created in srv_init() are not released here and
-srv_innodb_monitor_mutex is not freed here; presumably that happens
-elsewhere during shutdown -- confirm. */
-UNIV_INTERN
-void
-srv_free(void)
-/*==========*/
-{
- os_fast_mutex_free(&srv_conc_mutex);
- mem_free(srv_conc_slots);
- srv_conc_slots = NULL;
-
- mem_free(srv_sys->threads); /* must precede freeing srv_sys itself */
- mem_free(srv_sys);
- srv_sys = NULL;
-
- mem_free(kernel_mutex_temp);
- kernel_mutex_temp = NULL;
- mem_free(srv_mysql_table);
- srv_mysql_table = NULL;
-
- trx_i_s_cache_free(trx_i_s_cache);
-}
-
-/*********************************************************************//**
-Initializes the synchronization primitives, memory system, and the thread
-local storage. The calls are made in this order: ut_mem_init(),
-recv_sys_var_init(), os_sync_init(), sync_init(), and mem_init() with
-srv_mem_pool_size.
-NOTE(review): no call that is obviously a thread-local-storage
-initialization is visible in this function; the mention above may be
-outdated -- confirm. */
-UNIV_INTERN
-void
-srv_general_init(void)
-/*==================*/
-{
- ut_mem_init();
- /* Reset the system variables in the recovery module. */
- recv_sys_var_init();
- os_sync_init();
- sync_init();
- mem_init(srv_mem_pool_size);
-}
-
-/*======================= InnoDB Server FIFO queue =======================*/
-
-/* Maximum allowable purge history length. 0 means 'infinite'. */
-UNIV_INTERN ulong srv_max_purge_lag = 0;
-
-/*********************************************************************//**
-Puts an OS thread to wait if there are too many concurrent threads
-(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
-
-#ifdef HAVE_ATOMIC_BUILTINS
-/* Marks the transaction as being inside InnoDB and hands it a fresh batch
-of SRV_FREE_TICKETS_TO_ENTER free tickets. The srv_conc_n_threads counter
-is not touched here. */
-static void
-enter_innodb_with_tickets(trx_t* trx)
-{
-	trx->declared_to_be_inside_innodb = TRUE;
-
-	trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
-}
-
-/* Lets the calling thread enter InnoDB using only the atomic counter
-srv_conc_n_threads (srv_conc_mutex and the wait queue are not used).
-
-The thread first tries to take a place below srv_thread_concurrency. If
-that fails it yields once and retries. If the retry fails as well, a
-transaction that holds no locks sleeps once for 10000 microseconds; after
-that, and immediately for a transaction that does hold locks, the thread
-enters regardless of the limit.
-
-If the transaction is already declared to be inside InnoDB, an error is
-printed and the function returns without counting it again, as the
-mutex-based path in srv_conc_enter_innodb() does. Counting it twice would
-leave srv_conc_n_threads one too high, because
-srv_conc_exit_innodb_timer_based() decrements the counter only once.
-
-trx: in/out: transaction object associated with the thread */
-static void
-srv_conc_enter_innodb_timer_based(trx_t* trx)
-{
-	lint	conc_n_threads;
-	ibool	has_yielded = FALSE;
-
-	if (trx->declared_to_be_inside_innodb) {
-		ut_print_timestamp(stderr);
-		fputs(
-" InnoDB: Error: trying to declare trx to enter InnoDB, but\n"
-"InnoDB: it already is declared.\n", stderr);
-		trx_print(stderr, trx, 0);
-		putc('\n', stderr);
-
-		/* Do not increment srv_conc_n_threads a second time */
-		return;
-	}
-retry:
-	if (srv_conc_n_threads < (lint) srv_thread_concurrency) {
-		conc_n_threads = os_atomic_increment_lint(
-			&srv_conc_n_threads, 1);
-
-		if (conc_n_threads <= (lint) srv_thread_concurrency) {
-			enter_innodb_with_tickets(trx);
-			return;
-		}
-
-		/* Other threads took the remaining places between the
-		check and the increment: give the place back */
-		(void) os_atomic_increment_lint(&srv_conc_n_threads, -1);
-	}
-
-	if (!has_yielded) {
-		has_yielded = TRUE;
-		os_thread_yield();
-		goto retry;
-	}
-
-	ut_ad(!trx->has_search_latch);
-
-	if (NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {
-		/* The transaction holds no locks, so it can afford to
-		sleep once before entering */
-		trx->op_info = "sleeping before entering InnoDB";
-		os_thread_sleep(10000);
-		trx->op_info = "";
-	}
-
-	/* Enter even though the limit may be exceeded */
-	(void) os_atomic_increment_lint(&srv_conc_n_threads, 1);
-	enter_innodb_with_tickets(trx);
-}
-
-/* Counterpart of srv_conc_enter_innodb_timer_based(): atomically
-decrements srv_conc_n_threads, then clears the transaction's
-inside-InnoDB flag and drops any free tickets it still has. */
-static void
-srv_conc_exit_innodb_timer_based(trx_t* trx)
-{
-	(void) os_atomic_increment_lint(&srv_conc_n_threads, -1);
-
-	trx->declared_to_be_inside_innodb = FALSE;
-
-	trx->n_tickets_to_enter_innodb = 0;
-}
-#endif
-
-/*********************************************************************//**
-Puts an OS thread to wait if there are too many concurrent threads
-(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue.
-
-Order of the checks:
-1) a replication slave thread only goes through UT_WAIT_FOR() on the
-thread count (with a limit derived from srv_replication_delay) and returns
-without being declared inside InnoDB;
-2) a transaction that still has free tickets spends one and returns;
-3) if srv_thread_concurrency_timer_based is set, the atomic-counter path
-srv_conc_enter_innodb_timer_based() is taken;
-4) otherwise, under srv_conc_mutex: enter if there is room; else, if the
-transaction holds no locks, sleep once for SRV_THREAD_SLEEP_DELAY and
-retry; else reserve a wait slot and block on its event until an exiting
-thread releases us. If no wait slot is free the thread is let in anyway.
-
-The sleep in step 4 is always added to trx->innodb_que_wait_timer; the
-time blocked on the wait slot is added only when trx->take_stats is set
-and both clock readings succeeded. */
-UNIV_INTERN
-void
-srv_conc_enter_innodb(
-/*==================*/
-	trx_t*	trx)	/*!< in: transaction object associated with the
-			thread */
-{
-	ibool			has_slept = FALSE;
-	srv_conc_slot_t*	slot = NULL;
-	ulint			i;
-	ib_uint64_t		start_time = 0L;
-	ib_uint64_t		finish_time = 0L;
-	ulint			sec;
-	ulint			ms;
-
-	ut_ad(!trx->has_search_latch);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!btr_search_own_any());
-	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
-
-	if (trx->mysql_thd != NULL
-	    && thd_is_replication_slave_thread(trx->mysql_thd)) {
-
-		UT_WAIT_FOR(srv_conc_n_threads
-			    < (lint)srv_thread_concurrency,
-			    srv_replication_delay * 1000);
-
-		return;
-	}
-
-	/* If trx has 'free tickets' to enter the engine left, then use one
-	such ticket */
-
-	if (trx->n_tickets_to_enter_innodb > 0) {
-		trx->n_tickets_to_enter_innodb--;
-
-		return;
-	}
-
-#ifdef HAVE_ATOMIC_BUILTINS
-	if (srv_thread_concurrency_timer_based) {
-		srv_conc_enter_innodb_timer_based(trx);
-		return;
-	}
-#endif
-
-	os_fast_mutex_lock(&srv_conc_mutex);
-retry:
-	if (trx->declared_to_be_inside_innodb) {
-		ut_print_timestamp(stderr);
-		fputs(" InnoDB: Error: trying to declare trx"
-		      " to enter InnoDB, but\n"
-		      "InnoDB: it already is declared.\n", stderr);
-		trx_print(stderr, trx, 0);
-		putc('\n', stderr);
-		os_fast_mutex_unlock(&srv_conc_mutex);
-
-		return;
-	}
-
-	ut_ad(srv_conc_n_threads >= 0);
-
-	if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
-
-		srv_conc_n_threads++;
-		trx->declared_to_be_inside_innodb = TRUE;
-		trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
-
-		os_fast_mutex_unlock(&srv_conc_mutex);
-
-		return;
-	}
-
-	/* If the transaction is not holding resources, let it sleep
-	for SRV_THREAD_SLEEP_DELAY microseconds, and try again then */
-
-	ut_ad(!trx->has_search_latch);
-
-	if (!has_slept
-	    && NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {
-
-		has_slept = TRUE; /* We let it sleep only once to avoid
-				  starvation */
-
-		srv_conc_n_waiting_threads++;
-
-		os_fast_mutex_unlock(&srv_conc_mutex);
-
-		trx->op_info = "sleeping before joining InnoDB queue";
-
-		/* Peter Zaitsev suggested that we take the sleep away
-		altogether. But the sleep may be good in pathological
-		situations of lots of thread switches. Simply put some
-		threads aside for a while to reduce the number of thread
-		switches. */
-		if (SRV_THREAD_SLEEP_DELAY > 0) {
-			os_thread_sleep(SRV_THREAD_SLEEP_DELAY);
-			trx->innodb_que_wait_timer += SRV_THREAD_SLEEP_DELAY;
-		}
-
-		trx->op_info = "";
-
-		os_fast_mutex_lock(&srv_conc_mutex);
-
-		srv_conc_n_waiting_threads--;
-
-		goto retry;
-	}
-
-	/* Too many threads inside: put the current thread to a queue */
-
-	for (i = 0; i < OS_THREAD_MAX_N; i++) {
-		slot = srv_conc_slots + i;
-
-		if (!slot->reserved) {
-
-			break;
-		}
-	}
-
-	if (i == OS_THREAD_MAX_N) {
-		/* Could not find a free wait slot, we must let the
-		thread enter */
-
-		srv_conc_n_threads++;
-		trx->declared_to_be_inside_innodb = TRUE;
-		trx->n_tickets_to_enter_innodb = 0;
-
-		os_fast_mutex_unlock(&srv_conc_mutex);
-
-		return;
-	}
-
-	/* No-op for XtraDB. */
-	trx_search_latch_release_if_reserved(trx);
-
-	/* Add to the queue */
-	slot->reserved = TRUE;
-	slot->wait_ended = FALSE;
-
-	UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot);
-
-	os_event_reset(slot->event);
-
-	srv_conc_n_waiting_threads++;
-
-	os_fast_mutex_unlock(&srv_conc_mutex);
-
-	/* Go to wait for the event; when a thread leaves InnoDB it will
-	release this thread */
-
-	ut_ad(!trx->has_search_latch);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!btr_search_own_any());
-	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
-
-	/* start_time == 0 means "do not account this wait". It stays 0
-	when statistics are off or when the clock could not be read, so
-	that sec/ms are never used uninitialized (ut_usectime() reports
-	failure with -1, as checked in srv_suspend_mysql_thread()). */
-	if (UNIV_UNLIKELY(trx->take_stats)
-	    && ut_usectime(&sec, &ms) != -1) {
-		start_time = (ib_uint64_t)sec * 1000000 + ms;
-	} else {
-		start_time = 0;
-	}
-
-	trx->op_info = "waiting in InnoDB queue";
-
-	thd_wait_begin(trx->mysql_thd, THD_WAIT_USER_LOCK);
-	os_event_wait(slot->event);
-	thd_wait_end(trx->mysql_thd);
-
-	trx->op_info = "";
-
-	if (UNIV_UNLIKELY(start_time != 0)
-	    && ut_usectime(&sec, &ms) != -1) {
-
-		finish_time = (ib_uint64_t)sec * 1000000 + ms;
-
-		/* Both values are unsigned: skip the update if the clock
-		went backwards, instead of adding a wrapped-around
-		difference to the timer */
-		if (finish_time > start_time) {
-			trx->innodb_que_wait_timer
-				+= (ulint)(finish_time - start_time);
-		}
-	}
-
-	os_fast_mutex_lock(&srv_conc_mutex);
-
-	srv_conc_n_waiting_threads--;
-
-	/* NOTE that the thread which released this thread already
-	incremented the thread counter on behalf of this thread */
-
-	slot->reserved = FALSE;
-
-	UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot);
-
-	trx->declared_to_be_inside_innodb = TRUE;
-	trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
-
-	os_fast_mutex_unlock(&srv_conc_mutex);
-}
-
-/*********************************************************************//**
-This lets a thread enter InnoDB regardless of the number of threads inside
-InnoDB. This must be called when a thread ends a lock wait. The thread is
-added to srv_conc_n_threads, declared inside InnoDB and given exactly one
-ticket. Nothing is done when srv_thread_concurrency is 0. */
-UNIV_INTERN
-void
-srv_conc_force_enter_innodb(
-/*========================*/
-	trx_t*	trx)	/*!< in: transaction object associated with the
-			thread */
-{
-	ut_ad(!trx->has_search_latch);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!btr_search_own_any());
-	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
-
-	if (UNIV_LIKELY(!srv_thread_concurrency)) {
-		/* No thread accounting is done in this case */
-		return;
-	}
-
-	ut_ad(srv_conc_n_threads >= 0);
-
-#ifdef HAVE_ATOMIC_BUILTINS
-	if (srv_thread_concurrency_timer_based) {
-		/* The counter is maintained with atomics, no mutex */
-		(void) os_atomic_increment_lint(&srv_conc_n_threads, 1);
-		trx->declared_to_be_inside_innodb = TRUE;
-		trx->n_tickets_to_enter_innodb = 1;
-	} else
-#endif /* HAVE_ATOMIC_BUILTINS */
-	{
-		/* The counter is protected by srv_conc_mutex */
-		os_fast_mutex_lock(&srv_conc_mutex);
-
-		srv_conc_n_threads++;
-		trx->declared_to_be_inside_innodb = TRUE;
-		trx->n_tickets_to_enter_innodb = 1;
-
-		os_fast_mutex_unlock(&srv_conc_mutex);
-	}
-}
-
-/*********************************************************************//**
-This must be called when a thread exits InnoDB in a lock wait or at the
-end of an SQL statement. Replication slave threads and transactions that
-are not declared inside InnoDB are ignored. Otherwise the thread is taken
-off the srv_conc_n_threads count and, if that leaves room below
-srv_thread_concurrency, the first queued waiter that has not been released
-yet is woken up, with the counter incremented on its behalf. */
-UNIV_INTERN
-void
-srv_conc_force_exit_innodb(
-/*=======================*/
-	trx_t*	trx)	/*!< in: transaction object associated with the
-			thread */
-{
-	srv_conc_slot_t*	waiter = NULL;
-
-	if ((trx->mysql_thd != NULL
-	     && thd_is_replication_slave_thread(trx->mysql_thd))
-	    || trx->declared_to_be_inside_innodb == FALSE) {
-
-		return;
-	}
-
-#ifdef HAVE_ATOMIC_BUILTINS
-	if (srv_thread_concurrency_timer_based) {
-		srv_conc_exit_innodb_timer_based(trx);
-		return;
-	}
-#endif
-
-	os_fast_mutex_lock(&srv_conc_mutex);
-
-	ut_ad(srv_conc_n_threads > 0);
-	srv_conc_n_threads--;
-	trx->declared_to_be_inside_innodb = FALSE;
-	trx->n_tickets_to_enter_innodb = 0;
-
-	if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
-		/* Walk the queue from its head, skipping the waiters that
-		some other exiting thread has released already */
-
-		for (waiter = UT_LIST_GET_FIRST(srv_conc_queue);
-		     waiter != NULL && waiter->wait_ended == TRUE;
-		     waiter = UT_LIST_GET_NEXT(srv_conc_queue, waiter)) {
-			/* nothing to do for this entry */
-		}
-
-		if (waiter != NULL) {
-			waiter->wait_ended = TRUE;
-
-			/* The released thread is counted as being inside
-			right away; it does not increment the counter
-			itself when it wakes up */
-
-			srv_conc_n_threads++;
-		}
-	}
-
-	os_fast_mutex_unlock(&srv_conc_mutex);
-
-	/* The event is signalled only after the mutex has been released */
-	if (waiter != NULL) {
-		os_event_set(waiter->event);
-	}
-
-	ut_ad(!trx->has_search_latch);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!btr_search_own_any());
-	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
-}
-
-/*********************************************************************//**
-This must be called when a thread exits InnoDB. As long as the transaction
-has tickets left nothing happens; only a transaction without tickets is
-really declared outside, through srv_conc_force_exit_innodb(). */
-UNIV_INTERN
-void
-srv_conc_exit_innodb(
-/*=================*/
-	trx_t*	trx)	/*!< in: transaction object associated with the
-			thread */
-{
-	ut_ad(!trx->has_search_latch);
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(!btr_search_own_any());
-	ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
-
-	if (trx->n_tickets_to_enter_innodb <= 0) {
-		/* No tickets left: leave for real */
-		srv_conc_force_exit_innodb(trx);
-	}
-
-	/* Otherwise we pretend the thread is still inside InnoDB though
-	it now leaves the InnoDB engine. In this way we save a lot of
-	semaphore operations. srv_conc_force_exit_innodb is used to declare
-	the thread definitely outside InnoDB. It should be called when
-	there is a lock wait or an SQL statement ends. */
-}
-
-/*========================================================================*/
-
-/*********************************************************************//**
-Normalizes init parameter values to use units we use inside InnoDB.
-The data file sizes and srv_last_file_size_max are multiplied by the
-number of pages per 1024 * 1024 bytes, the log file and log buffer sizes
-are divided by the page size, and srv_lock_table_size is derived from the
-buffer pool size.
-@return DB_SUCCESS or error code (the code below always returns
-DB_SUCCESS) */
-static
-ulint
-srv_normalize_init_values(void)
-/*===========================*/
-{
-	ulint	n_files = srv_n_data_files;
-	ulint	file_no;
-
-	for (file_no = 0; file_no < n_files; file_no++) {
-		srv_data_file_sizes[file_no]
-			*= ((1024 * 1024) / UNIV_PAGE_SIZE);
-	}
-
-	srv_last_file_size_max *= ((1024 * 1024) / UNIV_PAGE_SIZE);
-
-	srv_log_file_size /= UNIV_PAGE_SIZE;
-
-	srv_log_buffer_size /= UNIV_PAGE_SIZE;
-
-	/* Five times the number of pages in the buffer pool */
-	srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
-
-	return(DB_SUCCESS);
-}
-
-/*********************************************************************//**
-Boots the InnoDB server: normalizes the init parameters, then runs
-srv_general_init() and srv_init(). The two init calls are skipped if the
-normalization reports an error.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-ulint
-srv_boot(void)
-/*==========*/
-{
-	/* Transform the init parameter values given by MySQL to
-	use units we use inside InnoDB: */
-
-	ulint	err = srv_normalize_init_values();
-
-	if (err == DB_SUCCESS) {
-		/* Initialize synchronization primitives, memory
-		management, and thread local storage */
-
-		srv_general_init();
-
-		/* Initialize this module */
-
-		srv_init();
-	}
-
-	return(err);
-}
-
-/*********************************************************************//**
-Reserves a slot in the thread table for the current MySQL OS thread.
-NOTE! The kernel mutex has to be reserved by the caller!
-The table srv_mysql_table is scanned from index 0 and the first slot that
-is not in use is marked in use and returned. If all OS_THREAD_MAX_N slots
-are in use, a diagnostic and the state of every slot are written to stderr
-and ut_error is raised.
-@return reserved slot */
-static
-srv_slot_t*
-srv_table_reserve_slot_for_mysql(void)
-/*==================================*/
-{
-	srv_slot_t*	slot;
-	ulint		i = 0;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	do {
-		slot = srv_mysql_table + i;
-
-		if (!slot->in_use) {
-			slot->in_use = TRUE;
-
-			return(slot);
-		}
-	} while (UNIV_LIKELY(++i < OS_THREAD_MAX_N));
-
-	/* Every slot is taken; i now equals the number of slots probed */
-
-	ut_print_timestamp(stderr);
-
-	fprintf(stderr,
-		" InnoDB: There appear to be %lu MySQL"
-		" threads currently waiting\n"
-		"InnoDB: inside InnoDB, which is the"
-		" upper limit. Cannot continue operation.\n"
-		"InnoDB: We intentionally generate"
-		" a seg fault to print a stack trace\n"
-		"InnoDB: on Linux. But first we print"
-		" a list of waiting threads.\n", (ulong) i);
-
-	for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
-		slot = srv_mysql_table + i;
-
-		fprintf(stderr,
-			"Slot %lu: thread type %lu,"
-			" in use %lu, susp %lu, time %lu\n",
-			(ulong) i,
-			(ulong) slot->type,
-			(ulong) slot->in_use,
-			(ulong) slot->suspended,
-			(ulong) difftime(ut_time(),
-					 slot->suspend_time));
-	}
-
-	ut_error;
-
-	/* ut_error is expected not to return; this only gives the
-	compiler a value for the end of a non-void function */
-	return(NULL);
-}
-
-/***************************************************************//**
-Puts a MySQL OS thread to wait for a lock to be released. If an error
-occurs during the wait trx->error_state associated with thr is
-!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK
-are possible errors. DB_DEADLOCK is returned if selective deadlock
-resolution chose this transaction as a victim.
-
-Outline, as implemented below:
-1) under kernel_mutex, return at once if thr is already QUE_THR_RUNNING
-(setting DB_DEADLOCK if the transaction was chosen as a victim);
-otherwise reserve a slot in srv_mysql_table, reset its event and record
-the suspend time;
-2) outside the mutex, give up the data dictionary latch held by the
-transaction (if any) and, if the thread was declared inside InnoDB,
-declare it outside with srv_conc_force_exit_innodb();
-3) wait for the slot event;
-4) undo step 2 in reverse order, free the slot under kernel_mutex and,
-for row lock waits, update the lock wait statistics;
-5) set trx->error_state to DB_DEADLOCK, DB_LOCK_WAIT_TIMEOUT or
-DB_INTERRUPTED as applicable. The checks run in that order, so a later
-one overwrites an earlier one. */
-UNIV_INTERN
-void
-srv_suspend_mysql_thread(
-/*=====================*/
- que_thr_t* thr) /*!< in: query thread associated with the MySQL
- OS thread */
-{
- srv_slot_t* slot;
- os_event_t event;
- double wait_time;
- trx_t* trx;
- ulint had_dict_lock;
- ibool was_declared_inside_innodb = FALSE;
- ib_int64_t start_time = 0;
- ib_int64_t finish_time;
- ulint diff_time;
- ulint sec;
- ulint ms;
- ulong lock_wait_timeout;
-
- ut_ad(!mutex_own(&kernel_mutex));
-
- trx = thr_get_trx(thr);
-
- if (trx->mysql_thd != 0) {
- DEBUG_SYNC_C("srv_suspend_mysql_thread_enter");
- }
-
- os_event_set(srv_lock_timeout_thread_event); /* the same event is set again below, after the slot has been reserved */
-
- mutex_enter(&kernel_mutex);
-
- trx->error_state = DB_SUCCESS;
-
- if (thr->state == QUE_THR_RUNNING) {
-
- ut_ad(thr->is_active == TRUE);
-
- /* The lock has already been released or this transaction
- was chosen as a deadlock victim: no need to suspend */
-
- if (trx->was_chosen_as_deadlock_victim) {
-
- trx->error_state = DB_DEADLOCK;
- trx->was_chosen_as_deadlock_victim = FALSE;
- }
-
- mutex_exit(&kernel_mutex);
-
- return;
- }
-
- ut_ad(thr->is_active == FALSE);
-
- slot = srv_table_reserve_slot_for_mysql();
-
- event = slot->event;
-
- slot->thr = thr;
-
- os_event_reset(event);
-
- slot->suspend_time = ut_time();
-
- if (thr->lock_state == QUE_THR_LOCK_ROW) {
- srv_n_lock_wait_count++;
- srv_n_lock_wait_current_count++;
-
- if (ut_usectime(&sec, &ms) == -1) {
- start_time = -1;
- } else {
- start_time = (ib_int64_t) sec * 1000000 + ms;
- }
- }
- /* Wake the lock timeout monitor thread, if it is suspended */
-
- os_event_set(srv_lock_timeout_thread_event);
-
- mutex_exit(&kernel_mutex);
-
- had_dict_lock = trx->dict_operation_lock_mode;
-
- switch (had_dict_lock) {
- case RW_S_LATCH:
- /* Release foreign key check latch */
- row_mysql_unfreeze_data_dictionary(trx);
- break;
- case RW_X_LATCH:
- /* There should never be a lock wait when the
- dictionary latch is reserved in X mode. Dictionary
- transactions should only acquire locks on dictionary
- tables, not other tables. All access to dictionary
- tables should be covered by dictionary
- transactions. */
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Error: dict X latch held in "
- "srv_suspend_mysql_thread\n", stderr);
- /* This should never occur. This incorrect handling
- was added in the early development of
- ha_innobase::add_index() in InnoDB Plugin 1.0. */
- /* Release fast index creation latch */
- row_mysql_unlock_data_dictionary(trx);
- break;
- }
-
- ut_a(trx->dict_operation_lock_mode == 0); /* the wait below must start with no dictionary latch mode recorded */
-
- if (trx->declared_to_be_inside_innodb) {
-
- was_declared_inside_innodb = TRUE;
-
- /* We must declare this OS thread to exit InnoDB, since a
- possible other thread holding a lock which this thread waits
- for must be allowed to enter, sooner or later */
-
- srv_conc_force_exit_innodb(trx);
- }
-
- /* Suspend this thread and wait for the event. */
-
- thd_wait_begin(trx->mysql_thd, THD_WAIT_ROW_LOCK);
- os_event_wait(event);
- thd_wait_end(trx->mysql_thd);
-
- ut_ad(!trx->has_search_latch);
-#ifdef UNIV_SYNC_DEBUG
- ut_ad(!btr_search_own_any());
- ut_ad(!sync_thread_levels_nonempty_trx(trx->has_search_latch));
-#endif /* UNIV_SYNC_DEBUG */
-
- if (was_declared_inside_innodb) {
-
- /* Return back inside InnoDB */
-
- srv_conc_force_enter_innodb(trx);
- }
-
- /* After resuming, reacquire the data dictionary latch if
- necessary. */
-
- switch (had_dict_lock) {
- case RW_S_LATCH:
- row_mysql_freeze_data_dictionary(trx);
- break;
- case RW_X_LATCH:
- /* This should never occur. This incorrect handling
- was added in the early development of
- ha_innobase::add_index() in InnoDB Plugin 1.0. */
- row_mysql_lock_data_dictionary(trx);
- break;
- }
-
- mutex_enter(&kernel_mutex);
-
- /* Release the slot for others to use */
-
- slot->in_use = FALSE;
-
- wait_time = ut_difftime(ut_time(), slot->suspend_time); /* compared against lock_wait_timeout at the end of the function */
-
- if (thr->lock_state == QUE_THR_LOCK_ROW) {
- if (ut_usectime(&sec, &ms) == -1) {
- finish_time = -1;
- } else {
- finish_time = (ib_int64_t) sec * 1000000 + ms;
- }
-
- diff_time = (finish_time > start_time) ?
- (ulint) (finish_time - start_time) : 0;
-
- srv_n_lock_wait_current_count--;
- srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time;
- if (diff_time > srv_n_lock_max_wait_time &&
- /* only update the variable if we successfully
- retrieved the start and finish times. See Bug#36819. */
- start_time != -1 && finish_time != -1) {
- srv_n_lock_max_wait_time = diff_time;
- }
-
- /* Record the lock wait time for this thread */
- thd_set_lock_wait_time(trx->mysql_thd, diff_time);
- }
-
- if (trx->was_chosen_as_deadlock_victim) {
-
- trx->error_state = DB_DEADLOCK;
- trx->was_chosen_as_deadlock_victim = FALSE;
- }
-
- mutex_exit(&kernel_mutex);
-
- /* InnoDB system transactions (such as the purge, and
- incomplete transactions that are being rolled back after crash
- recovery) will use the global value of
- innodb_lock_wait_timeout, because trx->mysql_thd == NULL.
- NOTE(review): a timeout value of 100000000 or more never produces
- DB_LOCK_WAIT_TIMEOUT below, i.e. it appears to be treated as 'no
- timeout' -- confirm against the option's documentation. */
- lock_wait_timeout = thd_lock_wait_timeout(trx->mysql_thd);
-
- if (lock_wait_timeout < 100000000
- && wait_time > (double) lock_wait_timeout) {
-
- trx->error_state = DB_LOCK_WAIT_TIMEOUT;
- }
-
- if (trx_is_interrupted(trx)) {
-
- trx->error_state = DB_INTERRUPTED;
- }
-}
-
-/********************************************************************//**
-Releases a MySQL OS thread waiting for a lock to be released, if the
-thread is already suspended. The slots of srv_mysql_table are searched in
-order for one that is in use by thr; the event of the first match is set.
-Nothing happens if there is no such slot. The caller must hold
-kernel_mutex. */
-UNIV_INTERN
-void
-srv_release_mysql_thread_if_suspended(
-/*==================================*/
-	que_thr_t*	thr)	/*!< in: query thread associated with the
-				MySQL OS thread */
-{
-	srv_slot_t*	slot = srv_mysql_table;
-	srv_slot_t*	end;
-
-	ut_ad(mutex_own(&kernel_mutex));
-
-	for (end = slot + OS_THREAD_MAX_N; slot < end; slot++) {
-
-		if (slot->in_use && slot->thr == thr) {
-			/* This is the slot the thread is suspended on */
-
-			os_event_set(slot->event);
-
-			break;
-		}
-	}
-}
-
-/******************************************************************//**
-Refreshes the values used to calculate per-second averages. Under
-srv_innodb_monitor_mutex this stores the current time as the new reference
-time, calls the refresh routines of the aio, log and buffer pool modules,
-and copies the current search and row counters into their *_old
-counterparts. */
-static
-void
-srv_refresh_innodb_monitor_stats(void)
-/*==================================*/
-{
-	mutex_enter(&srv_innodb_monitor_mutex);
-
-	/* Reference time of the next averaging interval */
-	srv_last_monitor_time = time(NULL);
-
-	os_aio_refresh_stats();
-
-	/* Adaptive hash index search counters */
-	btr_cur_n_sea_old	= btr_cur_n_sea;
-	btr_cur_n_non_sea_old	= btr_cur_n_non_sea;
-
-	log_refresh_stats();
-
-	buf_refresh_io_stats_all();
-
-	/* Row operation counters */
-	srv_n_rows_inserted_old	= srv_n_rows_inserted;
-	srv_n_rows_updated_old	= srv_n_rows_updated;
-	srv_n_rows_deleted_old	= srv_n_rows_deleted;
-	srv_n_rows_read_old	= srv_n_rows_read;
-
-	mutex_exit(&srv_innodb_monitor_mutex);
-}
-
-/******************************************************************//**
-Outputs to a file the output of the InnoDB Monitor.
-
-The report is written while holding srv_innodb_monitor_mutex and consists
-of these sections, in order: BACKGROUND THREAD, SEMAPHORES, LATEST FOREIGN
-KEY ERROR (only if the foreign key error file is not empty), FILE I/O,
-INSERT BUFFER AND ADAPTIVE HASH INDEX, LOG, BUFFER POOL AND MEMORY, ROW
-OPERATIONS, and finally the lock summary followed by the list of
-transactions. The transaction list is printed only if
-lock_print_info_summary() succeeded.
-
-Side effects: srv_last_monitor_time and the *_old counters used for the
-per-second averages are reset to the current values.
-
-trx_start and trx_end may be NULL. When non-NULL they are written only if
-the lock summary succeeded, and receive ULINT_UNDEFINED if ftell() fails.
-@return FALSE if not all information printed
-due to failure to obtain necessary mutex */
-UNIV_INTERN
-ibool
-srv_printf_innodb_monitor(
-/*======================*/
-	FILE*	file,		/*!< in: output stream */
-	ibool	nowait,		/*!< in: whether to wait for kernel mutex */
-	ulint*	trx_start,	/*!< out: file position of the start of
-				the list of active transactions */
-	ulint*	trx_end)	/*!< out: file position of the end of
-				the list of active transactions */
-{
-	double	time_elapsed;
-	time_t	current_time;
-	ulint	n_reserved;
-	ibool	ret;
-
-	ulong	btr_search_sys_constant;
-	ulong	btr_search_sys_variable;
-	ulint	lock_sys_subtotal;
-	ulint	recv_sys_subtotal;
-
-	ulint	i;
-	trx_t*	trx;
-
-	mutex_enter(&srv_innodb_monitor_mutex);
-
-	current_time = time(NULL);
-
-	/* We add 0.001 seconds to time_elapsed to prevent division
-	by zero if two users happen to call SHOW INNODB STATUS at the same
-	time */
-
-	time_elapsed = difftime(current_time, srv_last_monitor_time)
-		+ 0.001;
-
-	srv_last_monitor_time = time(NULL);
-
-	fputs("\n=====================================\n", file);
-
-	ut_print_timestamp(file);
-	fprintf(file,
-		" INNODB MONITOR OUTPUT\n"
-		"=====================================\n"
-		"Per second averages calculated from the last %lu seconds\n",
-		(ulong)time_elapsed);
-
-	fputs("-----------------\n"
-	      "BACKGROUND THREAD\n"
-	      "-----------------\n", file);
-	srv_print_master_thread_info(file);
-
-	fputs("----------\n"
-	      "SEMAPHORES\n"
-	      "----------\n", file);
-	sync_print(file);
-
-	/* Conceptually, srv_innodb_monitor_mutex has a very high latching
-	order level in sync0sync.h, while dict_foreign_err_mutex has a very
-	low level 135. Therefore we can reserve the latter mutex here without
-	a danger of a deadlock of threads. */
-
-	mutex_enter(&dict_foreign_err_mutex);
-
-	if (ftell(dict_foreign_err_file) != 0L) {
-		fputs("------------------------\n"
-		      "LATEST FOREIGN KEY ERROR\n"
-		      "------------------------\n", file);
-		ut_copy_file(file, dict_foreign_err_file);
-	}
-
-	mutex_exit(&dict_foreign_err_mutex);
-
-	fputs("--------\n"
-	      "FILE I/O\n"
-	      "--------\n", file);
-	os_aio_print(file);
-
-	fputs("-------------------------------------\n"
-	      "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
-	      "-------------------------------------\n", file);
-	ibuf_print(file);
-
-	for (i = 0; i < btr_search_index_num; i++) {
-		ha_print_info(file, btr_search_sys->hash_tables[i]);
-	}
-
-	fprintf(file,
-		"%.2f hash searches/s, %.2f non-hash searches/s\n",
-		(btr_cur_n_sea - btr_cur_n_sea_old)
-		/ time_elapsed,
-		(btr_cur_n_non_sea - btr_cur_n_non_sea_old)
-		/ time_elapsed);
-	btr_cur_n_sea_old = btr_cur_n_sea;
-	btr_cur_n_non_sea_old = btr_cur_n_non_sea;
-
-	fputs("---\n"
-	      "LOG\n"
-	      "---\n", file);
-	log_print(file);
-
-	fputs("----------------------\n"
-	      "BUFFER POOL AND MEMORY\n"
-	      "----------------------\n", file);
-	fprintf(file,
-		"Total memory allocated " ULINTPF
-		"; in additional pool allocated " ULINTPF "\n",
-		ut_total_allocated_memory,
-		mem_pool_get_reserved(mem_comm_pool));
-	fprintf(file,
-		"Total memory allocated by read views " ULINTPF "\n",
-		srv_read_views_memory);
-
-	/* Calculate AHI constant and variable memory allocations */
-
-	btr_search_sys_constant = 0;
-	btr_search_sys_variable = 0;
-
-	ut_ad(btr_search_sys->hash_tables);
-
-	for (i = 0; i < btr_search_index_num; i++) {
-		hash_table_t*	ht = btr_search_sys->hash_tables[i];
-
-		ut_ad(ht);
-		ut_ad(ht->heap);
-
-		/* Multiple mutexes/heaps are currently never used for adaptive
-		hash index tables. */
-		ut_ad(!ht->n_mutexes);
-		ut_ad(!ht->heaps);
-
-		btr_search_sys_variable += mem_heap_get_size(ht->heap);
-		btr_search_sys_constant += ht->n_cells * sizeof(hash_cell_t);
-	}
-
-	/* Sum up the lock heaps of the transactions on the MySQL
-	transaction list */
-	lock_sys_subtotal = 0;
-	if (trx_sys) {
-		mutex_enter(&kernel_mutex);
-		trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
-		while (trx) {
-			lock_sys_subtotal += ((trx->lock_heap)
-				? mem_heap_get_size(trx->lock_heap) : 0);
-			trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
-		}
-		mutex_exit(&kernel_mutex);
-	}
-
-	recv_sys_subtotal = ((recv_sys && recv_sys->addr_hash)
-			? mem_heap_get_size(recv_sys->heap) : 0);
-
-	fprintf(file,
-		"Internal hash tables (constant factor + variable factor)\n"
-		" Adaptive hash index %lu \t(%lu + %lu)\n"
-		" Page hash %lu (buffer pool 0 only)\n"
-		" Dictionary cache %lu \t(%lu + %lu)\n"
-		" File system %lu \t(%lu + %lu)\n"
-		" Lock system %lu \t(%lu + %lu)\n"
-		" Recovery system %lu \t(%lu + %lu)\n",
-
-		btr_search_sys_constant + btr_search_sys_variable,
-		btr_search_sys_constant,
-		btr_search_sys_variable,
-
-		(ulong) (buf_pool_from_array(0)->page_hash->n_cells * sizeof(hash_cell_t)),
-
-		(ulong) (dict_sys ? ((dict_sys->table_hash->n_cells
-				      + dict_sys->table_id_hash->n_cells
-				      ) * sizeof(hash_cell_t)
-				     + dict_sys->size) : 0),
-		(ulong) (dict_sys ? ((dict_sys->table_hash->n_cells
-				      + dict_sys->table_id_hash->n_cells
-				      ) * sizeof(hash_cell_t)) : 0),
-		(ulong) (dict_sys ? (dict_sys->size) : 0),
-
-		(ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)
-			 + fil_system_hash_nodes()),
-		(ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)),
-		(ulong) fil_system_hash_nodes(),
-
-		(ulong) ((lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0)
-			 + lock_sys_subtotal),
-		(ulong) (lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0),
-		(ulong) lock_sys_subtotal,
-
-		(ulong) (((recv_sys && recv_sys->addr_hash)
-			  ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0)
-			 + recv_sys_subtotal),
-		(ulong) ((recv_sys && recv_sys->addr_hash)
-			 ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0),
-		(ulong) recv_sys_subtotal);
-
-	/* dict_sys is treated as possibly NULL in the table above, so it
-	must not be dereferenced unconditionally here either */
-	fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
-		dict_sys ? dict_sys->size : (ulint) 0);
-
-	buf_print_io(file);
-
-	fputs("--------------\n"
-	      "ROW OPERATIONS\n"
-	      "--------------\n", file);
-	fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
-		(long) srv_conc_n_threads,
-		(ulong) srv_conc_n_waiting_threads);
-
-	mutex_enter(&kernel_mutex);
-
-	/* The values below are cast to ulong to match the %lu conversions,
-	as is done for the other %lu arguments in this function */
-
-	fprintf(file, "%lu read views open inside InnoDB\n",
-		(ulong) UT_LIST_GET_LEN(trx_sys->view_list));
-
-	fprintf(file, "%lu transactions active inside InnoDB\n",
-		(ulong) UT_LIST_GET_LEN(trx_sys->trx_list));
-
-	fprintf(file, "%lu out of %lu descriptors used\n",
-		(ulong) trx_sys->descr_n_used,
-		(ulong) trx_sys->descr_n_max);
-
-	if (UT_LIST_GET_LEN(trx_sys->view_list)) {
-		read_view_t*	view = UT_LIST_GET_LAST(trx_sys->view_list);
-
-		if (view) {
-			fprintf(file, "---OLDEST VIEW---\n");
-			read_view_print(file, view);
-			fprintf(file, "-----------------\n");
-		}
-	}
-
-	mutex_exit(&kernel_mutex);
-
-	n_reserved = fil_space_get_n_reserved_extents(0);
-	if (n_reserved > 0) {
-		fprintf(file,
-			"%lu tablespace extents now reserved for"
-			" B-tree split operations\n",
-			(ulong) n_reserved);
-	}
-
-#ifdef UNIV_LINUX
-	fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
-		(ulong) srv_main_thread_process_no,
-		(ulong) srv_main_thread_id,
-		srv_main_thread_op_info);
-#else
-	fprintf(file, "Main thread id %lu, state: %s\n",
-		(ulong) srv_main_thread_id,
-		srv_main_thread_op_info);
-#endif
-	fprintf(file,
-		"Number of rows inserted " ULINTPF
-		", updated " ULINTPF ", deleted " ULINTPF
-		", read " ULINTPF "\n",
-		srv_n_rows_inserted,
-		srv_n_rows_updated,
-		srv_n_rows_deleted,
-		srv_n_rows_read);
-	fprintf(file,
-		"%.2f inserts/s, %.2f updates/s,"
-		" %.2f deletes/s, %.2f reads/s\n",
-		(srv_n_rows_inserted - srv_n_rows_inserted_old)
-		/ time_elapsed,
-		(srv_n_rows_updated - srv_n_rows_updated_old)
-		/ time_elapsed,
-		(srv_n_rows_deleted - srv_n_rows_deleted_old)
-		/ time_elapsed,
-		(srv_n_rows_read - srv_n_rows_read_old)
-		/ time_elapsed);
-
-	srv_n_rows_inserted_old = srv_n_rows_inserted;
-	srv_n_rows_updated_old = srv_n_rows_updated;
-	srv_n_rows_deleted_old = srv_n_rows_deleted;
-	srv_n_rows_read_old = srv_n_rows_read;
-
-	/* Only if lock_print_info_summary proceeds correctly,
-	before we call the lock_print_info_all_transactions
-	to print all the lock information. */
-	ret = lock_print_info_summary(file, nowait);
-
-	if (ret) {
-		if (trx_start) {
-			long	t = ftell(file);
-			if (t < 0) {
-				*trx_start = ULINT_UNDEFINED;
-			} else {
-				*trx_start = (ulint) t;
-			}
-		}
-		lock_print_info_all_transactions(file);
-		if (trx_end) {
-			long	t = ftell(file);
-			if (t < 0) {
-				*trx_end = ULINT_UNDEFINED;
-			} else {
-				*trx_end = (ulint) t;
-			}
-		}
-	}
-
-	fputs("----------------------------\n"
-	      "END OF INNODB MONITOR OUTPUT\n"
-	      "============================\n", file);
-	mutex_exit(&srv_innodb_monitor_mutex);
-	fflush(file);
-
-	return(ret);
-}
-
-/******************************************************************//**
-Function to pass InnoDB status variables to MySQL */
-UNIV_INTERN
-void
-srv_export_innodb_status(void)
-/*==========================*/
-{
- buf_pool_stat_t stat;
- buf_pools_list_size_t buf_pools_list_size;
- ulint LRU_len;
- ulint free_len;
- ulint flush_list_len;
- ulint mem_adaptive_hash, mem_dictionary;
- read_view_t* oldest_view;
- ulint i;
-
- buf_get_total_stat(&stat);
- buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
- buf_get_total_list_size_in_bytes(&buf_pools_list_size);
-
- mem_adaptive_hash = 0;
-
- ut_ad(btr_search_sys->hash_tables);
-
- for (i = 0; i < btr_search_index_num; i++) {
- hash_table_t* ht = btr_search_sys->hash_tables[i];
-
- ut_ad(ht);
- ut_ad(ht->heap);
- /* Multiple mutexes/heaps are currently never used for adaptive
- hash index tables. */
- ut_ad(!ht->n_mutexes);
- ut_ad(!ht->heaps);
-
- mem_adaptive_hash += mem_heap_get_size(ht->heap);
- mem_adaptive_hash += ht->n_cells * sizeof(hash_cell_t);
- }
-
- mem_dictionary = (dict_sys ? ((dict_sys->table_hash->n_cells
- + dict_sys->table_id_hash->n_cells
- ) * sizeof(hash_cell_t)
- + dict_sys->size) : 0);
-
- mutex_enter(&srv_innodb_monitor_mutex);
-
- export_vars.innodb_adaptive_hash_cells = 0;
- export_vars.innodb_adaptive_hash_heap_buffers = 0;
- for (i = 0; i < btr_search_index_num; i++) {
- hash_table_t* table = btr_search_sys->hash_tables[i];
-
- export_vars.innodb_adaptive_hash_cells
- += hash_get_n_cells(table);
- export_vars.innodb_adaptive_hash_heap_buffers
- += (UT_LIST_GET_LEN(table->heap->base) - 1);
- }
- export_vars.innodb_adaptive_hash_hash_searches
- = btr_cur_n_sea;
- export_vars.innodb_adaptive_hash_non_hash_searches
- = btr_cur_n_non_sea;
- export_vars.innodb_background_log_sync
- = srv_log_writes_and_flush;
- export_vars.innodb_data_pending_reads
- = os_n_pending_reads;
- export_vars.innodb_data_pending_writes
- = os_n_pending_writes;
- export_vars.innodb_data_pending_fsyncs
- = fil_n_pending_log_flushes
- + fil_n_pending_tablespace_flushes;
- export_vars.innodb_data_fsyncs = os_n_fsyncs;
- export_vars.innodb_data_read = srv_data_read;
- export_vars.innodb_data_reads = os_n_file_reads;
- export_vars.innodb_data_writes = os_n_file_writes;
- export_vars.innodb_data_written = srv_data_written;
- export_vars.innodb_dict_tables= (dict_sys ? UT_LIST_GET_LEN(dict_sys->table_LRU) : 0);
- export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
- export_vars.innodb_buffer_pool_write_requests
- = srv_buf_pool_write_requests;
- export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free;
- export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed;
- export_vars.innodb_buffer_pool_pages_LRU_flushed = buf_lru_flush_page_count;
- export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads;
- export_vars.innodb_buffer_pool_read_ahead_rnd
- = stat.n_ra_pages_read_rnd;
- export_vars.innodb_buffer_pool_read_ahead
- = stat.n_ra_pages_read;
- export_vars.innodb_buffer_pool_read_ahead_evicted
- = stat.n_ra_pages_evicted;
- export_vars.innodb_buffer_pool_pages_data = LRU_len;
- export_vars.innodb_buffer_pool_bytes_data =
- buf_pools_list_size.LRU_bytes
- + buf_pools_list_size.unzip_LRU_bytes;
- export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
- export_vars.innodb_buffer_pool_bytes_dirty =
- buf_pools_list_size.flush_list_bytes;
- export_vars.innodb_buffer_pool_pages_free = free_len;
- export_vars.innodb_deadlocks = srv_n_lock_deadlock_count;
-#ifdef UNIV_DEBUG
- export_vars.innodb_buffer_pool_pages_latched
- = buf_get_latched_pages_number();
-#endif /* UNIV_DEBUG */
- export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages();
-
- export_vars.innodb_buffer_pool_pages_misc
- = buf_pool_get_n_pages() - LRU_len - free_len;
-
- export_vars.innodb_buffer_pool_pages_made_young
- = stat.n_pages_made_young;
- export_vars.innodb_buffer_pool_pages_made_not_young
- = stat.n_pages_not_made_young;
- export_vars.innodb_buffer_pool_pages_old = 0;
- for (i = 0; i < srv_buf_pool_instances; i++) {
- buf_pool_t* buf_pool = buf_pool_from_array(i);
- export_vars.innodb_buffer_pool_pages_old
- += buf_pool->LRU_old_len;
- }
- export_vars.innodb_checkpoint_age
- = (log_sys->lsn - log_sys->last_checkpoint_lsn);
- export_vars.innodb_checkpoint_max_age
- = log_sys->max_checkpoint_age;
- export_vars.innodb_checkpoint_target_age
- = srv_checkpoint_age_target
- ? ut_min(log_sys->max_checkpoint_age_async, srv_checkpoint_age_target)
- : log_sys->max_checkpoint_age_async;
- export_vars.innodb_history_list_length
- = trx_sys->rseg_history_len;
- ibuf_export_ibuf_status(
- &export_vars.innodb_ibuf_size,
- &export_vars.innodb_ibuf_free_list,
- &export_vars.innodb_ibuf_segment_size,
- &export_vars.innodb_ibuf_merges,
- &export_vars.innodb_ibuf_merged_inserts,
- &export_vars.innodb_ibuf_merged_delete_marks,
- &export_vars.innodb_ibuf_merged_deletes,
- &export_vars.innodb_ibuf_discarded_inserts,
- &export_vars.innodb_ibuf_discarded_delete_marks,
- &export_vars.innodb_ibuf_discarded_deletes);
- export_vars.innodb_lsn_current
- = log_sys->lsn;
- export_vars.innodb_lsn_flushed
- = log_sys->flushed_to_disk_lsn;
- export_vars.innodb_lsn_last_checkpoint
- = log_sys->last_checkpoint_lsn;
- export_vars.innodb_master_thread_1_second_loops
- = srv_main_1_second_loops;
- export_vars.innodb_master_thread_10_second_loops
- = srv_main_10_second_loops;
- export_vars.innodb_master_thread_background_loops
- = srv_main_background_loops;
- export_vars.innodb_master_thread_main_flush_loops
- = srv_main_flush_loops;
- export_vars.innodb_master_thread_sleeps
- = srv_main_sleeps;
- export_vars.innodb_max_trx_id
- = trx_sys->max_trx_id;
- export_vars.innodb_mem_adaptive_hash
- = mem_adaptive_hash;
- export_vars.innodb_mem_dictionary
- = mem_dictionary;
- export_vars.innodb_mem_total
- = ut_total_allocated_memory;
- export_vars.innodb_mutex_os_waits
- = mutex_os_wait_count;
- export_vars.innodb_mutex_spin_rounds
- = mutex_spin_round_count;
- export_vars.innodb_mutex_spin_waits
- = mutex_spin_wait_count;
- export_vars.innodb_s_lock_os_waits
- = rw_s_os_wait_count;
- export_vars.innodb_s_lock_spin_rounds
- = rw_s_spin_round_count;
- export_vars.innodb_s_lock_spin_waits
- = rw_s_spin_wait_count;
- export_vars.innodb_x_lock_os_waits
- = rw_x_os_wait_count;
- export_vars.innodb_x_lock_spin_rounds
- = rw_x_spin_round_count;
- export_vars.innodb_x_lock_spin_waits
- = rw_x_spin_wait_count;
-
- oldest_view = UT_LIST_GET_LAST(trx_sys->view_list);
- export_vars.innodb_oldest_view_low_limit_trx_id
- = oldest_view ? oldest_view->low_limit_id : 0;
-
- export_vars.innodb_purge_trx_id
- = purge_sys->purge_trx_no;
- export_vars.innodb_purge_undo_no
- = purge_sys->purge_undo_no;
- export_vars.innodb_current_row_locks
- = lock_sys->rec_num;
-
-#ifdef HAVE_ATOMIC_BUILTINS
- export_vars.innodb_have_atomic_builtins = 1;
-#else
- export_vars.innodb_have_atomic_builtins = 0;
-#endif
- export_vars.innodb_page_size = UNIV_PAGE_SIZE;
- export_vars.innodb_log_waits = srv_log_waits;
- export_vars.innodb_os_log_written = srv_os_log_written;
- export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
- export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
- export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes;
- export_vars.innodb_log_write_requests = srv_log_write_requests;
- export_vars.innodb_log_writes = srv_log_writes;
- export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written;
- export_vars.innodb_dblwr_writes = srv_dblwr_writes;
- export_vars.innodb_pages_created = stat.n_pages_created;
- export_vars.innodb_pages_read = stat.n_pages_read;
- export_vars.innodb_pages_written = stat.n_pages_written;
- export_vars.innodb_row_lock_waits = srv_n_lock_wait_count;
- export_vars.innodb_row_lock_current_waits
- = srv_n_lock_wait_current_count;
- export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000;
- if (srv_n_lock_wait_count > 0) {
- export_vars.innodb_row_lock_time_avg = (ulint)
- (srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count);
- } else {
- export_vars.innodb_row_lock_time_avg = 0;
- }
- export_vars.innodb_row_lock_time_max
- = srv_n_lock_max_wait_time / 1000;
- export_vars.innodb_rows_read = srv_n_rows_read;
- export_vars.innodb_rows_inserted = srv_n_rows_inserted;
- export_vars.innodb_rows_updated = srv_n_rows_updated;
- export_vars.innodb_rows_deleted = srv_n_rows_deleted;
- export_vars.innodb_truncated_status_writes = srv_truncated_status_writes;
- export_vars.innodb_read_views_memory = srv_read_views_memory;
- export_vars.innodb_descriptors_memory = srv_descriptors_memory;
-
-#ifdef UNIV_DEBUG
- {
- trx_id_t done_trx_no;
- trx_id_t up_limit_id;
-
- rw_lock_s_lock(&purge_sys->latch);
- done_trx_no = purge_sys->done_trx_no;
- up_limit_id = purge_sys->view
- ? purge_sys->view->up_limit_id
- : 0;
- rw_lock_s_unlock(&purge_sys->latch);
-
- if (trx_sys->max_trx_id < done_trx_no) {
- export_vars.innodb_purge_trx_id_age = 0;
- } else {
- export_vars.innodb_purge_trx_id_age =
- trx_sys->max_trx_id - done_trx_no;
- }
-
- if (!up_limit_id
- || trx_sys->max_trx_id < up_limit_id) {
- export_vars.innodb_purge_view_trx_id_age = 0;
- } else {
- export_vars.innodb_purge_view_trx_id_age =
- trx_sys->max_trx_id - up_limit_id;
- }
- }
-#endif /* UNIV_DEBUG */
-
- mutex_exit(&srv_innodb_monitor_mutex);
-}
-
-/*********************************************************************//**
-A thread which prints the info output by various InnoDB monitors.
-It waits on srv_monitor_event for up to 5 seconds at a time. Once more than
-15 seconds have passed since the previous report it prints the standard
-monitor to stderr (if srv_print_innodb_monitor is set) and to
-srv_monitor_file (if srv_innodb_status is set); the tablespace and table
-monitors are printed at most once per 60 seconds each. The thread leaves
-its loop when srv_shutdown_state reaches SRV_SHUTDOWN_CLEANUP.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-srv_monitor_thread(
-/*===============*/
-	void*	arg __attribute__((unused)))
-			/*!< in: a dummy parameter required by
-			os_thread_create */
-{
-	ib_int64_t	sig_count;
-	double		time_elapsed;
-	time_t		current_time;
-	time_t		last_table_monitor_time;
-	time_t		last_tablespace_monitor_time;
-	time_t		last_monitor_time;
-	ulint		mutex_skipped;
-	ibool		last_srv_print_monitor;
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
-	/* The message names this thread; it used to carry the text of the
-	lock timeout thread. */
-	fprintf(stderr, "Monitor thread starts, id %lu\n",
-		os_thread_pf(os_thread_get_curr_id()));
-#endif
-
-#ifdef UNIV_PFS_THREAD
-	pfs_register_thread(srv_monitor_thread_key);
-#endif
-
-	UT_NOT_USED(arg);
-	srv_last_monitor_time = ut_time();
-	last_table_monitor_time = ut_time();
-	last_tablespace_monitor_time = ut_time();
-	last_monitor_time = ut_time();
-	mutex_skipped = 0;
-	last_srv_print_monitor = srv_print_innodb_monitor;
-loop:
-	srv_monitor_active = TRUE;
-
-	/* Wake up every 5 seconds to see if we need to print
-	monitor information or if signalled at shutdown. */
-
-	sig_count = os_event_reset(srv_monitor_event);
-
-	os_event_wait_time_low(srv_monitor_event, 5000000, sig_count);
-
-	current_time = ut_time();
-
-	time_elapsed = difftime(current_time, last_monitor_time);
-
-	if (time_elapsed > 15) {
-		last_monitor_time = ut_time();
-
-		if (srv_print_innodb_monitor) {
-			/* Reset mutex_skipped counter everytime
-			srv_print_innodb_monitor changes. This is to
-			ensure we will not be blocked by kernel_mutex
-			for short duration information printing,
-			such as requested by sync_array_print_long_waits() */
-			if (!last_srv_print_monitor) {
-				mutex_skipped = 0;
-				last_srv_print_monitor = TRUE;
-			}
-
-			if (!srv_printf_innodb_monitor(stderr,
-						MUTEX_NOWAIT(mutex_skipped),
-						NULL, NULL)) {
-				/* Printing was skipped; count the miss. */
-				mutex_skipped++;
-			} else {
-				/* Reset the counter */
-				mutex_skipped = 0;
-			}
-		} else {
-			last_srv_print_monitor = FALSE;
-		}
-
-		if (srv_innodb_status) {
-			/* Overwrite the status file from its beginning and
-			cut it at the end of the fresh output. */
-			mutex_enter(&srv_monitor_file_mutex);
-			rewind(srv_monitor_file);
-			if (!srv_printf_innodb_monitor(srv_monitor_file,
-						MUTEX_NOWAIT(mutex_skipped),
-						NULL, NULL)) {
-				mutex_skipped++;
-			} else {
-				mutex_skipped = 0;
-			}
-
-			os_file_set_eof(srv_monitor_file);
-			mutex_exit(&srv_monitor_file_mutex);
-		}
-
-		if (srv_print_innodb_tablespace_monitor
-		    && difftime(current_time,
-				last_tablespace_monitor_time) > 60) {
-			last_tablespace_monitor_time = ut_time();
-
-			fputs("========================"
-			      "========================\n",
-			      stderr);
-
-			ut_print_timestamp(stderr);
-
-			fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
-			      "========================"
-			      "========================\n",
-			      stderr);
-
-			fsp_print(0);
-			fputs("Validating tablespace\n", stderr);
-			fsp_validate(0);
-			fputs("Validation ok\n"
-			      "---------------------------------------\n"
-			      "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
-			      "=======================================\n",
-			      stderr);
-		}
-
-		if (srv_print_innodb_table_monitor
-		    && difftime(current_time, last_table_monitor_time) > 60) {
-
-			last_table_monitor_time = ut_time();
-
-			fputs("===========================================\n",
-			      stderr);
-
-			ut_print_timestamp(stderr);
-
-			fputs(" INNODB TABLE MONITOR OUTPUT\n"
-			      "===========================================\n",
-			      stderr);
-			dict_print();
-
-			fputs("-----------------------------------\n"
-			      "END OF INNODB TABLE MONITOR OUTPUT\n"
-			      "==================================\n",
-			      stderr);
-		}
-	}
-
-	if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
-		goto exit_func;
-	}
-
-	/* With no monitor enabled the thread is flagged inactive before it
-	goes back to the top of the loop. */
-	if (!srv_print_innodb_monitor
-	    && !srv_print_innodb_lock_monitor
-	    && !srv_print_innodb_tablespace_monitor
-	    && !srv_print_innodb_table_monitor) {
-		srv_monitor_active = FALSE;
-	}
-
-	goto loop;
-
-exit_func:
-	srv_monitor_active = FALSE;
-
-	/* We count the number of threads in os_thread_exit(). A created
-	thread should always use that to exit and not use return() to exit. */
-
-	os_thread_exit(NULL);
-
-	OS_THREAD_DUMMY_RETURN;
-}
-
-/*********************************************************************//**
-A thread which wakes up threads whose lock wait may have lasted too long.
-It waits on srv_timeout_event for up to one second at a time, then scans
-all OS_THREAD_MAX_N slots of srv_mysql_table while holding kernel_mutex.
-For every slot in use, the waiting lock request of its transaction is
-cancelled when the transaction was interrupted or its wait exceeded the
-lock wait timeout. The thread leaves its loop when srv_shutdown_state
-reaches SRV_SHUTDOWN_CLEANUP.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-srv_lock_timeout_thread(
-/*====================*/
- void* arg __attribute__((unused)))
- /*!< in: a dummy parameter required by
- os_thread_create */
-{
- srv_slot_t* slot;
- ibool some_waits;
- double wait_time;
- ulint i;
- ib_int64_t sig_count;
-
-#ifdef UNIV_PFS_THREAD
- pfs_register_thread(srv_lock_timeout_thread_key);
-#endif
-
-loop:
-
- /* When someone is waiting for a lock, we wake up every second
- and check if a timeout has passed for a lock wait */
-
- sig_count = os_event_reset(srv_timeout_event);
-
- os_event_wait_time_low(srv_timeout_event, 1000000, sig_count);
-
- srv_lock_timeout_active = TRUE;
-
- mutex_enter(&kernel_mutex);
-
- some_waits = FALSE;
-
- /* Check of all slots if a thread is waiting there, and if it
- has exceeded the time limit */
-
- for (i = 0; i < OS_THREAD_MAX_N; i++) {
-
- slot = srv_mysql_table + i;
-
- if (slot->in_use) {
- trx_t* trx;
- ulong lock_wait_timeout;
-
- some_waits = TRUE;
-
- wait_time = ut_difftime(ut_time(), slot->suspend_time);
-
- trx = thr_get_trx(slot->thr);
- lock_wait_timeout = thd_lock_wait_timeout(
- trx->mysql_thd);
-
- if (trx_is_interrupted(trx)
- || (lock_wait_timeout < 100000000
- && (wait_time > (double) lock_wait_timeout
- || wait_time < 0))) {
-
- /* Timeout exceeded or a wrap-around in system
- time counter: cancel the lock request queued
- by the transaction and release possible
- other transactions waiting behind; it is
- possible that the lock has already been
- granted: in that case do nothing.
- Note that a timeout value of 100000000 or more
- never expires in the test above; such a wait is
- only cancelled when the transaction is
- interrupted. */
-
- if (trx->wait_lock) {
- lock_cancel_waiting_and_release(
- trx->wait_lock);
- }
- }
- }
- }
-
- os_event_reset(srv_lock_timeout_thread_event);
-
- mutex_exit(&kernel_mutex);
-
- if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
- goto exit_func;
- }
-
- if (some_waits) {
- goto loop;
- }
-
- srv_lock_timeout_active = FALSE;
-
-#if 0
- /* The following synchronisation is disabled, since
- the InnoDB monitor output is to be updated every 15 seconds. */
- os_event_wait(srv_lock_timeout_thread_event);
-#endif
- goto loop;
-
-exit_func:
- srv_lock_timeout_active = FALSE;
-
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
-
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/*********************************************************************//**
-A thread which prints warnings about semaphore waits which have lasted
-too long. These can be used to track bugs which cause hangs.
-On every pass (roughly once per second, or when srv_error_event is
-signalled) it also checks that the log sequence number has not decreased,
-refreshes the monitor statistics when they are older than 60 seconds,
-updates the LRU and flush statistics, and, if srv_kill_idle_transaction is
-set, kills sessions whose active transaction has been idle for too long.
-The loop ends when srv_shutdown_state reaches SRV_SHUTDOWN_CLEANUP.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-srv_error_monitor_thread(
-/*=====================*/
- void* arg __attribute__((unused)))
- /*!< in: a dummy parameter required by
- os_thread_create */
-{
- /* number of successive fatal timeouts observed */
- ulint fatal_cnt = 0;
- ib_uint64_t old_lsn;
- ib_uint64_t new_lsn;
- ib_int64_t sig_count;
- /* longest waiting thread for a semaphore */
- os_thread_id_t waiter = os_thread_get_curr_id();
- os_thread_id_t old_waiter = waiter;
- /* the semaphore that is being waited for */
- const void* sema = NULL;
- const void* old_sema = NULL;
-
- old_lsn = srv_start_lsn;
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Error monitor thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif
-
-#ifdef UNIV_PFS_THREAD
- pfs_register_thread(srv_error_monitor_thread_key);
-#endif
-
-loop:
- srv_error_monitor_active = TRUE;
-
- /* Try to track a strange bug reported by Harald Fuchs and others,
- where the lsn seems to decrease at times */
-
- new_lsn = log_get_lsn();
-
- if (new_lsn < old_lsn) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: old log sequence number %llu"
- " was greater\n"
- "InnoDB: than the new log sequence number %llu!\n"
- "InnoDB: Please submit a bug report"
- " to http://bugs.mysql.com\n",
- old_lsn, new_lsn);
- ut_ad(0);
- }
-
- old_lsn = new_lsn;
-
- if (difftime(time(NULL), srv_last_monitor_time) > 60) {
- /* We refresh InnoDB Monitor values so that averages are
- printed from at most 60 last seconds */
-
- srv_refresh_innodb_monitor_stats();
- }
-
- /* Update the statistics collected for deciding LRU
- eviction policy. */
- buf_LRU_stat_update();
-
- /* Update the statistics collected for flush rate policy. */
- buf_flush_stat_update();
-
- /* In case mutex_exit is not a memory barrier, it is
- theoretically possible some threads are left waiting though
- the semaphore is already released. Wake up those threads: */
-
- sync_arr_wake_threads_if_sema_free();
-
- if (sync_array_print_long_waits(&waiter, &sema)
- && sema == old_sema && os_thread_eq(waiter, old_waiter)) {
- fatal_cnt++; /* same waiter on the same semaphore as on the previous pass */
- if (fatal_cnt > 10) {
-
- fprintf(stderr,
- "InnoDB: Error: semaphore wait has lasted"
- " > %lu seconds\n"
- "InnoDB: We intentionally crash the server,"
- " because it appears to be hung.\n",
- (ulong) srv_fatal_semaphore_wait_threshold);
-
- ut_error;
- }
- } else {
- fatal_cnt = 0;
- old_waiter = waiter;
- old_sema = sema;
- }
-
- if (srv_kill_idle_transaction && trx_sys) {
- trx_t* trx;
- time_t now;
-rescan_idle:
- now = time(NULL);
- mutex_enter(&kernel_mutex);
- trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
- while (trx) {
- if (trx->state == TRX_ACTIVE
- && trx->mysql_thd
- && innobase_thd_is_idle(trx->mysql_thd)) {
- ib_int64_t start_time = innobase_thd_get_start_time(trx->mysql_thd);
- ulong thd_id = innobase_thd_get_thread_id(trx->mysql_thd);
-
- if (trx->last_stmt_start != start_time) {
- trx->idle_start = now;
- trx->last_stmt_start = start_time;
- } else if (difftime(now, trx->idle_start)
- > srv_kill_idle_transaction) {
- /* kill the session; kernel_mutex is released
- before the kill and the transaction list is then
- rescanned from its first element */
- mutex_exit(&kernel_mutex);
- innobase_thd_kill(thd_id);
- goto rescan_idle;
- }
- }
- trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
- }
- mutex_exit(&kernel_mutex);
- }
-
- /* Flush stderr so that a database user gets the output
- to possible MySQL error file */
-
- fflush(stderr);
-
- sig_count = os_event_reset(srv_error_event);
-
- os_event_wait_time_low(srv_error_event, 1000000, sig_count);
-
- if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
-
- goto loop;
- }
-
- srv_error_monitor_active = FALSE;
-
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
-
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/*********************************************************************//**
-Background thread for the buffer pool LRU dump file. At startup it may
-restore the buffer pool from the dump; afterwards it wakes up about every
-5 seconds and writes a new dump whenever more than srv_auto_lru_dump
-seconds have passed since the previous one. The thread ends when
-srv_shutdown_state reaches SRV_SHUTDOWN_CLEANUP.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-srv_LRU_dump_restore_thread(
-/*====================*/
-	void*	arg __attribute__((unused)))
-			/*!< in: a dummy parameter required by
-			os_thread_create */
-{
-	time_t	previous_dump;
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
-	fprintf(stderr, "The LRU dump/restore thread has started, id %lu\n",
-		os_thread_pf(os_thread_get_curr_id()));
-#endif
-
-	/* With srv_blocking_lru_restore set, the restore is left to the
-	synchronous startup path and is not repeated here. */
-	if (srv_auto_lru_dump && !srv_blocking_lru_restore) {
-		buf_LRU_file_restore();
-	}
-
-	previous_dump = time(NULL);
-
-	for (;;) {
-		time_t	since_previous_dump;
-		uint	dump_interval;
-
-		/* Sleep for up to 5 seconds, or less if shutdown is
-		signalled. */
-		os_event_wait_time_low(srv_shutdown_event, 5000000, 0);
-
-		if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
-			break;
-		}
-
-		since_previous_dump = time(NULL) - previous_dump;
-
-		/* Take one snapshot of the setting so that both tests
-		below see the same value. */
-		dump_interval = srv_auto_lru_dump;
-
-		if (dump_interval > 0
-		    && (time_t) dump_interval < since_previous_dump) {
-			previous_dump = time(NULL);
-			buf_LRU_file_dump();
-		}
-	}
-
-	/* We count the number of threads in os_thread_exit(). A created
-	thread should always use that to exit and not use return() to exit. */
-
-	os_thread_exit(NULL);
-
-	OS_THREAD_DUMMY_RETURN;
-}
-
-/**********************************************************************//**
-Check whether any background thread is active. If so return the thread
-type. The counters srv_n_threads_active[0..SRV_MASTER] are examined in
-ascending order while holding kernel_mutex, and the first type with a
-non-zero count is reported.
-@return ULINT_UNDEFINED if all are suspended or have exited, thread
-type if any are still active. */
-UNIV_INTERN
-ulint
-srv_get_active_thread_type(void)
-/*============================*/
-{
-	ulint	i;
-	/* Holds a thread type or ULINT_UNDEFINED, so it has the same type
-	as the function's return value rather than ibool. */
-	ulint	ret = ULINT_UNDEFINED;
-
-	mutex_enter(&kernel_mutex);
-
-	for (i = 0; i <= SRV_MASTER; ++i) {
-		if (srv_n_threads_active[i] != 0) {
-			ret = i;
-			break;
-		}
-	}
-
-	mutex_exit(&kernel_mutex);
-
-	return(ret);
-}
-
-/*********************************************************************//**
-This function prints progress message every 60 seconds during server
-shutdown, for any activities that master thread is pending on.
-Nothing is printed, and *last_print_time is left unchanged, unless more
-than 60 seconds have passed since *last_print_time. */
-static
-void
-srv_shutdown_print_master_pending(
-/*==============================*/
-	ib_time_t*	last_print_time,	/*!< in/out: last time the
-						function printed the message;
-						updated when it prints again */
-	ulint		n_tables_to_drop,	/*!< in: number of tables to
-						be dropped */
-	ulint		n_bytes_merged,		/*!< in: number of change
-						buffer bytes just merged */
-	ulint		n_pages_flushed)	/*!< in: number of pages
-						flushed */
-{
-	ib_time_t	current_time;
-	double		time_elapsed;
-
-	current_time = ut_time();
-	time_elapsed = ut_difftime(current_time, *last_print_time);
-
-	if (time_elapsed > 60) {
-		*last_print_time = ut_time();
-
-		if (n_tables_to_drop) {
-			ut_print_timestamp(stderr);
-			fprintf(stderr, " InnoDB: Waiting for "
-				"%lu table(s) to be dropped\n",
-				(ulong) n_tables_to_drop);
-		}
-
-		/* Check change buffer merge, we only wait for change buffer
-		merge if it is a slow shutdown */
-		if (!srv_fast_shutdown && n_bytes_merged) {
-			ut_print_timestamp(stderr);
-			/* Cast like the other messages here, so that the
-			argument always matches the %lu conversion. */
-			fprintf(stderr, " InnoDB: Waiting for change "
-				"buffer merge to complete\n"
-				" InnoDB: number of bytes of change buffer "
-				"just merged: %lu\n",
-				(ulong) n_bytes_merged);
-		}
-
-		if (n_pages_flushed) {
-			ut_print_timestamp(stderr);
-			fprintf(stderr, " InnoDB: Waiting for "
-				"%lu pages to be flushed\n",
-				(ulong) n_pages_flushed);
-		}
-	}
-}
-
-/******************************************************************//**
-A thread which follows the redo log and outputs the changed page bitmap.
-Each time srv_checkpoint_completed_event is signalled it calls
-log_online_follow_redo_log(), as long as srv_shutdown_state is below
-SRV_SHUTDOWN_LAST_PHASE. The loop stops when that call fails or when the
-shutdown state reaches SRV_SHUTDOWN_LAST_PHASE. Before exiting, the thread
-clears srv_track_changed_pages, calls log_online_read_shutdown() and sets
-srv_redo_log_thread_finished_event.
-@return a dummy value */
-os_thread_ret_t
-srv_redo_log_follow_thread(
-/*=======================*/
- void* arg __attribute__((unused))) /*!< in: a dummy parameter
- required by
- os_thread_create */
-{
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Redo log follower thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif
-
-#ifdef UNIV_PFS_THREAD
- pfs_register_thread(srv_log_tracking_thread_key);
-#endif
-
- my_thread_init();
-
- do {
- os_event_wait(srv_checkpoint_completed_event);
- os_event_reset(srv_checkpoint_completed_event);
-
-#ifdef UNIV_DEBUG
- if (!srv_track_changed_pages) {
- continue; /* skips this round; the loop condition below is still evaluated */
- }
-#endif
-
- if (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) {
- if (!log_online_follow_redo_log()) {
- /* TODO: sync with I_S log tracking status? */
- fprintf(stderr,
- "InnoDB: Error: log tracking bitmap "
- "write failed, stopping log tracking "
- "thread!\n");
- break;
- }
- }
-
- } while (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE);
-
- srv_track_changed_pages = FALSE;
- log_online_read_shutdown();
- os_event_set(srv_redo_log_thread_finished_event);
-
- my_thread_end();
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN;
-}
-
-/*******************************************************************//**
-Records that there has been activity in the database and, when no master
-thread is counted as active, calls srv_release_threads() for one
-SRV_MASTER thread under kernel_mutex. Used in the MySQL interface.
-The active-thread counter is read without holding kernel_mutex, for
-performance reasons, so there is a small chance that the master thread
-stays suspended. */
-UNIV_INTERN
-void
-srv_active_wake_master_thread(void)
-/*===============================*/
-{
-	++srv_activity_count;
-
-	if (srv_n_threads_active[SRV_MASTER] != 0) {
-		/* A master thread is already counted as active. */
-		return;
-	}
-
-	mutex_enter(&kernel_mutex);
-	srv_release_threads(SRV_MASTER, 1);
-	mutex_exit(&kernel_mutex);
-}
-
-/*******************************************************************//**
-Wakes up the purge thread after database activity, but only when purge
-threads are configured and no SRV_WORKER thread is counted as active.
-The counter is read without holding the kernel mutex, for performance
-reasons, so there is a small chance that the purge thread stays
-suspended. The caller must not own kernel_mutex. */
-UNIV_INTERN
-void
-srv_wake_purge_thread_if_not_active(void)
-/*=====================================*/
-{
-	ut_ad(!mutex_own(&kernel_mutex));
-
-	if (srv_n_purge_threads == 0
-	    || srv_n_threads_active[SRV_WORKER] != 0) {
-		/* No purge thread exists, or one is already active. */
-		return;
-	}
-
-	mutex_enter(&kernel_mutex);
-	srv_release_threads(SRV_WORKER, 1);
-	mutex_exit(&kernel_mutex);
-}
-
-/*******************************************************************//**
-Wakes up the master thread if it is suspended or being suspended.
-Unlike srv_active_wake_master_thread(), the active-thread counter is not
-consulted: the activity counter is bumped and srv_release_threads() is
-always called for one SRV_MASTER thread under kernel_mutex. */
-UNIV_INTERN
-void
-srv_wake_master_thread(void)
-/*========================*/
-{
-	++srv_activity_count;
-
-	mutex_enter(&kernel_mutex);
-	srv_release_threads(SRV_MASTER, 1);
-	mutex_exit(&kernel_mutex);
-}
-
-/*******************************************************************//**
-Wakes up the purge thread if it's not already awake. Does nothing when
-no purge threads are configured. The caller must not own kernel_mutex,
-which is taken here around the call to srv_release_threads(). */
-UNIV_INTERN
-void
-srv_wake_purge_thread(void)
-/*=======================*/
-{
-	ut_ad(!mutex_own(&kernel_mutex));
-
-	if (srv_n_purge_threads == 0) {
-		return;
-	}
-
-	mutex_enter(&kernel_mutex);
-	srv_release_threads(SRV_WORKER, 1);
-	mutex_exit(&kernel_mutex);
-}
-
-/******************************************************************//**
-Flushes the log buffer in the background when at least one second has
-passed since srv_last_log_flush_time. The master thread uses this to
-ensure that not more than about one second of transactions is lost in
-case of a crash when the log is not flushed at every commit. The main
-thread operation info is set to "flushing log" on every call, whether or
-not a flush takes place. */
-static
-void
-srv_sync_log_buffer_in_background(void)
-/*===================================*/
-{
-	time_t	now = time(NULL);
-
-	srv_main_thread_op_info = "flushing log";
-
-	if (difftime(now, srv_last_log_flush_time) < 1) {
-		/* The last background flush is less than a second old. */
-		return;
-	}
-
-	log_buffer_sync_in_background(TRUE);
-	srv_last_log_flush_time = now;
-	srv_log_writes_and_flush++;
-}
-
-/********************************************************************//**
-Do a full purge from the master thread: purge batches of
-srv_purge_batch_size are run, each followed by a background log flush
-check, until a batch reports that no pages were purged. During a fast
-shutdown no purge is attempted. May only be called when there are no
-purge threads, or during shutdown when no SRV_WORKER thread is active;
-the caller must not own kernel_mutex. */
-static
-void
-srv_master_do_purge(void)
-/*=====================*/
-{
-	ulint	n_purged;
-
-	ut_ad(!mutex_own(&kernel_mutex));
-
-	ut_a(srv_n_purge_threads == 0
-	     || (srv_shutdown_state > 0
-		 && srv_n_threads_active[SRV_WORKER] == 0));
-
-	do {
-		/* The shutdown flags are checked again before every batch;
-		once a fast shutdown has started there is nothing to purge. */
-		n_purged = (srv_fast_shutdown && srv_shutdown_state > 0)
-			? 0
-			: trx_purge(srv_purge_batch_size);
-
-		srv_sync_log_buffer_in_background();
-
-	} while (n_purged > 0);
-}
-
-/*********************************************************************//**
-The master thread controlling the server.
-@return a dummy parameter */
-UNIV_INTERN
-os_thread_ret_t
-srv_master_thread(
-/*==============*/
- void* arg __attribute__((unused)))
- /*!< in: a dummy parameter required by
- os_thread_create */
-{
- buf_pool_stat_t buf_stat;
- srv_slot_t* slot;
- ulint old_activity_count;
- ulint n_pages_purged = 0;
- ulint n_bytes_merged;
- ulint n_pages_flushed;
- ulint n_pages_flushed_prev = 0;
- ulint n_bytes_archived;
- ulint n_tables_to_drop;
- ulint n_ios;
- ulint n_ios_old;
- ulint n_ios_very_old;
- ulint n_pend_ios;
- ulint next_itr_time;
- ulint prev_adaptive_flushing_method = ULINT_UNDEFINED;
- ulint inner_loop = 0;
- ibool skip_sleep = FALSE;
- ulint i;
- struct t_prev_flush_info_struct {
- ulint count;
- unsigned space:32;
- unsigned offset:32;
- ib_uint64_t oldest_modification;
- } prev_flush_info[MAX_BUFFER_POOLS];
-
- ib_uint64_t lsn_old;
-
- ib_uint64_t oldest_lsn;
- ib_time_t last_print_time;
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "Master thread starts, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif
-
-#ifdef UNIV_PFS_THREAD
- pfs_register_thread(srv_master_thread_key);
-#endif
-
- srv_main_thread_process_no = os_proc_get_number();
- srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
-
- memset(&prev_flush_info, 0, sizeof(prev_flush_info));
- mutex_enter(&kernel_mutex);
-
- slot = srv_table_reserve_slot(SRV_MASTER);
-
- srv_n_threads_active[SRV_MASTER]++;
-
- mutex_exit(&kernel_mutex);
-
- mutex_enter(&(log_sys->mutex));
- lsn_old = log_sys->lsn;
- mutex_exit(&(log_sys->mutex));
-
- last_print_time = ut_time();
-
-loop:
- /*****************************************************************/
- /* ---- When there is database activity by users, we cycle in this
- loop */
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- buf_get_total_stat(&buf_stat);
- n_ios_very_old = log_sys->n_log_ios + buf_stat.n_pages_read
- + buf_stat.n_pages_written;
- n_pages_flushed= 0;
-
- mutex_enter(&kernel_mutex);
-
- /* Store the user activity counter at the start of this loop */
- old_activity_count = srv_activity_count;
-
- mutex_exit(&kernel_mutex);
-
- if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
-
- goto suspend_thread;
- }
-
- /* ---- We run the following loop approximately once per second
- when there is database activity */
-
- srv_last_log_flush_time = time(NULL);
-
- /* Sleep for 1 second on entrying the for loop below the first time. */
- next_itr_time = ut_time_ms() + 1000;
-
- skip_sleep = FALSE;
-
- for (i = 0; i < 10; i++) {
- ulint cur_time = ut_time_ms();
-
-#ifdef UNIV_DEBUG
- if (btr_cur_limit_optimistic_insert_debug
- && srv_n_purge_threads == 0) {
- /* If btr_cur_limit_optimistic_insert_debug is enabled
- and no purge_threads, purge opportunity is increased
- by x100 (1purge/100msec), to speed up debug scripts
- which should wait for purged. */
- next_itr_time -= 900;
-
- srv_main_thread_op_info = "master purging";
-
- srv_master_do_purge();
-
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
-
- goto background_loop;
- }
- }
-#endif /* UNIV_DEBUG */
-
- n_pages_flushed = 0; /* initialize */
-
- /* ALTER TABLE in MySQL requires on Unix that the table handler
- can drop tables lazily after there no longer are SELECT
- queries to them. */
-
- srv_main_thread_op_info = "doing background drop tables";
-
- row_drop_tables_for_mysql_in_background();
-
- srv_main_thread_op_info = "";
-
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
-
- goto background_loop;
- }
-
- buf_get_total_stat(&buf_stat);
-
- n_ios_old = log_sys->n_log_ios + buf_stat.n_pages_read
- + buf_stat.n_pages_written;
-
- srv_main_thread_op_info = "sleeping";
- srv_main_1_second_loops++;
-
- if (!skip_sleep) {
- if (next_itr_time > cur_time
- && srv_shutdown_state == SRV_SHUTDOWN_NONE) {
-
- /* Get sleep interval in micro seconds. We use
- ut_min() to avoid long sleep in case of
- wrap around. */
- os_event_wait_time_low(srv_shutdown_event,
- ut_min(1000000,
- (next_itr_time - cur_time)
- * 1000),
- 0);
- srv_main_sleeps++;
-
- /*
- mutex_enter(&(log_sys->mutex));
- oldest_lsn = buf_pool_get_oldest_modification();
- ib_uint64_t lsn = log_sys->lsn;
- mutex_exit(&(log_sys->mutex));
-
- if(oldest_lsn)
- fprintf(stderr,
- "InnoDB flush: age pct: %lu, lsn progress: %lu\n",
- (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
- lsn - lsn_old);
- */
- }
-
- /* Each iteration should happen at 1 second interval. */
- next_itr_time = ut_time_ms() + 1000;
- } /* if (!skip_sleep) */
-
- skip_sleep = FALSE;
-
- /* Flush logs if needed */
- srv_sync_log_buffer_in_background();
-
- srv_main_thread_op_info = "making checkpoint";
- log_free_check();
-
- /* If i/os during one second sleep were less than 5% of
- capacity, we assume that there is free disk i/o capacity
- available, and it makes sense to do an insert buffer merge. */
-
- buf_get_total_stat(&buf_stat);
- n_pend_ios = buf_get_n_pending_ios()
- + log_sys->n_pending_writes;
- n_ios = log_sys->n_log_ios + buf_stat.n_pages_read
- + buf_stat.n_pages_written;
- if (n_pend_ios < SRV_PEND_IO_THRESHOLD
- && (n_ios - n_ios_old < SRV_RECENT_IO_ACTIVITY)) {
- srv_main_thread_op_info = "doing insert buffer merge";
- ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
-
- /* Flush logs if needed */
- srv_sync_log_buffer_in_background();
- }
-
- if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
- > srv_max_buf_pool_modified_pct)) {
-
- /* Try to keep the number of modified pages in the
- buffer pool under the limit wished by the user */
-
- srv_main_thread_op_info =
- "flushing buffer pool pages";
- n_pages_flushed = buf_flush_list(
- PCT_IO(100), IB_ULONGLONG_MAX);
-
- mutex_enter(&(log_sys->mutex));
- lsn_old = log_sys->lsn;
- mutex_exit(&(log_sys->mutex));
- prev_adaptive_flushing_method = ULINT_UNDEFINED;
- } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 0) {
-
- /* Try to keep the rate of flushing of dirty
- pages such that redo log generation does not
- produce bursts of IO at checkpoint time. */
- ulint n_flush = buf_flush_get_desired_flush_rate();
-
- if (n_flush) {
- srv_main_thread_op_info =
- "flushing buffer pool pages";
- n_flush = ut_min(PCT_IO(100), n_flush);
- n_pages_flushed =
- buf_flush_list(
- n_flush,
- IB_ULONGLONG_MAX);
- }
-
- mutex_enter(&(log_sys->mutex));
- lsn_old = log_sys->lsn;
- mutex_exit(&(log_sys->mutex));
- prev_adaptive_flushing_method = ULINT_UNDEFINED;
- } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 1) {
-
- /* Try to keep modified age not to exceed
- max_checkpoint_age * 7/8 line */
-
- mutex_enter(&(log_sys->mutex));
-
- oldest_lsn = buf_pool_get_oldest_modification();
- if (oldest_lsn == 0) {
- lsn_old = log_sys->lsn;
- mutex_exit(&(log_sys->mutex));
-
- } else {
- if ((log_sys->lsn - oldest_lsn)
- > (log_sys->max_checkpoint_age) - ((log_sys->max_checkpoint_age) / 8)) {
- /* LOG_POOL_PREFLUSH_RATIO_ASYNC is exceeded. */
- /* We should not flush from here. */
- lsn_old = log_sys->lsn;
- mutex_exit(&(log_sys->mutex));
- } else if ((log_sys->lsn - oldest_lsn)
- > (log_sys->max_checkpoint_age)/4 ) {
-
- /* defence line (max_checkpoint_age * 1/2) */
- ib_uint64_t lsn = log_sys->lsn;
-
- ib_uint64_t level, bpl;
- buf_page_t* bpage;
- ulint j;
-
- mutex_exit(&(log_sys->mutex));
-
- bpl = 0;
-
- for (j = 0; j < srv_buf_pool_instances; j++) {
- buf_pool_t* buf_pool;
- ulint n_blocks;
-
- buf_pool = buf_pool_from_array(j);
-
- buf_flush_list_mutex_enter(buf_pool);
- level = 0;
- n_blocks = 0;
- bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
-
- while (bpage != NULL) {
- ib_uint64_t oldest_modification = bpage->oldest_modification;
- if (oldest_modification != 0) {
- level += log_sys->max_checkpoint_age
- - (lsn - oldest_modification);
- }
- bpage = UT_LIST_GET_NEXT(flush_list, bpage);
- n_blocks++;
- }
- buf_flush_list_mutex_exit(buf_pool);
-
- if (level) {
- bpl += ((ib_uint64_t) n_blocks * n_blocks
- * (lsn - lsn_old)) / level;
- }
-
- }
-
- if (!srv_use_doublewrite_buf) {
- /* flush is faster than when doublewrite */
- bpl = (bpl * 7) / 8;
- }
-
- if (bpl) {
-retry_flush_batch:
- n_pages_flushed = buf_flush_list(bpl,
- oldest_lsn + (lsn - lsn_old));
- if (n_pages_flushed == ULINT_UNDEFINED) {
- os_thread_sleep(5000);
- goto retry_flush_batch;
- }
- }
-
- lsn_old = lsn;
- /*
- fprintf(stderr,
- "InnoDB flush: age pct: %lu, lsn progress: %lu, blocks to flush:%llu\n",
- (lsn - oldest_lsn) * 100 / log_sys->max_checkpoint_age,
- lsn - lsn_old, bpl);
- */
- } else {
- lsn_old = log_sys->lsn;
- mutex_exit(&(log_sys->mutex));
- }
- }
- prev_adaptive_flushing_method = 1;
- } else if (srv_adaptive_flushing && srv_adaptive_flushing_method == 2) {
- buf_pool_t* buf_pool;
- buf_page_t* bpage;
- ib_uint64_t lsn;
- ulint j;
-
- mutex_enter(&(log_sys->mutex));
- oldest_lsn = buf_pool_get_oldest_modification();
- lsn = log_sys->lsn;
- mutex_exit(&(log_sys->mutex));
-
- /* upper loop/sec. (x10) */
- next_itr_time -= 900; /* 1000 - 900 == 100 */
- inner_loop++;
- if (inner_loop < 10) {
- i--;
- } else {
- inner_loop = 0;
- }
-
- if (prev_adaptive_flushing_method == 2) {
- lint n_flush;
- lint blocks_sum;
- ulint new_blocks_sum, flushed_blocks_sum;
-
- blocks_sum = new_blocks_sum = flushed_blocks_sum = 0;
-
- /* prev_flush_info[j] should be the previous loop's */
- for (j = 0; j < srv_buf_pool_instances; j++) {
- lint blocks_num, new_blocks_num = 0;
- lint flushed_blocks_num;
-
- buf_pool = buf_pool_from_array(j);
- buf_flush_list_mutex_enter(buf_pool);
-
- blocks_num = UT_LIST_GET_LEN(buf_pool->flush_list);
- bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
-
- while (bpage != NULL) {
- if (prev_flush_info[j].space == bpage->space
- && prev_flush_info[j].offset == bpage->offset
- && prev_flush_info[j].oldest_modification
- == bpage->oldest_modification) {
- break;
- }
- bpage = UT_LIST_GET_NEXT(flush_list, bpage);
- new_blocks_num++;
- }
-
- flushed_blocks_num = new_blocks_num + prev_flush_info[j].count
- - blocks_num;
- if (flushed_blocks_num < 0) {
- flushed_blocks_num = 0;
- }
-
- bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
-
- prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
- if (bpage) {
- prev_flush_info[j].space = bpage->space;
- prev_flush_info[j].offset = bpage->offset;
- prev_flush_info[j].oldest_modification = bpage->oldest_modification;
- buf_flush_list_mutex_exit(buf_pool);
- } else {
- buf_flush_list_mutex_exit(buf_pool);
- prev_flush_info[j].space = 0;
- prev_flush_info[j].offset = 0;
- prev_flush_info[j].oldest_modification = 0;
- }
-
- new_blocks_sum += new_blocks_num;
- flushed_blocks_sum += flushed_blocks_num;
- blocks_sum += blocks_num;
- }
-
- n_flush = (lint) (blocks_sum * (lsn - lsn_old) / log_sys->max_modified_age_async);
- if ((ulint) flushed_blocks_sum > n_pages_flushed_prev) {
- n_flush -= (flushed_blocks_sum - n_pages_flushed_prev);
- }
-
- if (n_flush > 0) {
- n_flush++;
- n_pages_flushed = buf_flush_list(n_flush, oldest_lsn + (lsn - lsn_old));
- } else {
- n_pages_flushed = 0;
- }
- } else {
- /* store previous first pages of the flush_list */
- for (j = 0; j < srv_buf_pool_instances; j++) {
- buf_pool = buf_pool_from_array(j);
- buf_flush_list_mutex_enter(buf_pool);
-
- bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
-
- prev_flush_info[j].count = UT_LIST_GET_LEN(buf_pool->flush_list);
- if (bpage) {
- prev_flush_info[j].space = bpage->space;
- prev_flush_info[j].offset = bpage->offset;
- prev_flush_info[j].oldest_modification = bpage->oldest_modification;
- buf_flush_list_mutex_exit(buf_pool);
- } else {
- buf_flush_list_mutex_exit(buf_pool);
- prev_flush_info[j].space = 0;
- prev_flush_info[j].offset = 0;
- prev_flush_info[j].oldest_modification = 0;
- }
- }
- n_pages_flushed = 0;
- }
-
- lsn_old = lsn;
- prev_adaptive_flushing_method = 2;
- } else {
- mutex_enter(&(log_sys->mutex));
- lsn_old = log_sys->lsn;
- mutex_exit(&(log_sys->mutex));
- prev_adaptive_flushing_method = ULINT_UNDEFINED;
- }
-
- if (n_pages_flushed == ULINT_UNDEFINED) {
- n_pages_flushed_prev = 0;
- } else {
- n_pages_flushed_prev = n_pages_flushed;
- }
-
- if (srv_activity_count == old_activity_count) {
-
- /* There is no user activity at the moment, go to
- the background loop */
-
- goto background_loop;
- }
- }
-
- /* ---- We perform the following code approximately once per
- 10 seconds when there is database activity */
-
-#ifdef MEM_PERIODIC_CHECK
- /* Check magic numbers of every allocated mem block once in 10
- seconds */
- mem_validate_all_blocks();
-#endif
- /* If i/os during the 10 second period were less than 200% of
- capacity, we assume that there is free disk i/o capacity
- available, and it makes sense to flush srv_io_capacity pages.
-
- Note that this is done regardless of the fraction of dirty
- pages relative to the max requested by the user. The one second
- loop above requests writes for that case. The writes done here
- are not required, and may be disabled. */
-
- buf_get_total_stat(&buf_stat);
- n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
- n_ios = log_sys->n_log_ios + buf_stat.n_pages_read
- + buf_stat.n_pages_written;
-
- srv_main_10_second_loops++;
- if (n_pend_ios < SRV_PEND_IO_THRESHOLD
- && (n_ios - n_ios_very_old < SRV_PAST_IO_ACTIVITY)) {
-
- srv_main_thread_op_info = "flushing buffer pool pages";
- buf_flush_list(PCT_IO(100), IB_ULONGLONG_MAX);
-
- /* Flush logs if needed */
- srv_sync_log_buffer_in_background();
- }
-
- /* We run a batch of insert buffer merge every 10 seconds,
- even if the server were active */
-
- srv_main_thread_op_info = "doing insert buffer merge";
- ibuf_contract_for_n_pages(FALSE, PCT_IBUF_IO(5));
-
- /* Flush logs if needed */
- srv_sync_log_buffer_in_background();
-
- if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) {
- srv_main_thread_op_info = "master purging";
-
- srv_master_do_purge();
-
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
-
- goto background_loop;
- }
- }
-
- srv_main_thread_op_info = "flushing buffer pool pages";
-
- /* Flush a few oldest pages to make a new checkpoint younger */
-
- if (buf_get_modified_ratio_pct() > 70) {
-
- /* If there are lots of modified pages in the buffer pool
- (> 70 %), we assume we can afford reserving the disk(s) for
- the time it requires to flush 100 pages */
-
- n_pages_flushed = buf_flush_list(
- PCT_IO(100), IB_ULONGLONG_MAX);
- } else {
- /* Otherwise, we only flush a small number of pages so that
- we do not unnecessarily use much disk i/o capacity from
- other work */
-
- n_pages_flushed = buf_flush_list(
- PCT_IO(10), IB_ULONGLONG_MAX);
- }
-
- srv_main_thread_op_info = "making checkpoint";
-
- /* Make a new checkpoint about once in 10 seconds */
-
- log_checkpoint(TRUE, FALSE, TRUE);
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
-
- /* ---- When there is database activity, we jump from here back to
- the start of loop */
-
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
-
- mutex_exit(&kernel_mutex);
-
- /* If the database is quiet, we enter the background loop */
-
- /*****************************************************************/
-background_loop:
- /* ---- In this loop we run background operations when the server
- is quiet from user activity. Also in the case of a shutdown, we
- loop here, flushing the buffer pool to the data files. */
-
- /* The server has been quiet for a while: start running background
- operations */
- srv_main_background_loops++;
- srv_main_thread_op_info = "doing background drop tables";
-
- n_tables_to_drop = row_drop_tables_for_mysql_in_background();
-
- if (n_tables_to_drop > 0) {
- /* Do not monopolize the CPU even if there are tables waiting
- in the background drop queue. (It is essentially a bug if
- MySQL tries to drop a table while there are still open handles
- to it and we had to put it to the background drop queue.) */
-
- if (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
- os_thread_sleep(100000);
- }
- }
-
- if (srv_n_purge_threads == 0 || (srv_shutdown_state > 0 && srv_n_threads_active[SRV_WORKER] == 0)) {
- srv_main_thread_op_info = "master purging";
-
- srv_master_do_purge();
- }
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
- mutex_exit(&kernel_mutex);
-
- srv_main_thread_op_info = "doing insert buffer merge";
-
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
- n_bytes_merged = 0;
- } else {
- /* This should do an amount of IO similar to the number of
- dirty pages that will be flushed in the call to
- buf_flush_list below. Otherwise, the system favors
- clean pages over cleanup throughput. */
- n_bytes_merged = ibuf_contract_for_n_pages(FALSE,
- PCT_IBUF_IO(100));
- }
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
- mutex_exit(&kernel_mutex);
-
-flush_loop:
- srv_main_thread_op_info = "flushing buffer pool pages";
- srv_main_flush_loops++;
- if (srv_fast_shutdown < 2 || srv_shutdown_state == SRV_SHUTDOWN_NONE) {
- n_pages_flushed = buf_flush_list(
- PCT_IO(100), IB_ULONGLONG_MAX);
- } else {
- /* In the fastest shutdown we do not flush the buffer pool
- to data files: we set n_pages_flushed to 0 artificially. */
- ut_ad(srv_fast_shutdown == 2);
- ut_ad(srv_shutdown_state > 0);
-
- n_pages_flushed = 0;
-
- DBUG_PRINT("master", ("doing very fast shutdown"));
- }
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
- mutex_exit(&kernel_mutex);
-
- srv_main_thread_op_info = "waiting for buffer pool flush to end";
- buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
-
- /* Flush logs if needed */
- srv_sync_log_buffer_in_background();
-
- srv_main_thread_op_info = "making checkpoint";
-
- log_checkpoint(TRUE, FALSE, TRUE);
-
- if (!(srv_fast_shutdown == 2 && srv_shutdown_state > 0)
- && (buf_get_modified_ratio_pct()
- > srv_max_buf_pool_modified_pct)) {
-
- /* If the server is doing a very fast shutdown, then
- we will not come here. */
-
- /* Try to keep the number of modified pages in the
- buffer pool under the limit wished by the user */
-
- goto flush_loop;
- }
-
- srv_main_thread_op_info = "reserving kernel mutex";
-
- mutex_enter(&kernel_mutex);
- if (srv_activity_count != old_activity_count) {
- mutex_exit(&kernel_mutex);
- goto loop;
- }
- mutex_exit(&kernel_mutex);
- /*
- srv_main_thread_op_info = "archiving log (if log archive is on)";
-
- log_archive_do(FALSE, &n_bytes_archived);
- */
- n_bytes_archived = 0;
-
- /* Print progress message every 60 seconds during shutdown */
- if (srv_shutdown_state > 0 && srv_print_verbose_log) {
- srv_shutdown_print_master_pending(&last_print_time,
- n_tables_to_drop,
- n_bytes_merged,
- n_pages_flushed);
- }
-
- /* Keep looping in the background loop if still work to do */
-
- if (srv_fast_shutdown && srv_shutdown_state > 0) {
- if (n_tables_to_drop + n_pages_flushed
- + n_bytes_archived != 0) {
-
- /* If we are doing a fast shutdown (= the default)
- we do not do purge or insert buffer merge. But we
- flush the buffer pool completely to disk.
- In a 'very fast' shutdown we do not flush the buffer
- pool to data files: we have set n_pages_flushed to
- 0 artificially. */
-
- goto background_loop;
- }
- } else if (n_tables_to_drop
- + n_pages_purged + n_bytes_merged + n_pages_flushed
- + n_bytes_archived != 0) {
-
- /* In a 'slow' shutdown we run purge and the insert buffer
- merge to completion */
-
- goto background_loop;
- }
-
- /* There is no work for background operations either: suspend
- master thread to wait for more server activity */
-
-suspend_thread:
- srv_main_thread_op_info = "suspending";
-
- mutex_enter(&kernel_mutex);
-
- if (row_get_background_drop_list_len_low() > 0) {
- mutex_exit(&kernel_mutex);
-
- goto loop;
- }
-
- srv_suspend_thread(slot);
-
- mutex_exit(&kernel_mutex);
-
- /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
- waits for database activity to die down when converting < 4.1.x
- databases, and relies on this string being exactly as it is. InnoDB
- manual also mentions this string in several places. */
- srv_main_thread_op_info = "waiting for server activity";
-
- os_event_wait(slot->event);
-
- if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
- os_thread_exit(NULL);
- }
-
- /* When there is user activity, InnoDB will set the event and the
- main thread goes back to loop. */
-
- goto loop;
-}
-
-/*********************************************************************//**
-Asynchronous purge thread: reserves an SRV_WORKER slot and loops over trx_purge().
-@return a dummy parameter; the thread leaves through os_thread_exit() */
-UNIV_INTERN
-os_thread_ret_t
-srv_purge_thread(
-/*=============*/
- void* arg __attribute__((unused))) /*!< in: a dummy parameter
- required by os_thread_create */
-{
- srv_slot_t* slot;
- ulint retries = 0; /* passes that purged nothing; reset after each wait */
- ulint n_total_purged = ULINT_UNDEFINED;
- ulint next_itr_time; /* ut_time_ms() value at which the next pass is due */
- ib_int64_t sig_count;
-
- ut_a(srv_n_purge_threads == 1); /* this thread requires exactly one purge thread */
-
-#ifdef UNIV_PFS_THREAD
- pfs_register_thread(srv_purge_thread_key);
-#endif /* UNIV_PFS_THREAD */
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "InnoDB: Purge thread running, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif /* UNIV_DEBUG_THREAD_CREATION */
-
- mutex_enter(&kernel_mutex);
-
- slot = srv_table_reserve_slot(SRV_WORKER);
-
- ++srv_n_threads_active[SRV_WORKER];
-
- mutex_exit(&kernel_mutex);
-
- next_itr_time = ut_time_ms();
-
- while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
-
- ulint n_pages_purged = 0;
- ulint cur_time;
-
- /* If there are very few records to purge, or the last
- passes purged no records at all, then wait for activity.
- We peek at the history length without holding any mutex
- because in the worst case we will end up waiting for
- the next purge event. */
- if (trx_sys->rseg_history_len < srv_purge_batch_size
- || (n_total_purged == 0
- && retries >= TRX_SYS_N_RSEGS)) {
-
- mutex_enter(&kernel_mutex);
-
- srv_suspend_thread(slot);
-
- mutex_exit(&kernel_mutex);
-
- os_event_wait(slot->event);
-
- retries = 0;
- }
-
- /* Check for shutdown and whether we should do purge at all. */
- if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND
- || srv_shutdown_state != 0
- || srv_fast_shutdown) {
-
- break;
- }
-
- if (n_total_purged == 0 && retries <= TRX_SYS_N_RSEGS) {
- ++retries;
- } else if (n_total_purged > 0) {
- retries = 0;
- n_total_purged = 0;
- }
-
- /* Purge in batches of srv_purge_batch_size until a batch purges
- nothing or srv_fast_shutdown becomes set. */
- do {
- n_pages_purged = trx_purge(srv_purge_batch_size);
-
- n_total_purged += n_pages_purged;
-
- } while (n_pages_purged > 0 && !srv_fast_shutdown);
-
- srv_sync_log_buffer_in_background();
-
- cur_time = ut_time_ms(); /* pace the loop: wait until next_itr_time if it is still ahead */
- sig_count = os_event_reset(srv_shutdown_event);
- if (next_itr_time > cur_time) {
- os_event_wait_time_low(srv_shutdown_event,
- ut_min(1000000,
- (next_itr_time - cur_time)
- * 1000), /* NOTE(review): presumably ms -> microseconds; confirm */
- sig_count);
- next_itr_time = ut_time_ms() + 1000;
- } else {
- next_itr_time = cur_time + 1000;
- }
- }
-
- mutex_enter(&kernel_mutex);
-
- /* Decrement the active count. */
- srv_suspend_thread(slot);
-
- slot->in_use = FALSE;
-
- mutex_exit(&kernel_mutex);
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
- fprintf(stderr, "InnoDB: Purge thread exiting, id %lu\n",
- os_thread_pf(os_thread_get_curr_id()));
-#endif /* UNIV_DEBUG_THREAD_CREATION */
-
- /* We count the number of threads in os_thread_exit(). A created
- thread should always use that to exit and not use return() to exit. */
- os_thread_exit(NULL);
-
- OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
-}
-
-/**********************************************************************//**
-Appends a query thread to the tail of srv_sys->tasks and then asks for one
-SRV_WORKER thread to be released; both steps run under kernel_mutex. */
-UNIV_INTERN
-void
-srv_que_task_enqueue_low(
-/*=====================*/
- que_thr_t* thr) /*!< in: query thread; asserted non-NULL by ut_ad() */
-{
- ut_ad(thr);
-
- mutex_enter(&kernel_mutex);
-
- UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
-
- srv_release_threads(SRV_WORKER, 1);
-
- mutex_exit(&kernel_mutex);
-}
diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc
new file mode 100644
index 00000000000..953bbba11f7
--- /dev/null
+++ b/storage/xtradb/srv/srv0srv.cc
@@ -0,0 +1,3508 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2008, 2009 Google Inc.
+Copyright (c) 2009, Percona Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file srv/srv0srv.cc
+The database server main program
+
+Created 10/8/1995 Heikki Tuuri
+*******************************************************/
+
+/* Dummy comment */
+#include "srv0srv.h"
+
+#include "ut0mem.h"
+#include "ut0ut.h"
+#include "os0proc.h"
+#include "mem0mem.h"
+#include "mem0pool.h"
+#include "sync0sync.h"
+#include "que0que.h"
+#include "log0online.h"
+#include "log0recv.h"
+#include "pars0pars.h"
+#include "usr0sess.h"
+#include "lock0lock.h"
+#include "trx0purge.h"
+#include "ibuf0ibuf.h"
+#include "buf0flu.h"
+#include "buf0lru.h"
+#include "btr0sea.h"
+#include "dict0load.h"
+#include "dict0boot.h"
+#include "dict0stats_bg.h" /* dict_stats_event */
+#include "srv0start.h"
+#include "row0mysql.h"
+#include "ha_prototypes.h"
+#include "trx0i_s.h"
+#include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
+#include "srv0mon.h"
+#include "ut0crc32.h"
+#include "os0file.h"
+
+#include "mysql/plugin.h"
+#include "mysql/service_thd_wait.h"
+
+/* Prototypes of the functions added to ha_innodb.cc for kill_idle_transaction */
+ibool innobase_thd_is_idle(const void* thd);
+ib_int64_t innobase_thd_get_start_time(const void* thd);
+void innobase_thd_kill(ulong thd_id);
+ulong innobase_thd_get_thread_id(const void* thd);
+
+/* Prototypes of other new functions added to ha_innodb.cc */
+ibool innobase_get_slow_log();
+
+/* The maximum allowed duration of a semaphore (lock) wait. NOTE(review): unit presumably seconds - confirm. */
+UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600;
+
+/* Idle transaction kill setting, 0 by default. NOTE(review): presumably a timeout where 0 disables the feature - confirm. */
+UNIV_INTERN long long srv_kill_idle_transaction = 0;
+
+/* How much data manipulation language (DML) statements need to be delayed,
+in microseconds, in order to reduce the lagging of the purge thread. */
+UNIV_INTERN ulint srv_dml_needed_delay = 0;
+
+UNIV_INTERN ibool srv_monitor_active = FALSE;
+UNIV_INTERN ibool srv_error_monitor_active = FALSE;
+
+UNIV_INTERN ibool srv_buf_dump_thread_active = FALSE;
+
+UNIV_INTERN ibool srv_dict_stats_thread_active = FALSE;
+
+UNIV_INTERN const char* srv_main_thread_op_info = ""; /* text naming the main thread's current operation */
+
+/** Prefix used by MySQL to indicate pre-5.1 table name encoding
+(9 characters plus the terminating NUL fill the array) */
+const char srv_mysql50_table_name_prefix[10] = "#mysql50#";
+
+/* Server parameters which are read from the initfile */
+
+/* The following are directory paths which are concatenated before file
+names, where the file name itself may also contain a path */
+
+UNIV_INTERN char* srv_data_home = NULL;
+
+/** Rollback files directory, can be absolute. */
+UNIV_INTERN char* srv_undo_dir = NULL;
+
+/** The number of tablespaces to use for rollback segments. */
+UNIV_INTERN ulong srv_undo_tablespaces = 8;
+
+/** The number of UNDO tablespaces that are open and ready to use. */
+UNIV_INTERN ulint srv_undo_tablespaces_open = 8;
+
+/** The number of rollback segments to use. */
+UNIV_INTERN ulong srv_undo_logs = 1;
+
+#ifdef UNIV_LOG_ARCHIVE
+UNIV_INTERN char* srv_arch_dir = NULL;
+UNIV_INTERN ulong srv_log_arch_expire_sec = 0;
+#endif /* UNIV_LOG_ARCHIVE */
+
+/** Set if InnoDB must operate in read-only mode. We don't do any
+recovery and open all tables in RO mode instead of RW mode. We don't
+sync the max trx id to disk either. */
+UNIV_INTERN my_bool srv_read_only_mode;
+/** Store each table created by a user in its own file; data
+dictionary tables are in the system tablespace 0 */
+UNIV_INTERN my_bool srv_file_per_table;
+/** The file format to use on new *.ibd files. */
+UNIV_INTERN ulint srv_file_format = 0;
+/** Whether to check file format during startup. A value of
+UNIV_FORMAT_MAX + 1 means no checking, i.e. FALSE. The default is to
+set it to the highest format we support. */
+UNIV_INTERN ulint srv_max_file_format_at_startup = UNIV_FORMAT_MAX;
+
+#if UNIV_FORMAT_A
+# error "UNIV_FORMAT_A must be 0!"
+#endif
+
+/** Place locks on records only, i.e. do not use next-key locking except
+on duplicate key checking and foreign key checking */
+UNIV_INTERN ibool srv_locks_unsafe_for_binlog = FALSE;
+/** Sort buffer size in index creation */
+UNIV_INTERN ulong srv_sort_buf_size = 1048576;
+/** Maximum modification log file size for online index creation */
+UNIV_INTERN unsigned long long srv_online_max_size;
+
+/* If this flag is TRUE, then we will use the native aio of the
+OS (provided we compiled Innobase with it in), otherwise we will
+use simulated aio we build below with threads.
+Currently we support native aio on Windows and Linux */
+UNIV_INTERN my_bool srv_use_native_aio = TRUE;
+
+#ifdef __WIN__
+/* Windows native condition variables. We use runtime loading / function
+pointers, because they are not available on Windows Server 2003 and
+Windows XP/2000.
+
+We use condition variables for events on Windows if possible, even if os_event
+resembles the Windows kernel event object well API-wise. The reason is
+performance: kernel objects are heavyweight and WaitForSingleObject() is a
+performance killer causing the calling thread to context switch. Besides, InnoDB
+is preallocating a large number (often millions) of os_events. With kernel event
+objects it takes a big chunk out of non-paged pool, which is better suited
+for tasks like IO than for storing idle event objects. */
+UNIV_INTERN ibool srv_use_native_conditions = FALSE;
+#endif /* __WIN__ */
+
+UNIV_INTERN ulint srv_n_data_files = 0;
+UNIV_INTERN char** srv_data_file_names = NULL;
+/* size in database pages */
+UNIV_INTERN ulint* srv_data_file_sizes = NULL;
+
+UNIV_INTERN my_bool srv_track_changed_pages = FALSE;
+
+UNIV_INTERN ulonglong srv_max_bitmap_file_size = 100 * 1024 * 1024; /* NOTE(review): presumably bytes (100 MiB) - confirm */
+
+UNIV_INTERN ulonglong srv_max_changed_pages = 0;
+
+/** When TRUE, fake change transactions take S rather than X row locks.
+ When FALSE, row locks are not taken at all. */
+UNIV_INTERN my_bool srv_fake_changes_locks = TRUE;
+
+/* If TRUE, then we auto-extend the last data file */
+UNIV_INTERN ibool srv_auto_extend_last_data_file = FALSE;
+/* If != 0, this is the maximum size up to which auto-extending may grow
+the last data file */
+UNIV_INTERN ulint srv_last_file_size_max = 0;
+/* If the last data file is auto-extended, we add this
+many pages to it at a time */
+UNIV_INTERN ulong srv_auto_extend_increment = 8;
+UNIV_INTERN ulint* srv_data_file_is_raw_partition = NULL;
+
+/* If the following is TRUE we do not allow inserts etc. This protects
+the user from forgetting the 'newraw' keyword in my.cnf */
+
+UNIV_INTERN ibool srv_created_new_raw = FALSE;
+
+UNIV_INTERN char* srv_log_group_home_dir = NULL;
+
+UNIV_INTERN ulong srv_n_log_files = SRV_N_LOG_FILES_MAX;
+/* size in database pages */
+UNIV_INTERN ib_uint64_t srv_log_file_size = IB_UINT64_MAX;
+UNIV_INTERN ib_uint64_t srv_log_file_size_requested;
+/* size in database pages */
+UNIV_INTERN ulint srv_log_buffer_size = ULINT_MAX;
+UNIV_INTERN uint srv_flush_log_at_timeout = 1;
+UNIV_INTERN ulong srv_page_size = UNIV_PAGE_SIZE_DEF;
+UNIV_INTERN ulong srv_page_size_shift = UNIV_PAGE_SIZE_SHIFT_DEF;
+UNIV_INTERN char srv_use_global_flush_log_at_trx_commit = TRUE;
+
+/* Try to flush dirty pages so as to avoid IO bursts at
+the checkpoints. */
+UNIV_INTERN char srv_adaptive_flushing = TRUE;
+
+UNIV_INTERN ulong srv_show_locks_held = 10;
+UNIV_INTERN ulong srv_show_verbose_locks = 0;
+
+/** Maximum number of times allowed to conditionally acquire a
+mutex before switching to a blocking wait on the mutex */
+#define MAX_MUTEX_NOWAIT 20
+
+/** Evaluates to true while the number of failed nonblocking mutex
+acquisition attempts is still below MAX_MUTEX_NOWAIT. Once it is not,
+srv_printf_innodb_monitor() will request mutex acquisition
+with mutex_enter(), which will wait until it gets the mutex. */
+#define MUTEX_NOWAIT(mutex_skipped) ((mutex_skipped) < MAX_MUTEX_NOWAIT)
+
+/** The sort order table of the MySQL latin1_swedish_ci character set
+collation */
+UNIV_INTERN const byte* srv_latin1_ordering;
+
+/* Use the OS/external memory allocator */
+UNIV_INTERN my_bool srv_use_sys_malloc = TRUE;
+/* requested size in kilobytes */
+UNIV_INTERN ulint srv_buf_pool_size = ULINT_MAX;
+/* Force virtual page preallocation (prefault) */
+UNIV_INTERN my_bool srv_buf_pool_populate = FALSE;
+/* Requested number of buffer pool instances */
+UNIV_INTERN ulint srv_buf_pool_instances = 1;
+/* Number of locks to protect buf_pool->page_hash */
+UNIV_INTERN ulong srv_n_page_hash_locks = 16;
+/** Scan depth for an LRU flush batch, i.e. the number of blocks scanned */
+UNIV_INTERN ulong srv_LRU_scan_depth = 1024;
+/** Whether or not to flush neighbors of a block */
+UNIV_INTERN ulong srv_flush_neighbors = 1;
+/* previously requested size */
+UNIV_INTERN ulint srv_buf_pool_old_size;
+/* current size in kilobytes */
+UNIV_INTERN ulint srv_buf_pool_curr_size = 0;
+/* size in bytes */
+UNIV_INTERN ulint srv_mem_pool_size = ULINT_MAX;
+UNIV_INTERN ulint srv_lock_table_size = ULINT_MAX;
+
+/** Query thread preflush algorithm */
+UNIV_INTERN ulong srv_foreground_preflush
+ = SRV_FOREGROUND_PREFLUSH_EXP_BACKOFF;
+
+/** The maximum time limit for a single LRU tail flush iteration by the page
+cleaner thread. NOTE(review): unit presumably milliseconds - confirm. */
+UNIV_INTERN ulint srv_cleaner_max_lru_time = 1000;
+
+/** The maximum time limit for a single flush list flush iteration by the page
+cleaner thread. NOTE(review): unit presumably milliseconds - confirm. */
+UNIV_INTERN ulint srv_cleaner_max_flush_time = 1000;
+
+/** Page cleaner flush list flush batches are further divided into chunks of
+this size */
+UNIV_INTERN ulint srv_cleaner_flush_chunk_size = 100;
+
+/** Page cleaner LRU list flush batches are further divided into chunks of
+this size */
+UNIV_INTERN ulint srv_cleaner_lru_chunk_size = 100;
+
+/** If free list length is lower than this percentage of srv_LRU_scan_depth,
+page cleaner LRU flushes will issue flush batches to the same instance in a
+row */
+UNIV_INTERN ulint srv_cleaner_free_list_lwm = 10;
+
+/** If TRUE, the page cleaner heuristics use evicted page counts instead of
+flushed page counts */
+UNIV_INTERN my_bool srv_cleaner_eviction_factor = FALSE;
+
+/** Page cleaner LSN age factor formula option */
+UNIV_INTERN ulong srv_cleaner_lsn_age_factor
+ = SRV_CLEANER_LSN_AGE_FACTOR_HIGH_CHECKPOINT;
+
+/** Algorithm option for handling an empty free list in a query thread */
+UNIV_INTERN ulong srv_empty_free_list_algorithm
+ = SRV_EMPTY_FREE_LIST_BACKOFF;
+
+/* This parameter is deprecated. Use srv_n_[read|write]_io_threads
+instead. */
+UNIV_INTERN ulint srv_n_file_io_threads = ULINT_MAX;
+UNIV_INTERN ulint srv_n_read_io_threads = ULINT_MAX;
+UNIV_INTERN ulint srv_n_write_io_threads = ULINT_MAX;
+
+/* Switch to enable random read ahead. */
+UNIV_INTERN my_bool srv_random_read_ahead = FALSE;
+
+/* The log block size */
+UNIV_INTERN ulint srv_log_block_size = 0;
+
+/* User settable value of the number of pages that must be present
+in the buffer cache and accessed sequentially for InnoDB to trigger a
+readahead request. */
+UNIV_INTERN ulong srv_read_ahead_threshold = 56;
+
+#ifdef UNIV_LOG_ARCHIVE
+UNIV_INTERN ibool srv_log_archive_on = FALSE;
+UNIV_INTERN ibool srv_archive_recovery = 0;
+UNIV_INTERN ib_uint64_t srv_archive_recovery_limit_lsn;
+#endif /* UNIV_LOG_ARCHIVE */
+
+/* This parameter is used to throttle the number of insert buffers that are
+merged in a batch. By increasing this parameter on a faster disk you can
+possibly reduce the number of I/O operations performed to complete the
+merge operation. The value of this parameter is used as is by the
+background loop when the system is idle (low load); on a busy system
+the parameter is scaled down by a factor of 4, to avoid putting
+a heavier load on the I/O subsystem. */
+
+UNIV_INTERN ulong srv_insert_buffer_batch_size = 20;
+
+UNIV_INTERN char* srv_file_flush_method_str = NULL;
+UNIV_INTERN ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC;
+UNIV_INTERN ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
+
+UNIV_INTERN ulint srv_max_n_open_files = 300;
+
+/* Number of IO operations per second the server can do */
+UNIV_INTERN ulong srv_io_capacity = 200;
+UNIV_INTERN ulong srv_max_io_capacity = 400;
+
+/* The InnoDB main thread tries to keep the ratio of modified pages
+in the buffer pool to all database pages in the buffer pool smaller than
+the following percentage. But it is not guaranteed that the value stays below
+that during a time of heavy update/insert activity. */
+
+UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;
+UNIV_INTERN ulong srv_max_dirty_pages_pct_lwm = 50;
+
+/* This is the percentage of log capacity at which adaptive flushing,
+if enabled, will kick in. */
+UNIV_INTERN ulong srv_adaptive_flushing_lwm = 10;
+
+/* Number of iterations over which adaptive flushing is averaged. */
+UNIV_INTERN ulong srv_flushing_avg_loops = 30;
+
+/* The thread id (tid) of the cleaner thread */
+UNIV_INTERN os_tid_t srv_cleaner_tid;
+
+/* The thread ids (tids) of the purge threads */
+UNIV_INTERN os_tid_t srv_purge_tids[SRV_MAX_N_PURGE_THREADS];
+
+/* The thread ids (tids) of the I/O threads */
+UNIV_INTERN os_tid_t srv_io_tids[SRV_MAX_N_IO_THREADS];
+
+/* The thread id (tid) of the master thread */
+UNIV_INTERN os_tid_t srv_master_tid;
+
+/* The relative scheduling priority of the cleaner thread */
+UNIV_INTERN ulint srv_sched_priority_cleaner = 19;
+
+/* The relative scheduling priority of the purge threads */
+UNIV_INTERN ulint srv_sched_priority_purge = 19;
+
+/* The relative scheduling priority of the I/O threads */
+UNIV_INTERN ulint srv_sched_priority_io = 19;
+
+/* The relative scheduling priority of the master thread */
+UNIV_INTERN ulint srv_sched_priority_master = 19;
+
+/* The relative priority of the current thread (thread-local). If 0, low
+priority; if 1, high priority. */
+UNIV_INTERN UNIV_THREAD_LOCAL ulint srv_current_thread_priority = 0;
+
+/* The relative priority of the purge coordinator and worker threads. */
+UNIV_INTERN my_bool srv_purge_thread_priority = FALSE;
+
+/* The relative priority of the I/O threads. */
+UNIV_INTERN my_bool srv_io_thread_priority = FALSE;
+
+/* The relative priority of the cleaner thread. */
+UNIV_INTERN my_bool srv_cleaner_thread_priority = FALSE;
+
+/* The relative priority of the master thread. */
+UNIV_INTERN my_bool srv_master_thread_priority = FALSE;
+
+/* The number of purge threads to use. */
+UNIV_INTERN ulong srv_n_purge_threads = 1;
+
+/* The number of pages to purge in one batch */
+UNIV_INTERN ulong srv_purge_batch_size = 20;
+
+/* Internal setting for "innodb_stats_method". Decides how InnoDB treats
+NULL value when collecting statistics. By default, it is set to
+SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */
+UNIV_INTERN ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
+
+UNIV_INTERN srv_stats_t srv_stats;
+
+/* structure to pass status variables to MySQL */
+UNIV_INTERN export_var_t export_vars;
+
+/** Normally 0. When nonzero, skip some phases of crash recovery,
+starting from SRV_FORCE_IGNORE_CORRUPT, so that data can be recovered
+by SELECT or mysqldump. When this is nonzero, we do not allow any user
+modifications to the data. */
+UNIV_INTERN ulong srv_force_recovery;
+#ifndef DBUG_OFF
+/** Inject a crash at different steps of the recovery process.
+This is for testing and debugging only. */
+UNIV_INTERN ulong srv_force_recovery_crash;
+#endif /* !DBUG_OFF */
+
+/** Print all user-level transactions deadlocks to mysqld stderr */
+
+UNIV_INTERN my_bool srv_print_all_deadlocks = FALSE;
+
+/* Produce a stacktrace on long semaphore wait */
+UNIV_INTERN my_bool srv_use_stacktrace = FALSE;
+
+/** Enable INFORMATION_SCHEMA.innodb_cmp_per_index */
+UNIV_INTERN my_bool srv_cmp_per_index_enabled = FALSE;
+
+/* If the following is set to 1 then we do not run purge and insert buffer
+merge to completion before shutdown. If it is set to 2, do not even flush the
+buffer pool to data files at the shutdown: we effectively 'crash'
+InnoDB (but lose no committed transactions). */
+UNIV_INTERN ulint srv_fast_shutdown = 0;
+
+/* Generate an innodb_status.<pid> file */
+UNIV_INTERN ibool srv_innodb_status = FALSE;
+
+/* When estimating the number of different key values in an index, sample
+this many index pages. There are 2 ways to calculate statistics:
+* persistent stats that are calculated by ANALYZE TABLE and saved
+ in the innodb database.
+* quick transient stats, that are used if persistent stats for the given
+ table/index are not found in the innodb database */
+UNIV_INTERN unsigned long long srv_stats_transient_sample_pages = 8;
+UNIV_INTERN my_bool srv_stats_persistent = TRUE;
+UNIV_INTERN unsigned long long srv_stats_persistent_sample_pages = 20;
+UNIV_INTERN my_bool srv_stats_auto_recalc = TRUE;
+
+/* Write-path switches. Only the doublewrite buffer is on by default;
+srv_use_posix_fallocate exists only when HAVE_POSIX_FALLOCATE is defined. */
+UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE;
+UNIV_INTERN ibool srv_use_atomic_writes = FALSE;
+#ifdef HAVE_POSIX_FALLOCATE
+UNIV_INTERN ibool srv_use_posix_fallocate = FALSE;
+#endif
+
+/** The doublewrite buffer is 1MB in size, i.e. it can hold 128 16K pages.
+The following parameter is the size of the buffer that is used for
+batch flushing i.e.: LRU flushing and flush_list flushing. The rest
+of the pages are used for single page flushing. */
+UNIV_INTERN ulong srv_doublewrite_batch_size = 120;
+
+UNIV_INTERN ulong srv_replication_delay = 0;
+
+UNIV_INTERN ulong srv_pass_corrupt_table = 0; /* 0:disable 1:enable */
+
+/* Defaults to SRV_CHECKSUM_ALGORITHM_INNODB. */
+UNIV_INTERN ulong srv_log_checksum_algorithm =
+ SRV_CHECKSUM_ALGORITHM_INNODB;
+
+/*-------------------------------------------*/
+UNIV_INTERN ulong srv_n_spin_wait_rounds = 30;
+UNIV_INTERN ulong srv_spin_wait_delay = 6;
+UNIV_INTERN ibool srv_priority_boost = TRUE;
+
+#ifdef UNIV_DEBUG
+/* Diagnostic print switches that exist only in UNIV_DEBUG builds; all are
+off by default. */
+UNIV_INTERN ibool srv_print_thread_releases = FALSE;
+UNIV_INTERN ibool srv_print_lock_waits = FALSE;
+UNIV_INTERN ibool srv_print_buf_io = FALSE;
+UNIV_INTERN ibool srv_print_log_io = FALSE;
+UNIV_INTERN ibool srv_print_latch_waits = FALSE;
+#endif /* UNIV_DEBUG */
+
+/* Row counter values remembered from the previous monitor refresh/print.
+The monitor code subtracts them from the current srv_stats counters to
+compute per-second averages. */
+static ulint srv_n_rows_inserted_old = 0;
+static ulint srv_n_rows_updated_old = 0;
+static ulint srv_n_rows_deleted_old = 0;
+static ulint srv_n_rows_read_old = 0;
+
+UNIV_INTERN ulint srv_truncated_status_writes = 0;
+UNIV_INTERN ulint srv_available_undo_logs = 0;
+
+/* Ensure status variables are on separate cache lines */
+
+/* The cache line size is hard-coded to 64 bytes here, and CACHE_ALIGNED
+relies on the GCC-style __attribute__ syntax. */
+#define CACHE_LINE_SIZE 64
+#define CACHE_ALIGNED __attribute__ ((aligned (CACHE_LINE_SIZE)))
+
+/* One cache line of padding in front of the aligned counters. */
+UNIV_INTERN byte
+counters_pad_start[CACHE_LINE_SIZE] __attribute__((unused)) = {0};
+
+UNIV_INTERN ulint srv_read_views_memory CACHE_ALIGNED = 0;
+UNIV_INTERN ulint srv_descriptors_memory CACHE_ALIGNED = 0;
+
+/* One cache line of padding behind the aligned counters. */
+UNIV_INTERN byte
+counters_pad_end[CACHE_LINE_SIZE] __attribute__((unused)) = {0};
+
+/* Set the following to 0 if you want InnoDB to write messages on
+stderr on startup/shutdown. */
+UNIV_INTERN ibool srv_print_verbose_log = TRUE;
+UNIV_INTERN ibool srv_print_innodb_monitor = FALSE;
+UNIV_INTERN ibool srv_print_innodb_lock_monitor = FALSE;
+UNIV_INTERN ibool srv_print_innodb_tablespace_monitor = FALSE;
+UNIV_INTERN ibool srv_print_innodb_table_monitor = FALSE;
+
+/* Arrays of English strings describing the current state and the function
+of each i/o handler thread, one entry per i/o thread 'segment'. */
+
+UNIV_INTERN const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS];
+UNIV_INTERN const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS];
+
+/* Time of the last monitor refresh/print; used as the start of the interval
+over which per-second averages are computed. */
+UNIV_INTERN time_t srv_last_monitor_time;
+
+/* Mutex held while the monitor statistics are refreshed or printed. */
+UNIV_INTERN ib_mutex_t srv_innodb_monitor_mutex;
+
+/* Mutex for locking srv_monitor_file. Not created if srv_read_only_mode */
+UNIV_INTERN ib_mutex_t srv_monitor_file_mutex;
+
+#ifdef UNIV_PFS_MUTEX
+# ifndef HAVE_ATOMIC_BUILTINS
+/** Key to register server_mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t server_mutex_key;
+# endif /* !HAVE_ATOMIC_BUILTINS */
+/** Key to register srv_innodb_monitor_mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t srv_innodb_monitor_mutex_key;
+/** Key to register srv_monitor_file_mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t srv_monitor_file_mutex_key;
+/** Key to register srv_dict_tmpfile_mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
+/** Key to register srv_misc_tmpfile_mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
+/** Key to register srv_sys_t::mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t srv_sys_mutex_key;
+/** Key to register srv_sys_t::tasks_mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t srv_sys_tasks_mutex_key;
+#endif /* UNIV_PFS_MUTEX */
+
+/** Temporary file for innodb monitor output */
+UNIV_INTERN FILE* srv_monitor_file;
+/** Mutex for locking srv_dict_tmpfile. Not created if srv_read_only_mode.
+This mutex has a very high rank; threads reserving it should not
+be holding any InnoDB latches. */
+UNIV_INTERN ib_mutex_t srv_dict_tmpfile_mutex;
+/** Temporary file for output from the data dictionary */
+UNIV_INTERN FILE* srv_dict_tmpfile;
+/** Mutex for locking srv_misc_tmpfile. Not created if srv_read_only_mode.
+This mutex has a very low rank; threads reserving it should not
+acquire any further latches or sleep before releasing this one. */
+UNIV_INTERN ib_mutex_t srv_misc_tmpfile_mutex;
+/** Temporary file for miscellaneous diagnostic output */
+UNIV_INTERN FILE* srv_misc_tmpfile;
+
+/* Process number and thread id of the main (master) thread; both are
+printed in the InnoDB monitor output. */
+UNIV_INTERN ulint srv_main_thread_process_no = 0;
+UNIV_INTERN ulint srv_main_thread_id = 0;
+
+/* The following counts are used by the srv_master_thread. */
+
+/** Iterations of the loop bounded by 'srv_active' label. */
+static ulint srv_main_active_loops = 0;
+/** Iterations of the loop bounded by the 'srv_idle' label. */
+static ulint srv_main_idle_loops = 0;
+/** Iterations of the loop bounded by the 'srv_shutdown' label. */
+static ulint srv_main_shutdown_loops = 0;
+/** Log writes involving flush. */
+static ulint srv_log_writes_and_flush = 0;
+
+/* This is only ever touched by the master thread. It records the
+time when the last flush of log file has happened. The master
+thread ensures that we flush the log files at least once per
+second. */
+static time_t srv_last_log_flush_time;
+
+/* Interval in seconds at which various tasks are performed by the
+master thread when the server is active. In order to balance the workload,
+we should try to keep the intervals such that they are not multiples of
+each other. For example, if we have intervals for various tasks
+defined as 5, 10, 15, 60 then all tasks will be performed when
+current_time % 60 == 0 and no tasks will be performed when
+current_time % 5 != 0. */
+
+# define SRV_MASTER_CHECKPOINT_INTERVAL (7)
+# define SRV_MASTER_PURGE_INTERVAL (10)
+#ifdef MEM_PERIODIC_CHECK
+# define SRV_MASTER_MEM_VALIDATE_INTERVAL (13)
+#endif /* MEM_PERIODIC_CHECK */
+# define SRV_MASTER_DICT_LRU_INTERVAL (47)
+
+/** Acquire the system mutex (srv_sys->mutex). */
+#define srv_sys_mutex_enter() do { \
+ mutex_enter(&srv_sys->mutex); \
+} while (0)
+
+/** Test if the system mutex is owned. Because of the
+'&& !srv_read_only_mode' term this always evaluates to false in
+read-only mode. */
+#define srv_sys_mutex_own() (mutex_own(&srv_sys->mutex) \
+ && !srv_read_only_mode)
+
+/** Release the system mutex (srv_sys->mutex). */
+#define srv_sys_mutex_exit() do { \
+ mutex_exit(&srv_sys->mutex); \
+} while (0)
+
+/** Lock wait timeout to apply to a transaction: the value returned by
+thd_lock_wait_timeout() for the transaction's MySQL thread when
+trx->lock.allowed_to_wait is set, otherwise 0. */
+#define fetch_lock_wait_timeout(trx) \
+ ((trx)->lock.allowed_to_wait \
+ ? thd_lock_wait_timeout((trx)->mysql_thd) \
+ : 0)
+
+/*
+ IMPLEMENTATION OF THE SERVER MAIN PROGRAM
+ =========================================
+
+There is the following analogue between this database
+server and an operating system kernel:
+
+DB concept equivalent OS concept
+---------- ---------------------
+transaction -- process;
+
+query thread -- thread;
+
+lock -- semaphore;
+
+kernel -- kernel;
+
+query thread execution:
+(a) without lock mutex
+reserved -- process executing in user mode;
+(b) with lock mutex reserved
+ -- process executing in kernel mode;
+
+The server has several background threads all running at the same
+priority as user threads. It periodically checks if there is anything
+happening in the server which requires intervention of the master
+thread. Such situations may be, for example, when flushing of dirty
+blocks is needed in the buffer pool or old version of database rows
+have to be cleaned away (purged). The user can configure a separate
+dedicated purge thread(s) too, in which case the master thread does not
+do any purging.
+
+The threads which we call user threads serve the queries of the MySQL
+server. They run at normal priority.
+
+When there is no activity in the system, also the master thread
+suspends itself to wait for an event making the server totally silent.
+
+There is still one complication in our server design. If a
+background utility thread obtains a resource (e.g., mutex) needed by a user
+thread, and there is also some other user activity in the system,
+the user thread may have to wait indefinitely long for the
+resource, as the OS does not schedule a background thread if
+there is some other runnable user thread. This problem is called
+priority inversion in real-time programming.
+
+One solution to the priority inversion problem would be to keep record
+of which thread owns which resource and in the above case boost the
+priority of the background thread so that it will be scheduled and it
+can release the resource. This solution is called priority inheritance
+in real-time programming. A drawback of this solution is that the overhead
+of acquiring a mutex increases slightly, maybe 0.2 microseconds on a 100
+MHz Pentium, because the thread has to call os_thread_get_curr_id. This may
+be compared to 0.5 microsecond overhead for a mutex lock-unlock pair. Note
+that the thread cannot store the information in the resource, say a mutex,
+itself, because competing threads could wipe out the information if it is
+stored before acquiring the mutex, and if it stored afterwards, the
+information is outdated for the time of one machine instruction, at least.
+(To be precise, the information could be stored to lock_word in mutex if
+the machine supports atomic swap.)
+
+The above solution with priority inheritance may become actual in the
+future, currently we do not implement any priority twiddling solution.
+Our general aim is to reduce the contention of all mutexes by making
+them more fine grained.
+
+The thread table contains information of the current status of each
+thread existing in the system, and also the event semaphores used in
+suspending the master thread and utility threads when they have nothing
+to do. The thread table can be seen as an analogue to the process table
+in a traditional Unix implementation. */
+
+/** The server system struct. It holds the background task queue and the
+thread table, each protected by its own mutex. */
+struct srv_sys_t{
+ ib_mutex_t tasks_mutex; /*!< mutex protecting the
+ tasks queue */
+ UT_LIST_BASE_NODE_T(que_thr_t)
+ tasks; /*!< task queue */
+
+ ib_mutex_t mutex; /*!< mutex protecting the
+ fields below. */
+ ulint n_sys_threads; /*!< size of the sys_threads
+ array */
+
+ srv_slot_t* sys_threads; /*!< server thread table;
+ srv_init() points this at the
+ memory directly following the
+ struct */
+
+ ulint n_threads_active[SRV_MASTER + 1];
+ /*!< number of threads active
+ in a thread class, indexed by
+ srv_thread_type */
+
+ srv_stats_t::ulint_ctr_1_t
+ activity_count; /*!< For tracking server
+ activity */
+};
+
+#ifndef HAVE_ATOMIC_BUILTINS
+/** Mutex protecting some server global variables. Only defined when
+atomic builtins are not available. */
+UNIV_INTERN ib_mutex_t server_mutex;
+#endif /* !HAVE_ATOMIC_BUILTINS */
+
+/** The server system instance; allocated in srv_init() and released in
+srv_free(). */
+static srv_sys_t* srv_sys = NULL;
+
+/** Event to signal the monitor thread. */
+UNIV_INTERN os_event_t srv_monitor_event;
+
+/** Event to signal the error thread */
+UNIV_INTERN os_event_t srv_error_event;
+
+/** Event to signal the buffer pool dump/load thread */
+UNIV_INTERN os_event_t srv_buf_dump_event;
+
+/** The buffer pool dump/load file name */
+UNIV_INTERN char* srv_buf_dump_filename;
+
+/** Boolean config knobs that tell InnoDB to dump the buffer pool at shutdown
+and/or load it during startup. */
+UNIV_INTERN char srv_buffer_pool_dump_at_shutdown = FALSE;
+UNIV_INTERN char srv_buffer_pool_load_at_startup = FALSE;
+
+/** Slot index in the srv_sys->sys_threads array for the purge thread. */
+static const ulint SRV_PURGE_SLOT = 1;
+
+/** Slot index in the srv_sys->sys_threads array for the master thread. */
+static const ulint SRV_MASTER_SLOT = 0;
+
+/** Event created in srv_init() (not in read-only mode).
+NOTE(review): presumably signalled when a checkpoint completes -- confirm
+against the code that sets it. */
+UNIV_INTERN os_event_t srv_checkpoint_completed_event;
+
+/** Event created in srv_init() (not in read-only mode).
+NOTE(review): presumably signalled when the redo log thread finishes --
+confirm against the code that sets it. */
+UNIV_INTERN os_event_t srv_redo_log_thread_finished_event;
+
+/*********************************************************************//**
+Prints counters for work done by srv_master_thread: the number of active,
+shutdown and idle loop iterations (in that order) and the number of log
+writes that involved a flush.
+
+The counters are of type ulint, so they are formatted with ULINTPF rather
+than a hard-coded "%lu": on platforms where ulint is wider than
+unsigned long a "%lu" conversion would not match the argument type. */
+static
+void
+srv_print_master_thread_info(
+/*=========================*/
+	FILE  *file)	/* in: output stream */
+{
+	fprintf(file,
+		"srv_master_thread loops: " ULINTPF " srv_active, "
+		ULINTPF " srv_shutdown, " ULINTPF " srv_idle\n",
+		srv_main_active_loops,
+		srv_main_shutdown_loops,
+		srv_main_idle_loops);
+
+	fprintf(file,
+		"srv_master_thread log flush and writes: " ULINTPF "\n",
+		srv_log_writes_and_flush);
+}
+
+/*********************************************************************//**
+Records the textual state of one i/o handler thread. Only the pointer is
+stored; the string itself is not copied. An out-of-range segment index
+trips the ut_a() assertion. */
+UNIV_INTERN
+void
+srv_set_io_thread_op_info(
+/*======================*/
+	ulint		i,	/*!< in: the 'segment' of the i/o thread */
+	const char*	str)	/*!< in: constant char string describing the
+				state */
+{
+	ut_a(i < SRV_MAX_N_IO_THREADS);
+
+	const char**	entry = srv_io_thread_op_info + i;
+
+	*entry = str;
+}
+
+/*********************************************************************//**
+Sets the state description of every i/o handler thread back to the
+initial "not started yet" text. */
+UNIV_INTERN
+void
+srv_reset_io_thread_op_info()
+/*=========================*/
+{
+	const char**		entry = srv_io_thread_op_info;
+	const char** const	end = entry
+		+ UT_ARR_SIZE(srv_io_thread_op_info);
+
+	while (entry != end) {
+		*entry = "not started yet";
+		++entry;
+	}
+}
+
+#ifdef UNIV_DEBUG
+/*********************************************************************//**
+Checks that a thread type is one that may appear in the thread table
+(worker, purge or master). Any other value, including SRV_NONE, ends in
+ut_error.
+@return TRUE if ok */
+static
+ibool
+srv_thread_type_validate(
+/*=====================*/
+	srv_thread_type	type)	/*!< in: thread type */
+{
+	if (type == SRV_WORKER
+	    || type == SRV_PURGE
+	    || type == SRV_MASTER) {
+
+		return(TRUE);
+	}
+
+	/* SRV_NONE or an unknown value. */
+	ut_error;
+
+	return(FALSE);
+}
+#endif /* UNIV_DEBUG */
+
+/*********************************************************************//**
+Reads the thread type stored in a thread table slot. Debug builds also
+check that the stored type is a valid one.
+@return thread type */
+static
+srv_thread_type
+srv_slot_get_type(
+/*==============*/
+	const srv_slot_t*	slot)	/*!< in: thread slot */
+{
+	/* Read the field once so that the validated value and the
+	returned value are the same. */
+	const srv_thread_type	type = slot->type;
+
+	ut_ad(srv_thread_type_validate(type));
+
+	return(type);
+}
+
+/*********************************************************************//**
+Reserves a slot in the thread table for the current thread.
+
+The master thread always gets slot SRV_MASTER_SLOT and the purge
+coordinator slot SRV_PURGE_SLOT; a worker gets the first free slot after
+those two. The slot is marked in use and not suspended, and the active
+thread count of the given type is incremented. The whole operation runs
+under srv_sys->mutex.
+
+Asserts (ut_a) if the chosen slot is already in use or if no free worker
+slot exists.
+@return reserved slot */
+static
+srv_slot_t*
+srv_reserve_slot(
+/*=============*/
+	srv_thread_type	type)	/*!< in: type of the thread */
+{
+	srv_slot_t*	slot = NULL;
+
+	srv_sys_mutex_enter();
+
+	ut_ad(srv_thread_type_validate(type));
+
+	switch (type) {
+	case SRV_MASTER:
+		slot = &srv_sys->sys_threads[SRV_MASTER_SLOT];
+		break;
+
+	case SRV_PURGE:
+		slot = &srv_sys->sys_threads[SRV_PURGE_SLOT];
+		break;
+
+	case SRV_WORKER:
+		/* Find an empty slot, skip the master and purge slots.
+		The bound is asserted before slot->in_use is read, so a
+		full (or worker-less) table fails the assertion instead
+		of reading one element past the end of the array. */
+		for (slot = &srv_sys->sys_threads[2]; ; ++slot) {
+
+			ut_a(slot < &srv_sys->sys_threads[
+			     srv_sys->n_sys_threads]);
+
+			if (!slot->in_use) {
+				break;
+			}
+		}
+		break;
+
+	case SRV_NONE:
+		ut_error;
+	}
+
+	ut_a(!slot->in_use);
+
+	slot->in_use = TRUE;
+	slot->suspended = FALSE;
+	slot->type = type;
+
+	ut_ad(srv_slot_get_type(slot) == type);
+
+	++srv_sys->n_threads_active[type];
+
+	srv_sys_mutex_exit();
+
+	return(slot);
+}
+
+/*********************************************************************//**
+Marks the calling thread's slot as suspended and removes the thread from
+the active count of its class. The caller must hold srv_sys->mutex and
+must not be running in read-only mode (both are debug-asserted). The
+slot's event is reset so that the caller can wait on it afterwards.
+@return the current signal count of the event. */
+static
+ib_int64_t
+srv_suspend_thread_low(
+/*===================*/
+	srv_slot_t*	slot)	/*!< in/out: thread slot */
+{
+	ut_ad(!srv_read_only_mode);
+	ut_ad(srv_sys_mutex_own());
+	ut_ad(slot->in_use);
+
+	const srv_thread_type	type = srv_slot_get_type(slot);
+
+	switch (type) {
+	case SRV_NONE:
+		ut_error;
+
+	case SRV_MASTER:
+	case SRV_PURGE:
+		/* There is exactly one master thread (first slot) and
+		exactly one purge coordinator (second slot), so the
+		active count of either class must be 1 here. */
+		ut_a(srv_sys->n_threads_active[type] == 1);
+		break;
+
+	case SRV_WORKER:
+		/* Workers exist only with more than one purge thread. */
+		ut_a(srv_n_purge_threads > 1);
+		ut_a(srv_sys->n_threads_active[type] > 0);
+		break;
+	}
+
+	ut_a(!slot->suspended);
+	slot->suspended = TRUE;
+
+	ut_a(srv_sys->n_threads_active[type] > 0);
+	--srv_sys->n_threads_active[type];
+
+	return(os_event_reset(slot->event));
+}
+
+/*********************************************************************//**
+Suspends the calling thread to wait for the event in its thread slot.
+Wrapper around srv_suspend_thread_low() that takes and releases
+srv_sys->mutex around the call.
+@return the current signal count of the event. */
+static
+ib_int64_t
+srv_suspend_thread(
+/*===============*/
+	srv_slot_t*	slot)	/*!< in/out: thread slot */
+{
+	ib_int64_t	signal_count;
+
+	srv_sys_mutex_enter();
+	signal_count = srv_suspend_thread_low(slot);
+	srv_sys_mutex_exit();
+
+	return(signal_count);
+}
+
+/*********************************************************************//**
+Releases threads of the type given from suspension in the thread table.
+Walks the table in slot order and wakes suspended threads of the
+requested type until n of them have been released or the table is
+exhausted. The function acquires and releases srv_sys->mutex itself.
+@return number of threads released: this may be less than n if not
+        enough threads were suspended at the moment. */
+UNIV_INTERN
+ulint
+srv_release_threads(
+/*================*/
+	srv_thread_type	type,	/*!< in: thread type */
+	ulint		n)	/*!< in: number of threads to release */
+{
+	ulint	n_released = 0;
+
+	ut_ad(srv_thread_type_validate(type));
+	ut_ad(n > 0);
+
+	srv_sys_mutex_enter();
+
+	for (ulint i = 0; i < srv_sys->n_sys_threads; i++) {
+		srv_slot_t*	slot = &srv_sys->sys_threads[i];
+
+		/* Skip every slot that is not a suspended thread of the
+		requested type. */
+		if (!slot->in_use
+		    || srv_slot_get_type(slot) != type
+		    || !slot->suspended) {
+
+			continue;
+		}
+
+		switch (type) {
+		case SRV_NONE:
+			ut_error;
+
+		case SRV_MASTER:
+			/* The single master thread lives in the first
+			slot. */
+			ut_a(n == 1);
+			ut_a(i == SRV_MASTER_SLOT);
+			ut_a(srv_sys->n_threads_active[type] == 0);
+			break;
+
+		case SRV_PURGE:
+			/* The single purge coordinator lives in the
+			second slot. */
+			ut_a(n == 1);
+			ut_a(i == SRV_PURGE_SLOT);
+			ut_a(srv_n_purge_threads > 0);
+			ut_a(srv_sys->n_threads_active[type] == 0);
+			break;
+
+		case SRV_WORKER:
+			ut_a(srv_n_purge_threads > 1);
+			ut_a(srv_sys->n_threads_active[type]
+			     < srv_n_purge_threads - 1);
+			break;
+		}
+
+		slot->suspended = FALSE;
+
+		++srv_sys->n_threads_active[type];
+
+		os_event_set(slot->event);
+
+		if (++n_released == n) {
+			break;
+		}
+	}
+
+	srv_sys_mutex_exit();
+
+	return(n_released);
+}
+
+/*********************************************************************//**
+Gives a thread's slot back to the thread table. A thread that is still
+counted as active is first marked suspended, so that the active count of
+its class is decremented; then the slot is flagged as free. Runs under
+srv_sys->mutex. */
+static
+void
+srv_free_slot(
+/*==========*/
+	srv_slot_t*	slot)	/*!< in/out: thread slot */
+{
+	srv_sys_mutex_enter();
+
+	if (!slot->suspended) {
+		/* Still counted as active: take it out of the count. */
+		srv_suspend_thread_low(slot);
+	}
+
+	ut_ad(slot->in_use);
+
+	/* From here on the slot can be reserved again. */
+	slot->in_use = FALSE;
+
+	srv_sys_mutex_exit();
+}
+
+/*********************************************************************//**
+Initializes the server: creates the module's mutexes, allocates srv_sys
+(together with the thread table, unless in read-only mode), creates the
+background thread events and initializes a few dependent subsystems.
+
+In read-only mode the thread table is empty (n_sys_threads == 0) and
+neither srv_sys->mutex, srv_sys->tasks_mutex nor any of the events are
+created. */
+UNIV_INTERN
+void
+srv_init(void)
+/*==========*/
+{
+ ulint n_sys_threads = 0;
+ ulint srv_sys_sz = sizeof(*srv_sys);
+
+#ifndef HAVE_ATOMIC_BUILTINS
+ mutex_create(server_mutex_key, &server_mutex, SYNC_ANY_LATCH);
+#endif /* !HAVE_ATOMIC_BUILTINS */
+
+ mutex_create(srv_innodb_monitor_mutex_key,
+ &srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK);
+
+ if (!srv_read_only_mode) {
+
+ /* Number of purge threads + master thread */
+ n_sys_threads = srv_n_purge_threads + 1;
+
+ /* The slot array is allocated in the same chunk as the
+ struct, directly behind it. */
+ srv_sys_sz += n_sys_threads * sizeof(*srv_sys->sys_threads);
+ }
+
+ /* Zero-filled allocation: every slot starts out as not in use. */
+ srv_sys = static_cast<srv_sys_t*>(mem_zalloc(srv_sys_sz));
+
+ srv_sys->n_sys_threads = n_sys_threads;
+
+ if (!srv_read_only_mode) {
+
+ mutex_create(srv_sys_mutex_key, &srv_sys->mutex, SYNC_THREADS);
+
+ mutex_create(srv_sys_tasks_mutex_key,
+ &srv_sys->tasks_mutex, SYNC_ANY_LATCH);
+
+ /* The thread table starts right after the struct itself. */
+ srv_sys->sys_threads = (srv_slot_t*) &srv_sys[1];
+
+ /* Each slot gets its own event for suspending and waking the
+ thread that owns the slot. */
+ for (ulint i = 0; i < srv_sys->n_sys_threads; ++i) {
+ srv_slot_t* slot = &srv_sys->sys_threads[i];
+
+ slot->event = os_event_create();
+
+ ut_a(slot->event);
+ }
+
+ srv_error_event = os_event_create();
+
+ srv_monitor_event = os_event_create();
+
+ srv_buf_dump_event = os_event_create();
+
+ srv_checkpoint_completed_event = os_event_create();
+
+ srv_redo_log_thread_finished_event = os_event_create();
+
+ UT_LIST_INIT(srv_sys->tasks);
+ }
+
+ /* page_zip_stat_per_index_mutex is acquired from:
+ 1. page_zip_compress() (after SYNC_FSP)
+ 2. page_zip_decompress()
+ 3. i_s_cmp_per_index_fill_low() (where SYNC_DICT is acquired)
+ 4. innodb_cmp_per_index_update(), no other latches
+ since we do not acquire any other latches while holding this mutex,
+ it can have very low level. We pick SYNC_ANY_LATCH for it. */
+
+ mutex_create(
+ page_zip_stat_per_index_mutex_key,
+ &page_zip_stat_per_index_mutex, SYNC_ANY_LATCH);
+
+ /* Create dummy indexes for infimum and supremum records */
+
+ dict_ind_init();
+
+ srv_conc_init();
+
+ /* Initialize some INFORMATION SCHEMA internal structures */
+ trx_i_s_cache_init(trx_i_s_cache);
+
+ ut_crc32_init();
+}
+
+/*********************************************************************//**
+Frees the data structures created in srv_init(): the srv_sys allocation
+(which also contains the thread table), the INFORMATION SCHEMA cache and,
+unless in read-only mode, srv_buf_dump_event.
+
+NOTE(review): the other events created in srv_init() (the per-slot events,
+srv_error_event, srv_monitor_event, srv_checkpoint_completed_event and
+srv_redo_log_thread_finished_event) are not freed here; presumably they
+are released elsewhere during shutdown -- confirm. */
+UNIV_INTERN
+void
+srv_free(void)
+/*==========*/
+{
+ srv_conc_free();
+
+ /* The mutexes srv_sys->mutex and srv_sys->tasks_mutex should have
+ been freed by sync_close() already. */
+ mem_free(srv_sys);
+ srv_sys = NULL;
+
+ trx_i_s_cache_free(trx_i_s_cache);
+
+ /* The event was only created when not in read-only mode. */
+ if (!srv_read_only_mode) {
+ os_event_free(srv_buf_dump_event);
+ srv_buf_dump_event = NULL;
+ }
+}
+
+/*********************************************************************//**
+Initializes the synchronization primitives, memory system, and the thread
+local storage. The subsystems are initialized one after another in a fixed
+order.
+NOTE(review): the order presumably matters (later initializers may depend
+on earlier ones) -- confirm before reordering. */
+UNIV_INTERN
+void
+srv_general_init(void)
+/*==================*/
+{
+ ut_mem_init();
+ /* Reset the system variables in the recovery module. */
+ recv_sys_var_init();
+ os_sync_init();
+ sync_init();
+ /* The memory pool is sized by srv_mem_pool_size. */
+ mem_init(srv_mem_pool_size);
+ que_init();
+ row_mysql_init();
+}
+
+/*********************************************************************//**
+Converts the start-up parameters to the units used inside InnoDB:
+data file sizes and the maximum size of the last data file are scaled from
+megabytes to pages, the log file size and log buffer size from bytes to
+pages, and the lock table size is set to five times the number of pages
+in the buffer pool. */
+static
+void
+srv_normalize_init_values(void)
+/*===========================*/
+{
+	for (ulint i = 0; i < srv_n_data_files; i++) {
+		srv_data_file_sizes[i] *= (1024 * 1024) / UNIV_PAGE_SIZE;
+	}
+
+	srv_last_file_size_max *= (1024 * 1024) / UNIV_PAGE_SIZE;
+
+	srv_log_file_size /= UNIV_PAGE_SIZE;
+
+	srv_log_buffer_size /= UNIV_PAGE_SIZE;
+
+	srv_lock_table_size = 5 * (srv_buf_pool_size / UNIV_PAGE_SIZE);
+}
+
+/*********************************************************************//**
+Boots the InnoDB server: normalizes the start-up parameters, initializes
+the general infrastructure, then this module, and finally calls
+srv_mon_create(). */
+UNIV_INTERN
+void
+srv_boot(void)
+/*==========*/
+{
+ /* Transform the init parameter values given by MySQL to
+ use units we use inside InnoDB: */
+
+ srv_normalize_init_values();
+
+ /* Initialize synchronization primitives, memory management, and thread
+ local storage */
+
+ srv_general_init();
+
+ /* Initialize this module */
+
+ srv_init();
+ srv_mon_create();
+}
+
+/******************************************************************//**
+Refreshes the values used to calculate per-second averages: records the
+current time as the start of a new measuring interval and remembers the
+current values of the search and row counters as the baseline. The whole
+refresh runs under srv_innodb_monitor_mutex. */
+static
+void
+srv_refresh_innodb_monitor_stats(void)
+/*==================================*/
+{
+ mutex_enter(&srv_innodb_monitor_mutex);
+
+ /* Start of the new measuring interval. */
+ srv_last_monitor_time = time(NULL);
+
+ os_aio_refresh_stats();
+
+ /* Baseline for the hash / non-hash searches per second figures. */
+ btr_cur_n_sea_old = btr_cur_n_sea;
+ btr_cur_n_non_sea_old = btr_cur_n_non_sea;
+
+ log_refresh_stats();
+
+ buf_refresh_io_stats_all();
+
+ /* Baseline for the inserts/updates/deletes/reads per second figures. */
+ srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
+ srv_n_rows_updated_old = srv_stats.n_rows_updated;
+ srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
+ srv_n_rows_read_old = srv_stats.n_rows_read;
+
+ mutex_exit(&srv_innodb_monitor_mutex);
+}
+
+/******************************************************************//**
+Outputs to a file the output of the InnoDB Monitor.
+@return FALSE if not all information printed
+due to failure to obtain necessary mutex */
+UNIV_INTERN
+ibool
+srv_printf_innodb_monitor(
+/*======================*/
+ FILE* file, /*!< in: output stream */
+ ibool nowait, /*!< in: whether to wait for the
+ lock_sys_t:: mutex */
+ ulint* trx_start_pos, /*!< out: file position of the start of
+ the list of active transactions */
+ ulint* trx_end) /*!< out: file position of the end of
+ the list of active transactions */
+{
+ double time_elapsed;
+ time_t current_time;
+ ulint n_reserved;
+ ibool ret;
+
+ ulong btr_search_sys_constant;
+ ulong btr_search_sys_variable;
+ ulint lock_sys_subtotal;
+ ulint recv_sys_subtotal;
+
+ ulint i;
+ trx_t* trx;
+
+ mutex_enter(&srv_innodb_monitor_mutex);
+
+ current_time = time(NULL);
+
+ /* We add 0.001 seconds to time_elapsed to prevent division
+ by zero if two users happen to call SHOW ENGINE INNODB STATUS at the
+ same time */
+
+ time_elapsed = difftime(current_time, srv_last_monitor_time)
+ + 0.001;
+
+ srv_last_monitor_time = time(NULL);
+
+ fputs("\n=====================================\n", file);
+
+ ut_print_timestamp(file);
+ fprintf(file,
+ " INNODB MONITOR OUTPUT\n"
+ "=====================================\n"
+ "Per second averages calculated from the last %lu seconds\n",
+ (ulong) time_elapsed);
+
+ fputs("-----------------\n"
+ "BACKGROUND THREAD\n"
+ "-----------------\n", file);
+ srv_print_master_thread_info(file);
+
+ fputs("----------\n"
+ "SEMAPHORES\n"
+ "----------\n", file);
+ sync_print(file);
+
+ /* Conceptually, srv_innodb_monitor_mutex has a very high latching
+ order level in sync0sync.h, while dict_foreign_err_mutex has a very
+ low level 135. Therefore we can reserve the latter mutex here without
+ a danger of a deadlock of threads. */
+
+ mutex_enter(&dict_foreign_err_mutex);
+
+ if (!srv_read_only_mode && ftell(dict_foreign_err_file) != 0L) {
+ fputs("------------------------\n"
+ "LATEST FOREIGN KEY ERROR\n"
+ "------------------------\n", file);
+ ut_copy_file(file, dict_foreign_err_file);
+ }
+
+ mutex_exit(&dict_foreign_err_mutex);
+
+ /* Only if lock_print_info_summary proceeds correctly,
+ before we call the lock_print_info_all_transactions
+ to print all the lock information. IMPORTANT NOTE: This
+ function acquires the lock mutex on success. */
+ ret = lock_print_info_summary(file, nowait);
+
+ if (ret) {
+ if (trx_start_pos) {
+ long t = ftell(file);
+ if (t < 0) {
+ *trx_start_pos = ULINT_UNDEFINED;
+ } else {
+ *trx_start_pos = (ulint) t;
+ }
+ }
+
+ /* NOTE: If we get here then we have the lock mutex. This
+ function will release the lock mutex that we acquired when
+ we called the lock_print_info_summary() function earlier. */
+
+ lock_print_info_all_transactions(file);
+
+ if (trx_end) {
+ long t = ftell(file);
+ if (t < 0) {
+ *trx_end = ULINT_UNDEFINED;
+ } else {
+ *trx_end = (ulint) t;
+ }
+ }
+ }
+
+ fputs("--------\n"
+ "FILE I/O\n"
+ "--------\n", file);
+ os_aio_print(file);
+
+ fputs("-------------------------------------\n"
+ "INSERT BUFFER AND ADAPTIVE HASH INDEX\n"
+ "-------------------------------------\n", file);
+ ibuf_print(file);
+
+
+ fprintf(file,
+ "%.2f hash searches/s, %.2f non-hash searches/s\n",
+ (btr_cur_n_sea - btr_cur_n_sea_old)
+ / time_elapsed,
+ (btr_cur_n_non_sea - btr_cur_n_non_sea_old)
+ / time_elapsed);
+ btr_cur_n_sea_old = btr_cur_n_sea;
+ btr_cur_n_non_sea_old = btr_cur_n_non_sea;
+
+ fputs("---\n"
+ "LOG\n"
+ "---\n", file);
+ log_print(file);
+
+ fputs("----------------------\n"
+ "BUFFER POOL AND MEMORY\n"
+ "----------------------\n", file);
+ fprintf(file,
+ "Total memory allocated " ULINTPF
+ "; in additional pool allocated " ULINTPF "\n",
+ ut_total_allocated_memory,
+ mem_pool_get_reserved(mem_comm_pool));
+
+ fprintf(file,
+ "Total memory allocated by read views " ULINTPF "\n",
+ os_atomic_increment_ulint(&srv_read_views_memory, 0));
+
+ /* Calculate AHI constant and variable memory allocations */
+
+ btr_search_sys_constant = 0;
+ btr_search_sys_variable = 0;
+
+ ut_ad(btr_search_sys->hash_tables);
+
+ for (i = 0; i < btr_search_index_num; i++) {
+ hash_table_t* ht = btr_search_sys->hash_tables[i];
+
+ ut_ad(ht);
+ ut_ad(ht->heap);
+
+ /* Multiple mutexes/heaps are currently never used for adaptive
+ hash index tables. */
+ ut_ad(!ht->n_sync_obj);
+ ut_ad(!ht->heaps);
+
+ btr_search_sys_variable += mem_heap_get_size(ht->heap);
+ btr_search_sys_constant += ht->n_cells * sizeof(hash_cell_t);
+ }
+
+ lock_sys_subtotal = 0;
+ if (trx_sys) {
+ mutex_enter(&trx_sys->mutex);
+ trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
+ while (trx) {
+ lock_sys_subtotal
+ += ((trx->lock.lock_heap)
+ ? mem_heap_get_size(trx->lock.lock_heap)
+ : 0);
+ trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
+ }
+ mutex_exit(&trx_sys->mutex);
+ }
+
+ recv_sys_subtotal = ((recv_sys && recv_sys->addr_hash)
+ ? mem_heap_get_size(recv_sys->heap) : 0);
+
+ fprintf(file,
+ "Internal hash tables (constant factor + variable factor)\n"
+ " Adaptive hash index %lu \t(%lu + " ULINTPF ")\n"
+ " Page hash %lu (buffer pool 0 only)\n"
+ " Dictionary cache %lu \t(%lu + " ULINTPF ")\n"
+ " File system %lu \t(%lu + " ULINTPF ")\n"
+ " Lock system %lu \t(%lu + " ULINTPF ")\n"
+ " Recovery system %lu \t(%lu + " ULINTPF ")\n",
+
+ btr_search_sys_constant + btr_search_sys_variable,
+ btr_search_sys_constant,
+ btr_search_sys_variable,
+
+ (ulong) (buf_pool_from_array(0)->page_hash->n_cells * sizeof(hash_cell_t)),
+
+ (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells
+ + dict_sys->table_id_hash->n_cells
+ ) * sizeof(hash_cell_t)
+ + dict_sys->size) : 0),
+ (ulong) (dict_sys ? ((dict_sys->table_hash->n_cells
+ + dict_sys->table_id_hash->n_cells
+ ) * sizeof(hash_cell_t)) : 0),
+ dict_sys ? (dict_sys->size) : 0,
+
+ (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)
+ + fil_system_hash_nodes()),
+ (ulong) (fil_system_hash_cells() * sizeof(hash_cell_t)),
+ fil_system_hash_nodes(),
+
+ (ulong) ((lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0)
+ + lock_sys_subtotal),
+ (ulong) (lock_sys ? (lock_sys->rec_hash->n_cells * sizeof(hash_cell_t)) : 0),
+ lock_sys_subtotal,
+
+ (ulong) (((recv_sys && recv_sys->addr_hash)
+ ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0)
+ + recv_sys_subtotal),
+ (ulong) ((recv_sys && recv_sys->addr_hash)
+ ? (recv_sys->addr_hash->n_cells * sizeof(hash_cell_t)) : 0),
+ recv_sys_subtotal);
+
+ fprintf(file, "Dictionary memory allocated " ULINTPF "\n",
+ dict_sys->size);
+
+ buf_print_io(file);
+
+ fputs("--------------\n"
+ "ROW OPERATIONS\n"
+ "--------------\n", file);
+ fprintf(file, "%ld queries inside InnoDB, %lu queries in queue\n",
+ (long) srv_conc_get_active_threads(),
+ srv_conc_get_waiting_threads());
+
+ mutex_enter(&trx_sys->mutex);
+
+ fprintf(file, "%lu read views open inside InnoDB\n",
+ UT_LIST_GET_LEN(trx_sys->view_list));
+
+ fprintf(file, "%lu RW transactions active inside InnoDB\n",
+ UT_LIST_GET_LEN(trx_sys->rw_trx_list));
+
+ fprintf(file, "%lu RO transactions active inside InnoDB\n",
+ UT_LIST_GET_LEN(trx_sys->ro_trx_list));
+
+ fprintf(file, "%lu out of %lu descriptors used\n",
+ trx_sys->descr_n_used, trx_sys->descr_n_max);
+
+ if (UT_LIST_GET_LEN(trx_sys->view_list)) {
+ read_view_t* view = UT_LIST_GET_LAST(trx_sys->view_list);
+
+ if (view) {
+ fprintf(file, "---OLDEST VIEW---\n");
+ read_view_print(file, view);
+ fprintf(file, "-----------------\n");
+ }
+ }
+
+ mutex_exit(&trx_sys->mutex);
+
+ n_reserved = fil_space_get_n_reserved_extents(0);
+ if (n_reserved > 0) {
+ fprintf(file,
+ "%lu tablespace extents now reserved for"
+ " B-tree split operations\n",
+ (ulong) n_reserved);
+ }
+
+#ifdef UNIV_LINUX
+ fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n",
+ (ulong) srv_main_thread_process_no,
+ (ulong) srv_main_thread_id,
+ srv_main_thread_op_info);
+#else
+ fprintf(file, "Main thread id %lu, state: %s\n",
+ (ulong) srv_main_thread_id,
+ srv_main_thread_op_info);
+#endif
+ fprintf(file,
+ "Number of rows inserted " ULINTPF
+ ", updated " ULINTPF ", deleted " ULINTPF
+ ", read " ULINTPF "\n",
+ (ulint) srv_stats.n_rows_inserted,
+ (ulint) srv_stats.n_rows_updated,
+ (ulint) srv_stats.n_rows_deleted,
+ (ulint) srv_stats.n_rows_read);
+ fprintf(file,
+ "%.2f inserts/s, %.2f updates/s,"
+ " %.2f deletes/s, %.2f reads/s\n",
+ ((ulint) srv_stats.n_rows_inserted - srv_n_rows_inserted_old)
+ / time_elapsed,
+ ((ulint) srv_stats.n_rows_updated - srv_n_rows_updated_old)
+ / time_elapsed,
+ ((ulint) srv_stats.n_rows_deleted - srv_n_rows_deleted_old)
+ / time_elapsed,
+ ((ulint) srv_stats.n_rows_read - srv_n_rows_read_old)
+ / time_elapsed);
+
+ srv_n_rows_inserted_old = srv_stats.n_rows_inserted;
+ srv_n_rows_updated_old = srv_stats.n_rows_updated;
+ srv_n_rows_deleted_old = srv_stats.n_rows_deleted;
+ srv_n_rows_read_old = srv_stats.n_rows_read;
+
+ /* Print the lock information of all transactions only if
+ lock_print_info_summary() returned a nonzero value. */
+ ret = lock_print_info_summary(file, nowait);
+
+ if (ret) {
+ lock_print_info_all_transactions(file);
+ }
+
+ fputs("----------------------------\n"
+ "END OF INNODB MONITOR OUTPUT\n"
+ "============================\n", file);
+ mutex_exit(&srv_innodb_monitor_mutex);
+ fflush(file);
+
+ return(ret);
+}
+
+/******************************************************************//**
+Function to pass InnoDB status variables to MySQL.
+Gathers buffer pool totals and memory usage figures, then fills in the
+fields of the global export_vars structure while holding
+srv_innodb_monitor_mutex. */
+UNIV_INTERN
+void
+srv_export_innodb_status(void)
+/*==========================*/
+{
+ buf_pool_stat_t stat;
+ buf_pools_list_size_t buf_pools_list_size;
+ ulint LRU_len;
+ ulint free_len;
+ ulint flush_list_len;
+ ulint mem_adaptive_hash, mem_dictionary;
+ read_view_t* oldest_view;
+ ulint i;
+
+ buf_get_total_stat(&stat);
+ buf_get_total_list_len(&LRU_len, &free_len, &flush_list_len);
+ buf_get_total_list_size_in_bytes(&buf_pools_list_size);
+ /* Adaptive hash index memory: for each of the btr_search_index_num
+ hash tables, add the size of its heap and of its cell array. */
+ mem_adaptive_hash = 0;
+
+ ut_ad(btr_search_sys->hash_tables);
+
+ for (i = 0; i < btr_search_index_num; i++) {
+ hash_table_t* ht = btr_search_sys->hash_tables[i];
+
+ ut_ad(ht);
+ ut_ad(ht->heap);
+ /* Multiple mutexes/heaps are currently never used for adaptive
+ hash index tables. */
+ ut_ad(!ht->n_sync_obj);
+ ut_ad(!ht->heaps);
+
+ mem_adaptive_hash += mem_heap_get_size(ht->heap);
+ mem_adaptive_hash += ht->n_cells * sizeof(hash_cell_t);
+ }
+ /* Dictionary memory: the cell arrays of the two dictionary hash
+ tables plus dict_sys->size; zero when dict_sys is not set. */
+ mem_dictionary = (dict_sys ? ((dict_sys->table_hash->n_cells
+ + dict_sys->table_id_hash->n_cells
+ ) * sizeof(hash_cell_t)
+ + dict_sys->size) : 0);
+
+ mutex_enter(&srv_innodb_monitor_mutex);
+
+ export_vars.innodb_data_pending_reads =
+ os_n_pending_reads;
+
+ export_vars.innodb_data_pending_writes =
+ os_n_pending_writes;
+
+ export_vars.innodb_data_pending_fsyncs =
+ fil_n_pending_log_flushes
+ + fil_n_pending_tablespace_flushes;
+ export_vars.innodb_adaptive_hash_hash_searches
+ = btr_cur_n_sea;
+ export_vars.innodb_adaptive_hash_non_hash_searches
+ = btr_cur_n_non_sea;
+ export_vars.innodb_background_log_sync
+ = srv_log_writes_and_flush;
+
+ export_vars.innodb_data_fsyncs = os_n_fsyncs;
+
+ export_vars.innodb_data_read = srv_stats.data_read;
+
+ export_vars.innodb_data_reads = os_n_file_reads;
+
+ export_vars.innodb_data_writes = os_n_file_writes;
+
+ export_vars.innodb_data_written = srv_stats.data_written;
+
+ export_vars.innodb_buffer_pool_read_requests = stat.n_page_gets;
+
+ export_vars.innodb_buffer_pool_write_requests =
+ srv_stats.buf_pool_write_requests;
+
+ export_vars.innodb_buffer_pool_wait_free =
+ srv_stats.buf_pool_wait_free;
+
+ export_vars.innodb_buffer_pool_pages_flushed =
+ srv_stats.buf_pool_flushed;
+
+ export_vars.innodb_buffer_pool_reads = srv_stats.buf_pool_reads;
+
+ export_vars.innodb_buffer_pool_read_ahead_rnd =
+ stat.n_ra_pages_read_rnd;
+
+ export_vars.innodb_buffer_pool_read_ahead =
+ stat.n_ra_pages_read;
+
+ export_vars.innodb_buffer_pool_read_ahead_evicted =
+ stat.n_ra_pages_evicted;
+
+ export_vars.innodb_buffer_pool_pages_LRU_flushed =
+ stat.buf_lru_flush_page_count;
+
+ export_vars.innodb_buffer_pool_pages_data = LRU_len;
+
+ export_vars.innodb_buffer_pool_bytes_data =
+ buf_pools_list_size.LRU_bytes
+ + buf_pools_list_size.unzip_LRU_bytes;
+
+ export_vars.innodb_buffer_pool_pages_dirty = flush_list_len;
+
+ export_vars.innodb_buffer_pool_bytes_dirty =
+ buf_pools_list_size.flush_list_bytes;
+
+ export_vars.innodb_buffer_pool_pages_free = free_len;
+
+ export_vars.innodb_deadlocks = srv_stats.lock_deadlock_count;
+ /* The latched page count is exported in UNIV_DEBUG builds only. */
+#ifdef UNIV_DEBUG
+ export_vars.innodb_buffer_pool_pages_latched =
+ buf_get_latched_pages_number();
+#endif /* UNIV_DEBUG */
+ export_vars.innodb_buffer_pool_pages_total = buf_pool_get_n_pages();
+ /* Pages that are counted neither in LRU_len nor in free_len. */
+ export_vars.innodb_buffer_pool_pages_misc =
+ buf_pool_get_n_pages() - LRU_len - free_len;
+
+ export_vars.innodb_buffer_pool_pages_made_young
+ = stat.n_pages_made_young;
+ export_vars.innodb_buffer_pool_pages_made_not_young
+ = stat.n_pages_not_made_young;
+ export_vars.innodb_buffer_pool_pages_old = 0;
+ for (i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool_t* buf_pool = buf_pool_from_array(i);
+ export_vars.innodb_buffer_pool_pages_old
+ += buf_pool->LRU_old_len;
+ }
+ export_vars.innodb_checkpoint_age
+ = (log_sys->lsn - log_sys->last_checkpoint_lsn);
+ export_vars.innodb_checkpoint_max_age
+ = log_sys->max_checkpoint_age;
+ export_vars.innodb_history_list_length
+ = trx_sys->rseg_history_len;
+ ibuf_export_ibuf_status(
+ &export_vars.innodb_ibuf_size,
+ &export_vars.innodb_ibuf_free_list,
+ &export_vars.innodb_ibuf_segment_size,
+ &export_vars.innodb_ibuf_merges,
+ &export_vars.innodb_ibuf_merged_inserts,
+ &export_vars.innodb_ibuf_merged_delete_marks,
+ &export_vars.innodb_ibuf_merged_deletes,
+ &export_vars.innodb_ibuf_discarded_inserts,
+ &export_vars.innodb_ibuf_discarded_delete_marks,
+ &export_vars.innodb_ibuf_discarded_deletes);
+ export_vars.innodb_lsn_current
+ = log_sys->lsn;
+ export_vars.innodb_lsn_flushed
+ = log_sys->flushed_to_disk_lsn;
+ export_vars.innodb_lsn_last_checkpoint
+ = log_sys->last_checkpoint_lsn;
+ export_vars.innodb_master_thread_active_loops
+ = srv_main_active_loops;
+ export_vars.innodb_master_thread_idle_loops
+ = srv_main_idle_loops;
+ export_vars.innodb_max_trx_id
+ = trx_sys->max_trx_id;
+ export_vars.innodb_mem_adaptive_hash
+ = mem_adaptive_hash;
+ export_vars.innodb_mem_dictionary
+ = mem_dictionary;
+ export_vars.innodb_mem_total
+ = ut_total_allocated_memory;
+ export_vars.innodb_mutex_os_waits
+ = mutex_os_wait_count;
+ export_vars.innodb_mutex_spin_rounds
+ = mutex_spin_round_count;
+ export_vars.innodb_mutex_spin_waits
+ = mutex_spin_wait_count;
+ export_vars.innodb_s_lock_os_waits
+ = rw_lock_stats.rw_s_os_wait_count;
+ export_vars.innodb_s_lock_spin_rounds
+ = rw_lock_stats.rw_s_spin_round_count;
+ export_vars.innodb_s_lock_spin_waits
+ = rw_lock_stats.rw_s_spin_wait_count;
+ export_vars.innodb_x_lock_os_waits
+ = rw_lock_stats.rw_x_os_wait_count;
+ export_vars.innodb_x_lock_spin_rounds
+ = rw_lock_stats.rw_x_spin_round_count;
+ export_vars.innodb_x_lock_spin_waits
+ = rw_lock_stats.rw_x_spin_wait_count;
+ /* NOTE(review): trx_sys->view_list is read here without taking
+ trx_sys->mutex, whereas the UNIV_DEBUG section below takes that mutex
+ to read rw_max_trx_id -- confirm that this unlocked read is safe. */
+ oldest_view = UT_LIST_GET_LAST(trx_sys->view_list);
+ export_vars.innodb_oldest_view_low_limit_trx_id
+ = oldest_view ? oldest_view->low_limit_id : 0;
+
+ export_vars.innodb_purge_trx_id = purge_sys->limit.trx_no;
+ export_vars.innodb_purge_undo_no = purge_sys->limit.undo_no;
+ export_vars.innodb_current_row_locks
+ = lock_sys->rec_num;
+
+#ifdef HAVE_ATOMIC_BUILTINS
+ export_vars.innodb_have_atomic_builtins = 1;
+#else
+ export_vars.innodb_have_atomic_builtins = 0;
+#endif
+ export_vars.innodb_page_size = UNIV_PAGE_SIZE;
+
+ export_vars.innodb_log_waits = srv_stats.log_waits;
+
+ export_vars.innodb_os_log_written = srv_stats.os_log_written;
+
+ export_vars.innodb_os_log_fsyncs = fil_n_log_flushes;
+
+ export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes;
+
+ export_vars.innodb_os_log_pending_writes =
+ srv_stats.os_log_pending_writes;
+
+ export_vars.innodb_log_write_requests = srv_stats.log_write_requests;
+
+ export_vars.innodb_log_writes = srv_stats.log_writes;
+
+ export_vars.innodb_dblwr_pages_written =
+ srv_stats.dblwr_pages_written;
+
+ export_vars.innodb_dblwr_writes = srv_stats.dblwr_writes;
+
+ export_vars.innodb_pages_created = stat.n_pages_created;
+
+ export_vars.innodb_pages_read = stat.n_pages_read;
+
+ export_vars.innodb_pages_written = stat.n_pages_written;
+
+ export_vars.innodb_row_lock_waits = srv_stats.n_lock_wait_count;
+
+ export_vars.innodb_row_lock_current_waits =
+ srv_stats.n_lock_wait_current_count;
+ /* The lock wait times are divided by 1000 before being exported
+ (presumably microseconds to milliseconds -- TODO confirm). */
+ export_vars.innodb_row_lock_time = srv_stats.n_lock_wait_time / 1000;
+
+ if (srv_stats.n_lock_wait_count > 0) {
+
+ export_vars.innodb_row_lock_time_avg = (ulint)
+ (srv_stats.n_lock_wait_time
+ / 1000 / srv_stats.n_lock_wait_count);
+
+ } else {
+ export_vars.innodb_row_lock_time_avg = 0;
+ }
+
+ export_vars.innodb_row_lock_time_max =
+ lock_sys->n_lock_max_wait_time / 1000;
+
+ export_vars.innodb_rows_read = srv_stats.n_rows_read;
+
+ export_vars.innodb_rows_inserted = srv_stats.n_rows_inserted;
+
+ export_vars.innodb_rows_updated = srv_stats.n_rows_updated;
+
+ export_vars.innodb_rows_deleted = srv_stats.n_rows_deleted;
+
+ export_vars.innodb_num_open_files = fil_n_file_opened;
+
+ export_vars.innodb_truncated_status_writes =
+ srv_truncated_status_writes;
+
+ export_vars.innodb_available_undo_logs = srv_available_undo_logs;
+ export_vars.innodb_read_views_memory
+ = os_atomic_increment_ulint(&srv_read_views_memory, 0);
+ export_vars.innodb_descriptors_memory
+ = os_atomic_increment_ulint(&srv_descriptors_memory, 0);
+ /* UNIV_DEBUG builds only: export how far purge lags behind
+ trx_sys->rw_max_trx_id. Each age is exported as 0 when its reference
+ id is zero or when the subtraction would give a negative result. */
+#ifdef UNIV_DEBUG
+ rw_lock_s_lock(&purge_sys->latch);
+ trx_id_t done_trx_no = purge_sys->done.trx_no;
+ trx_id_t up_limit_id = purge_sys->view
+ ? purge_sys->view->up_limit_id
+ : 0;
+ rw_lock_s_unlock(&purge_sys->latch);
+
+ mutex_enter(&trx_sys->mutex);
+ trx_id_t max_trx_id = trx_sys->rw_max_trx_id;
+ mutex_exit(&trx_sys->mutex);
+
+ if (!done_trx_no || max_trx_id < done_trx_no - 1) {
+ export_vars.innodb_purge_trx_id_age = 0;
+ } else {
+ export_vars.innodb_purge_trx_id_age =
+ (ulint) (max_trx_id - done_trx_no + 1);
+ }
+
+ if (!up_limit_id
+ || max_trx_id < up_limit_id) {
+ export_vars.innodb_purge_view_trx_id_age = 0;
+ } else {
+ export_vars.innodb_purge_view_trx_id_age =
+ (ulint) (max_trx_id - up_limit_id);
+ }
+#endif /* UNIV_DEBUG */
+
+ mutex_exit(&srv_innodb_monitor_mutex);
+}
+
+/*********************************************************************//**
+A thread which prints the info output by various InnoDB monitors.
+The thread waits on srv_monitor_event with a 5 second timeout. Output is
+produced only when more than 15 seconds have passed since the previous
+output round; the tablespace and table monitors are further limited to
+one report per 60 seconds each. The thread leaves its loop once
+srv_shutdown_state has reached SRV_SHUTDOWN_CLEANUP.
+@return a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(srv_monitor_thread)(
+/*===============================*/
+	void*	arg __attribute__((unused)))
+			/*!< in: a dummy parameter required by
+			os_thread_create */
+{
+	ib_int64_t	sig_count;
+	double		time_elapsed;
+	time_t		current_time;
+	time_t		last_table_monitor_time;
+	time_t		last_tablespace_monitor_time;
+	time_t		last_monitor_time;
+	ulint		mutex_skipped;
+	ibool		last_srv_print_monitor;
+
+	ut_ad(!srv_read_only_mode);
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+	/* This message used to say "Lock timeout thread", which is a
+	different thread. */
+	fprintf(stderr, "Monitor thread starts, id %lu\n",
+		os_thread_pf(os_thread_get_curr_id()));
+#endif /* UNIV_DEBUG_THREAD_CREATION */
+
+#ifdef UNIV_PFS_THREAD
+	pfs_register_thread(srv_monitor_thread_key);
+#endif /* UNIV_PFS_THREAD */
+	srv_monitor_active = TRUE;
+
+	UT_NOT_USED(arg);
+	srv_last_monitor_time = ut_time();
+	last_table_monitor_time = ut_time();
+	last_tablespace_monitor_time = ut_time();
+	last_monitor_time = ut_time();
+	mutex_skipped = 0;
+	last_srv_print_monitor = srv_print_innodb_monitor;
+loop:
+	/* Wake up every 5 seconds to see if we need to print
+	monitor information or if signalled at shutdown. */
+
+	sig_count = os_event_reset(srv_monitor_event);
+
+	os_event_wait_time_low(srv_monitor_event, 5000000, sig_count);
+
+	current_time = ut_time();
+
+	time_elapsed = difftime(current_time, last_monitor_time);
+
+	if (time_elapsed > 15) {
+		last_monitor_time = ut_time();
+
+		if (srv_print_innodb_monitor) {
+			/* Reset mutex_skipped counter everytime
+			srv_print_innodb_monitor changes. This is to
+			ensure we will not be blocked by lock_sys->mutex
+			for short duration information printing,
+			such as requested by sync_array_print_long_waits() */
+			if (!last_srv_print_monitor) {
+				mutex_skipped = 0;
+				last_srv_print_monitor = TRUE;
+			}
+
+			if (!srv_printf_innodb_monitor(stderr,
+						MUTEX_NOWAIT(mutex_skipped),
+						NULL, NULL)) {
+				mutex_skipped++;
+			} else {
+				/* Reset the counter */
+				mutex_skipped = 0;
+			}
+		} else {
+			last_srv_print_monitor = FALSE;
+		}
+
+		/* We don't create the temp files or associated
+		mutexes in read-only-mode */
+
+		if (!srv_read_only_mode && srv_innodb_status) {
+			mutex_enter(&srv_monitor_file_mutex);
+			rewind(srv_monitor_file);
+			if (!srv_printf_innodb_monitor(srv_monitor_file,
+						MUTEX_NOWAIT(mutex_skipped),
+						NULL, NULL)) {
+				mutex_skipped++;
+			} else {
+				mutex_skipped = 0;
+			}
+
+			os_file_set_eof(srv_monitor_file);
+			mutex_exit(&srv_monitor_file_mutex);
+		}
+
+		if (srv_print_innodb_tablespace_monitor
+		    && difftime(current_time,
+				last_tablespace_monitor_time) > 60) {
+			last_tablespace_monitor_time = ut_time();
+
+			fputs("========================"
+			      "========================\n",
+			      stderr);
+
+			ut_print_timestamp(stderr);
+
+			fputs(" INNODB TABLESPACE MONITOR OUTPUT\n"
+			      "========================"
+			      "========================\n",
+			      stderr);
+
+			fsp_print(0);
+			fputs("Validating tablespace\n", stderr);
+			fsp_validate(0);
+			fputs("Validation ok\n"
+			      "---------------------------------------\n"
+			      "END OF INNODB TABLESPACE MONITOR OUTPUT\n"
+			      "=======================================\n",
+			      stderr);
+		}
+
+		if (srv_print_innodb_table_monitor
+		    && difftime(current_time, last_table_monitor_time) > 60) {
+
+			last_table_monitor_time = ut_time();
+
+			fprintf(stderr, "Warning: %s\n",
+				DEPRECATED_MSG_INNODB_TABLE_MONITOR);
+
+			fputs("===========================================\n",
+			      stderr);
+
+			ut_print_timestamp(stderr);
+
+			fputs(" INNODB TABLE MONITOR OUTPUT\n"
+			      "===========================================\n",
+			      stderr);
+			dict_print();
+
+			fputs("-----------------------------------\n"
+			      "END OF INNODB TABLE MONITOR OUTPUT\n"
+			      "==================================\n",
+			      stderr);
+
+			fprintf(stderr, "Warning: %s\n",
+				DEPRECATED_MSG_INNODB_TABLE_MONITOR);
+		}
+	}
+
+	/* Keep looping until shutdown has reached the cleanup phase. The
+	former check of the four monitor flags here was redundant: both of
+	its outcomes jumped back to the top of the loop. */
+	if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
+
+		goto loop;
+	}
+
+	srv_monitor_active = FALSE;
+
+	/* We count the number of threads in os_thread_exit(). A created
+	thread should always use that to exit and not use return() to exit. */
+
+	os_thread_exit(NULL);
+
+	OS_THREAD_DUMMY_RETURN;
+}
+
+/*********************************************************************//**
+A thread which prints warnings about semaphore waits which have lasted
+too long. These can be used to track bugs which cause hangs.
+On each iteration the thread also checks that the log sequence number has
+not decreased, refreshes the InnoDB Monitor statistics when they are more
+than 60 seconds old, updates the LRU statistics and, when
+srv_kill_idle_transaction is set, kills sessions whose transaction has
+been idle for too long.
+@return a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(srv_error_monitor_thread)(
+/*=====================================*/
+ void* arg __attribute__((unused)))
+ /*!< in: a dummy parameter required by
+ os_thread_create */
+{
+ /* number of successive fatal timeouts observed */
+ ulint fatal_cnt = 0;
+ lsn_t old_lsn;
+ lsn_t new_lsn;
+ ib_int64_t sig_count;
+ /* longest waiting thread for a semaphore */
+ os_thread_id_t waiter = os_thread_get_curr_id();
+ os_thread_id_t old_waiter = waiter;
+ /* the semaphore that is being waited for */
+ const void* sema = NULL;
+ const void* old_sema = NULL;
+
+ ut_ad(!srv_read_only_mode);
+
+ old_lsn = srv_start_lsn;
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+ fprintf(stderr, "Error monitor thread starts, id %lu\n",
+ os_thread_pf(os_thread_get_curr_id()));
+#endif /* UNIV_DEBUG_THREAD_CREATION */
+
+#ifdef UNIV_PFS_THREAD
+ pfs_register_thread(srv_error_monitor_thread_key);
+#endif /* UNIV_PFS_THREAD */
+ srv_error_monitor_active = TRUE;
+
+loop:
+ /* Try to track a strange bug reported by Harald Fuchs and others,
+ where the lsn seems to decrease at times */
+
+ new_lsn = log_get_lsn();
+
+ if (new_lsn < old_lsn) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error: old log sequence number " LSN_PF
+ " was greater\n"
+ "InnoDB: than the new log sequence number " LSN_PF "!\n"
+ "InnoDB: Please submit a bug report"
+ " to http://bugs.mysql.com\n",
+ old_lsn, new_lsn);
+ ut_ad(0);
+ }
+
+ old_lsn = new_lsn;
+
+ if (difftime(time(NULL), srv_last_monitor_time) > 60) {
+ /* We refresh InnoDB Monitor values so that averages are
+ printed from at most 60 last seconds */
+
+ srv_refresh_innodb_monitor_stats();
+ }
+
+ /* Update the statistics collected for deciding LRU
+ eviction policy. */
+ buf_LRU_stat_update();
+
+ /* In case mutex_exit is not a memory barrier, it is
+ theoretically possible some threads are left waiting though
+ the semaphore is already released. Wake up those threads: */
+
+ sync_arr_wake_threads_if_sema_free();
+ /* Count the consecutive iterations in which the same thread is
+ reported as waiting for the same semaphore. Once that count exceeds
+ 10 the server is crashed on purpose; any other outcome resets the
+ count and remembers the current waiter and semaphore. */
+ if (sync_array_print_long_waits(&waiter, &sema)
+ && sema == old_sema && os_thread_eq(waiter, old_waiter)) {
+ fatal_cnt++;
+ if (fatal_cnt > 10) {
+
+ fprintf(stderr,
+ "InnoDB: Error: semaphore wait has lasted"
+ " > %lu seconds\n"
+ "InnoDB: We intentionally crash the server,"
+ " because it appears to be hung.\n",
+ (ulong) srv_fatal_semaphore_wait_threshold);
+
+ ut_error;
+ }
+ } else {
+ fatal_cnt = 0;
+ old_waiter = waiter;
+ old_sema = sema;
+ }
+ /* When srv_kill_idle_transaction is set, scan
+ trx_sys->mysql_trx_list for active transactions whose connection is
+ idle. idle_start is reset whenever the statement start time of the
+ connection changes. A transaction whose idle time, as computed by
+ difftime(), exceeds srv_kill_idle_transaction has its session killed:
+ trx_sys->mutex is released first and the scan then restarts from the
+ head of the list. */
+ if (srv_kill_idle_transaction && trx_sys) {
+ trx_t* trx;
+ time_t now;
+rescan_idle:
+ now = time(NULL);
+ mutex_enter(&trx_sys->mutex);
+ trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list);
+ while (trx) {
+ if (!trx_state_eq(trx, TRX_STATE_NOT_STARTED)
+ && trx_state_eq(trx, TRX_STATE_ACTIVE)
+ && trx->mysql_thd
+ && innobase_thd_is_idle(trx->mysql_thd)) {
+ ib_int64_t start_time = innobase_thd_get_start_time(trx->mysql_thd);
+ ulong thd_id = innobase_thd_get_thread_id(trx->mysql_thd);
+
+ if (trx->last_stmt_start != start_time) {
+ trx->idle_start = now;
+ trx->last_stmt_start = start_time;
+ } else if (difftime(now, trx->idle_start)
+ > srv_kill_idle_transaction) {
+ /* kill the session */
+ mutex_exit(&trx_sys->mutex);
+ innobase_thd_kill(thd_id);
+ goto rescan_idle;
+ }
+ }
+ trx = UT_LIST_GET_NEXT(mysql_trx_list, trx);
+ }
+ mutex_exit(&trx_sys->mutex);
+ }
+
+ /* Flush stderr so that a database user gets the output
+ to possible MySQL error file */
+
+ fflush(stderr);
+ /* Wait on srv_error_event with a timeout of 1000000 (presumably
+ microseconds, i.e. one second -- TODO confirm) before the next pass. */
+ sig_count = os_event_reset(srv_error_event);
+
+ os_event_wait_time_low(srv_error_event, 1000000, sig_count);
+
+ if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
+
+ goto loop;
+ }
+
+ srv_error_monitor_active = FALSE;
+
+ /* We count the number of threads in os_thread_exit(). A created
+ thread should always use that to exit and not use return() to exit. */
+
+ os_thread_exit(NULL);
+
+ OS_THREAD_DUMMY_RETURN;
+}
+
+/******************************************************************//**
+Bumps the activity counter kept in srv_sys by one step. */
+UNIV_INTERN
+void
+srv_inc_activity_count(void)
+/*========================*/
+{
+	srv_sys->activity_count.inc();
+}
+
+/**********************************************************************//**
+Reports the type of a background thread that is still active, if any.
+The per-type active counters are inspected under the srv_sys mutex, from
+SRV_WORKER up to SRV_MASTER, and the first type with a nonzero counter
+wins. If none is found and a shutdown is in progress, SRV_PURGE is
+reported for as long as the purge state is neither disabled nor exited.
+@return SRV_NONE if all are suspended or have exited (always in
+read-only mode), otherwise the type of an active thread */
+UNIV_INTERN
+srv_thread_type
+srv_get_active_thread_type(void)
+/*============================*/
+{
+	if (srv_read_only_mode) {
+		return(SRV_NONE);
+	}
+
+	srv_thread_type	active_type = SRV_NONE;
+
+	srv_sys_mutex_enter();
+
+	for (ulint type = SRV_WORKER; type <= SRV_MASTER; ++type) {
+		if (srv_sys->n_threads_active[type] == 0) {
+			continue;
+		}
+
+		active_type = static_cast<srv_thread_type>(type);
+		break;
+	}
+
+	srv_sys_mutex_exit();
+
+	/* The purge state is consulted only during shutdown, and only
+	when no counted thread type turned out to be active. */
+	if (active_type != SRV_NONE
+	    || srv_shutdown_state == SRV_SHUTDOWN_NONE) {
+		return(active_type);
+	}
+
+	if (trx_purge_state() == PURGE_STATE_DISABLED) {
+		return(active_type);
+	}
+
+	if (trx_purge_state() == PURGE_STATE_EXIT) {
+		return(active_type);
+	}
+
+	return(SRV_PURGE);
+}
+
+/**********************************************************************//**
+Finds out whether one of the monitored background threads is still
+active and sets the events of all of them. The activity flags are
+examined in a fixed order and the first one that is set decides the
+result. In read-only mode nothing is checked and no event is set.
+@return name of thread that is active or NULL */
+UNIV_INTERN
+const char*
+srv_any_background_threads_are_active(void)
+/*=======================================*/
+{
+	if (srv_read_only_mode) {
+		return(NULL);
+	}
+
+	const char*	active_name =
+		srv_error_monitor_active
+		? "srv_error_monitor_thread"
+		: lock_sys->timeout_thread_active
+		? "srv_lock_timeout thread"
+		: srv_monitor_active
+		? "srv_monitor_thread"
+		: srv_buf_dump_thread_active
+		? "buf_dump_thread"
+		: srv_dict_stats_thread_active
+		? "dict_stats_thread"
+		: NULL;
+
+	/* Signal every thread's event, whether or not it was found to be
+	active above. */
+	os_event_set(srv_error_event);
+	os_event_set(srv_monitor_event);
+	os_event_set(srv_buf_dump_event);
+	os_event_set(lock_sys->timeout_event);
+	os_event_set(dict_stats_event);
+
+	return(active_name);
+}
+
+/******************************************************************//**
+A thread which follows the redo log and outputs the changed page bitmap.
+@return a dummy value */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(srv_redo_log_follow_thread)(
+/*=======================================*/
+ void* arg __attribute__((unused))) /*!< in: a dummy parameter
+ required by
+ os_thread_create */
+{
+ ut_ad(!srv_read_only_mode);
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+ fprintf(stderr, "Redo log follower thread starts, id %lu\n",
+ os_thread_pf(os_thread_get_curr_id()));
+#endif
+
+#ifdef UNIV_PFS_THREAD
+ pfs_register_thread(srv_log_tracking_thread_key);
+#endif
+
+ my_thread_init();
+ /* Each iteration waits for srv_checkpoint_completed_event and then
+ follows the redo log. The loop ends when srv_shutdown_state reaches
+ SRV_SHUTDOWN_LAST_PHASE or when log_online_follow_redo_log() fails. */
+ do {
+ os_event_wait(srv_checkpoint_completed_event);
+ os_event_reset(srv_checkpoint_completed_event);
+ /* In UNIV_DEBUG builds the iteration is skipped while
+ srv_track_changed_pages is off. */
+#ifdef UNIV_DEBUG
+ if (!srv_track_changed_pages) {
+ continue;
+ }
+#endif
+
+ if (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) {
+ if (!log_online_follow_redo_log()) {
+ /* TODO: sync with I_S log tracking status? */
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "log tracking bitmap write failed, "
+ "stopping log tracking thread!\n");
+ break;
+ }
+ }
+
+ } while (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE);
+ /* Switch tracking off, shut down the log reader and signal that this
+ thread has finished. */
+ srv_track_changed_pages = FALSE;
+ log_online_read_shutdown();
+ os_event_set(srv_redo_log_thread_finished_event);
+
+ my_thread_end();
+ os_thread_exit(NULL);
+
+ OS_THREAD_DUMMY_RETURN;
+}
+
+/*************************************************************//**
+Removes old archived transaction log files from srv_arch_dir.
+Both parameters couldn't be provided at the same time: when before_no is
+nonzero it alone selects the files, otherwise before_date is used. Only
+files whose name starts with IB_ARCHIVED_LOGS_PREFIX are considered, and
+a file that the log system is still using is never deleted.
+@return DB_SUCCESS, or DB_ERROR if the archive directory is not set, its
+name does not fit into a path buffer, it cannot be opened, or a file
+cannot be deleted */
+dberr_t
+purge_archived_logs(
+	time_t	before_date,	/*!< in: files modified at or before
+				this timestamp are removed; 0 selects
+				no file */
+	lsn_t	before_no)	/*!< in: files whose number in the name
+				is nonzero and lower than this are
+				removed; 0 means select by
+				before_date */
+{
+	log_group_t*	group = UT_LIST_GET_FIRST(log_sys->log_groups);
+
+	os_file_dir_t	dir;
+	os_file_stat_t	fileinfo;
+	char		archived_log_filename[OS_FILE_MAX_PATH];
+	char		namegen[OS_FILE_MAX_PATH];
+	ulint		dirnamelen;
+
+	if (!srv_arch_dir) {
+		/* log archive directory is not specified */
+		return(DB_ERROR);
+	}
+
+	dirnamelen = strlen(srv_arch_dir);
+
+	/* The directory name, an optional path separator and the
+	terminating NUL must all fit into archived_log_filename, otherwise
+	the copy below would write past the end of the buffer. */
+	if (dirnamelen + 2 > OS_FILE_MAX_PATH) {
+		ib_logf(IB_LOG_LEVEL_WARN,
+			"archived log directory name %s is too long. "
+			"Purge archived logs are not available\n",
+			srv_arch_dir);
+		return(DB_ERROR);
+	}
+
+	dir = os_file_opendir(srv_arch_dir, FALSE);
+	if (!dir) {
+		ib_logf(IB_LOG_LEVEL_WARN,
+			"opening archived log directory %s failed. "
+			"Purge archived logs are not available\n",
+			srv_arch_dir);
+		/* failed to open directory */
+		return(DB_ERROR);
+	}
+
+	memcpy(archived_log_filename, srv_arch_dir, dirnamelen);
+	if (dirnamelen &&
+	    archived_log_filename[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
+		archived_log_filename[dirnamelen++] = SRV_PATH_SEPARATOR;
+	}
+
+	memset(&fileinfo, 0, sizeof(fileinfo));
+	while (!os_file_readdir_next_file(srv_arch_dir, dir,
+					  &fileinfo)) {
+		if (strncmp(fileinfo.name,
+			    IB_ARCHIVED_LOGS_PREFIX,
+			    IB_ARCHIVED_LOGS_PREFIX_LEN)) {
+			continue;
+		}
+
+		/* Skip names that would not fit behind the directory
+		prefix. */
+		if (dirnamelen + strlen(fileinfo.name) + 2
+		    > OS_FILE_MAX_PATH) {
+			continue;
+		}
+
+		/* The space left is what remains of the buffer after the
+		directory prefix, not the whole buffer. */
+		snprintf(archived_log_filename + dirnamelen,
+			 OS_FILE_MAX_PATH - dirnamelen,
+			 "%s", fileinfo.name);
+
+		if (before_no) {
+			ib_uint64_t	log_file_no = strtoull(
+				fileinfo.name + IB_ARCHIVED_LOGS_PREFIX_LEN,
+				NULL, 10);
+			if (log_file_no == 0 || before_no <= log_file_no) {
+				continue;
+			}
+		} else {
+			fileinfo.mtime = 0;
+			if (os_file_get_status(archived_log_filename,
+					       &fileinfo, false) != DB_SUCCESS
+			    || fileinfo.mtime == 0) {
+				continue;
+			}
+
+			if (before_date == 0
+			    || fileinfo.mtime > before_date) {
+				continue;
+			}
+		}
+
+		/* We are going to delete archived file. Acquire log_sys->mutex
+		to make sure that we are the only one who tries to delete the
+		file. This also prevents log system from using this file. Do
+		not delete the file if it is currently being written or has
+		pending IO. This is enforced by checking:
+		1. fil_space_contains_node.
+		2. group->archived_offset % group->file_size != 0, i.e.
+		there is archive in progress and we are going to delete it.
+		This covers 3 cases:
+		a. Usual case when we have one archive in progress,
+		both 1 and 2 are TRUE
+		b. When we have more than 1 archive in fil_space,
+		this can happen when flushed LSN range crosses file
+		boundary
+		c. When we have empty fil_space, but existing file will be
+		opened once archiving operation is requested. This usually
+		happens on startup.
+		*/
+
+		mutex_enter(&log_sys->mutex);
+
+		log_archived_file_name_gen(namegen, sizeof(namegen),
+					   group->id,
+					   group->archived_file_no);
+
+		if (fil_space_contains_node(group->archive_space_id,
+					    archived_log_filename)
+		    || (group->archived_offset % group->file_size != 0
+			&& strcmp(namegen, archived_log_filename) == 0)) {
+
+			mutex_exit(&log_sys->mutex);
+			continue;
+		}
+
+		if (!os_file_delete_if_exists(innodb_file_data_key,
+					      archived_log_filename)) {
+
+			ib_logf(IB_LOG_LEVEL_WARN,
+				"can't delete archived log file %s.\n",
+				archived_log_filename);
+
+			mutex_exit(&log_sys->mutex);
+			os_file_closedir(dir);
+
+			return(DB_ERROR);
+		}
+
+		mutex_exit(&log_sys->mutex);
+	}
+
+	os_file_closedir(dir);
+
+	return(DB_SUCCESS);
+}
+
+/*******************************************************************//**
+Records database activity and, if the master thread is suspended (not
+sleeping), resumes it. Used in the MySQL interface. Does nothing in
+read-only mode. The number of active master threads is first read
+without holding the srv_sys_t->mutex, for performance reasons, so there
+is a small chance that the master thread stays suspended. */
+UNIV_INTERN
+void
+srv_active_wake_master_thread(void)
+/*===============================*/
+{
+	if (srv_read_only_mode) {
+		return;
+	}
+
+	ut_ad(!srv_sys_mutex_own());
+
+	srv_inc_activity_count();
+
+	if (srv_sys->n_threads_active[SRV_MASTER] != 0) {
+		/* The master thread is already counted as active. */
+		return;
+	}
+
+	srv_sys_mutex_enter();
+
+	srv_slot_t*	master_slot = &srv_sys->sys_threads[SRV_MASTER_SLOT];
+
+	/* An unused slot means the master thread has not been started. */
+	if (master_slot->in_use) {
+		ut_a(srv_slot_get_type(master_slot) == SRV_MASTER);
+
+		if (master_slot->suspended) {
+
+			master_slot->suspended = FALSE;
+
+			++srv_sys->n_threads_active[SRV_MASTER];
+
+			os_event_set(master_slot->event);
+		}
+	}
+
+	srv_sys_mutex_exit();
+}
+
+/*******************************************************************//**
+Releases one purge thread when purge is in the running state and no
+purge thread is counted as active. The check is made without holding
+the srv_sys_t::mutex or the purge_sys->latch, for performance reasons,
+so there is a small chance that the purge thread stays suspended. */
+UNIV_INTERN
+void
+srv_wake_purge_thread_if_not_active(void)
+/*=====================================*/
+{
+	ut_ad(!srv_sys_mutex_own());
+
+	if (purge_sys->state != PURGE_STATE_RUN
+	    || srv_sys->n_threads_active[SRV_PURGE] != 0) {
+
+		return;
+	}
+
+	srv_release_threads(SRV_PURGE, 1);
+}
+
+/*******************************************************************//**
+Counts one unit of server activity and then releases one thread of type
+SRV_MASTER, which wakes the master thread if it is suspended or being
+suspended. Must be called without owning the srv_sys mutex. */
+UNIV_INTERN
+void
+srv_wake_master_thread(void)
+/*========================*/
+{
+	ut_ad(!srv_sys_mutex_own());
+
+	srv_inc_activity_count();
+	srv_release_threads(SRV_MASTER, 1);
+}
+
+/*******************************************************************//**
+Reads the current server activity count. The value is read without
+holding srv_sys::mutex because it is only used in heuristics.
+@return activity count. */
+UNIV_INTERN
+ulint
+srv_get_activity_count(void)
+/*========================*/
+{
+	const ulint	current_count = srv_sys->activity_count;
+
+	return(current_count);
+}
+
+/*******************************************************************//**
+Tells whether the activity counter has moved away from a value that the
+caller sampled earlier.
+@return FALSE if no change in activity counter. */
+UNIV_INTERN
+ibool
+srv_check_activity(
+/*===============*/
+	ulint	old_activity_count)	/*!< in: old activity count */
+{
+	const ulint	current_count = srv_sys->activity_count;
+
+	return(current_count != old_activity_count);
+}
+
+/********************************************************************//**
+Flushes the log buffer in the background when at least
+srv_flush_log_at_timeout seconds have gone by since the last such flush.
+The master thread calls this so that the log file gets flushed
+periodically, which bounds the amount of transactions lost in case of a
+crash when innodb_flush_logs_at_trx_commit != 1. */
+static
+void
+srv_sync_log_buffer_in_background(void)
+/*===================================*/
+{
+	const time_t	now = time(NULL);
+
+	srv_main_thread_op_info = "flushing log";
+
+	const double	since_last_flush
+		= difftime(now, srv_last_log_flush_time);
+
+	if (since_last_flush >= srv_flush_log_at_timeout) {
+		log_buffer_sync_in_background(TRUE);
+		srv_last_log_flush_time = now;
+		srv_log_writes_and_flush++;
+	}
+}
+
+/********************************************************************//**
+Makes room in the table cache by evicting unused tables. The eviction
+runs with dict_operation_lock held in exclusive mode and with the
+dictionary mutex held.
+@return number of tables evicted. */
+static
+ulint
+srv_master_evict_from_table_cache(
+/*==============================*/
+	ulint	pct_check)	/*!< in: max percent to check */
+{
+	rw_lock_x_lock(&dict_operation_lock);
+	dict_mutex_enter_for_mysql();
+
+	const ulint	evicted = dict_make_room_in_cache(
+		innobase_get_table_cache_size(), pct_check);
+
+	dict_mutex_exit_for_mysql();
+	rw_lock_x_unlock(&dict_operation_lock);
+
+	return(evicted);
+}
+
+/*********************************************************************//**
+This function prints progress message every 60 seconds during server
+shutdown, for any activities that master thread is pending on. Nothing
+is printed, and *last_print_time is left unchanged, unless more than 60
+seconds have passed since *last_print_time. */
+static
+void
+srv_shutdown_print_master_pending(
+/*==============================*/
+	ib_time_t*	last_print_time,	/*!< in/out: last time the
+						function printed the message;
+						updated when the 60 second
+						interval has elapsed */
+	ulint		n_tables_to_drop,	/*!< in: number of tables to
+						be dropped */
+	ulint		n_bytes_merged)		/*!< in: number of change
+						buffer bytes just merged */
+{
+	ib_time_t	current_time;
+	double		time_elapsed;
+
+	current_time = ut_time();
+	time_elapsed = ut_difftime(current_time, *last_print_time);
+
+	if (time_elapsed > 60) {
+		*last_print_time = ut_time();
+
+		if (n_tables_to_drop) {
+			ut_print_timestamp(stderr);
+			fprintf(stderr, " InnoDB: Waiting for "
+				"%lu table(s) to be dropped\n",
+				(ulong) n_tables_to_drop);
+		}
+
+		/* Check change buffer merge, we only wait for change buffer
+		merge if it is a slow shutdown */
+		if (!srv_fast_shutdown && n_bytes_merged) {
+			ut_print_timestamp(stderr);
+			/* Cast to match %lu, as is done for
+			n_tables_to_drop above: ulint need not be the
+			same type as unsigned long. */
+			fprintf(stderr, " InnoDB: Waiting for change "
+				"buffer merge to complete\n"
+				" InnoDB: number of bytes of change buffer "
+				"just merged: %lu\n",
+				(ulong) n_bytes_merged);
+		}
+	}
+}
+
+/*********************************************************************//**
+Perform the tasks that the master thread is supposed to do when the
+server is active. There are two types of tasks. The first category is
+of such tasks which are performed at each invocation of this function.
+We assume that this function is called roughly every second when the
+server is active. The second category is of such tasks which are
+performed at some interval e.g.: purge, dict_LRU cleanup etc.
+The function returns early, skipping the remaining tasks, as soon as it
+observes a non-zero srv_shutdown_state at one of its checkpoints. */
+static
+void
+srv_master_do_active_tasks(void)
+/*============================*/
+{
+	ib_time_t	cur_time = ut_time();
+	ullint		counter_time = ut_time_us(NULL);
+
+	/* First do the tasks that we are supposed to do at each
+	invocation of this function. */
+
+	++srv_main_active_loops;
+
+	MONITOR_INC(MONITOR_MASTER_ACTIVE_LOOPS);
+
+	/* ALTER TABLE in MySQL requires on Unix that the table handler
+	can drop tables lazily after there no longer are SELECT
+	queries to them. */
+	srv_main_thread_op_info = "doing background drop tables";
+	row_drop_tables_for_mysql_in_background();
+	MONITOR_INC_TIME_IN_MICRO_SECS(
+		MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND, counter_time);
+
+	if (srv_shutdown_state > 0) {
+		return;
+	}
+
+	/* make sure that there is enough reusable space in the redo
+	log files */
+	srv_main_thread_op_info = "checking free log space";
+	log_free_check();
+
+	/* Do an ibuf merge */
+	srv_main_thread_op_info = "doing insert buffer merge";
+	counter_time = ut_time_us(NULL);
+	ibuf_contract_in_background(0, FALSE);
+	MONITOR_INC_TIME_IN_MICRO_SECS(
+		MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
+
+	/* Flush logs if needed */
+	srv_main_thread_op_info = "flushing log";
+	srv_sync_log_buffer_in_background();
+	MONITOR_INC_TIME_IN_MICRO_SECS(
+		MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
+
+	/* Now see if various tasks that are performed at defined
+	intervals need to be performed. Each of them runs only when the
+	current time in seconds is an exact multiple of its interval. */
+
+#ifdef MEM_PERIODIC_CHECK
+	/* Check magic numbers of every allocated mem block once in
+	SRV_MASTER_MEM_VALIDATE_INTERVAL seconds */
+	if (cur_time % SRV_MASTER_MEM_VALIDATE_INTERVAL == 0) {
+		mem_validate_all_blocks();
+		MONITOR_INC_TIME_IN_MICRO_SECS(
+			MONITOR_SRV_MEM_VALIDATE_MICROSECOND, counter_time);
+	}
+#endif
+	if (srv_shutdown_state > 0) {
+		return;
+	}
+
+	if (cur_time % SRV_MASTER_DICT_LRU_INTERVAL == 0) {
+		srv_main_thread_op_info = "enforcing dict cache limit";
+		srv_master_evict_from_table_cache(50);
+		MONITOR_INC_TIME_IN_MICRO_SECS(
+			MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
+	}
+
+	if (srv_shutdown_state > 0) {
+		return;
+	}
+
+	/* Make a new checkpoint */
+	if (cur_time % SRV_MASTER_CHECKPOINT_INTERVAL == 0) {
+		srv_main_thread_op_info = "making checkpoint";
+		log_checkpoint(TRUE, FALSE, TRUE);
+		MONITOR_INC_TIME_IN_MICRO_SECS(
+			MONITOR_SRV_CHECKPOINT_MICROSECOND, counter_time);
+	}
+}
+
+/*********************************************************************//**
+Perform the tasks that the master thread is supposed to do whenever the
+server is idle. We do check for the server state during this function
+and if the server has entered the shutdown phase we may return from
+the function without completing the required tasks.
+Note that the server can move to active state when we are executing this
+function but we don't check for that as we are supposed to perform more
+or less the same tasks when the server is active.
+The steps, in order, are: background drop of tables, free log space
+check, insert buffer merge, dictionary cache eviction, log flush,
+checkpoint and, when srv_log_arch_expire_sec is non-zero, purging of
+archived logs. */
+static
+void
+srv_master_do_idle_tasks(void)
+/*==========================*/
+{
+ ullint counter_time;
+
+ ++srv_main_idle_loops;
+
+ MONITOR_INC(MONITOR_MASTER_IDLE_LOOPS);
+
+
+ /* ALTER TABLE in MySQL requires on Unix that the table handler
+ can drop tables lazily after there no longer are SELECT
+ queries to them. */
+ counter_time = ut_time_us(NULL);
+ srv_main_thread_op_info = "doing background drop tables";
+ row_drop_tables_for_mysql_in_background();
+ MONITOR_INC_TIME_IN_MICRO_SECS(
+ MONITOR_SRV_BACKGROUND_DROP_TABLE_MICROSECOND,
+ counter_time);
+
+ if (srv_shutdown_state > 0) {
+ return;
+ }
+
+ /* make sure that there is enough reusable space in the redo
+ log files */
+ srv_main_thread_op_info = "checking free log space";
+ log_free_check();
+
+ /* Do an ibuf merge */
+ counter_time = ut_time_us(NULL);
+ srv_main_thread_op_info = "doing insert buffer merge";
+ ibuf_contract_in_background(0, TRUE);
+ MONITOR_INC_TIME_IN_MICRO_SECS(
+ MONITOR_SRV_IBUF_MERGE_MICROSECOND, counter_time);
+
+ if (srv_shutdown_state > 0) {
+ return;
+ }
+
+ /* Evict from the dictionary cache on every idle pass, letting the
+ eviction check up to 100 percent (the active-server path passes 50
+ and runs only at intervals). */
+ srv_main_thread_op_info = "enforcing dict cache limit";
+ srv_master_evict_from_table_cache(100);
+ MONITOR_INC_TIME_IN_MICRO_SECS(
+ MONITOR_SRV_DICT_LRU_MICROSECOND, counter_time);
+
+ /* Flush logs if needed */
+ srv_sync_log_buffer_in_background();
+ MONITOR_INC_TIME_IN_MICRO_SECS(
+ MONITOR_SRV_LOG_FLUSH_MICROSECOND, counter_time);
+
+ if (srv_shutdown_state > 0) {
+ return;
+ }
+
+ /* Make a new checkpoint */
+ srv_main_thread_op_info = "making checkpoint";
+ log_checkpoint(TRUE, FALSE, TRUE);
+ MONITOR_INC_TIME_IN_MICRO_SECS(MONITOR_SRV_CHECKPOINT_MICROSECOND,
+ counter_time);
+
+ if (srv_shutdown_state > 0) {
+ return;
+ }
+
+ /* Archived log purging is enabled only when an expiry period is
+ configured; the cutoff passed is the current time minus that
+ period. */
+ if (srv_log_arch_expire_sec) {
+ srv_main_thread_op_info = "purging archived logs";
+ purge_archived_logs(ut_time() - srv_log_arch_expire_sec,
+ 0);
+ }
+}
+
+/*********************************************************************//**
+Perform one pass of the master thread's shutdown work. What is done
+depends on srv_fast_shutdown:
+2 => very fast shutdown => do no book keeping, return immediately
+1 => normal shutdown => process the drop table queue and make a checkpoint
+0 => slow shutdown => in addition to the above do an ibuf merge and
+flush the log buffer if it is due
+@return TRUE if some work was done (tables remain to be dropped or
+change buffer bytes were merged), FALSE otherwise */
+static
+ibool
+srv_master_do_shutdown_tasks(
+/*=========================*/
+	ib_time_t*	last_print_time)/*!< in/out: last time the progress
+					message was printed */
+{
+	ulint	n_bytes_merged = 0;
+
+	ut_ad(!srv_read_only_mode);
+
+	++srv_main_shutdown_loops;
+
+	ut_a(srv_shutdown_state > 0);
+
+	if (srv_fast_shutdown == 2) {
+		/* Very fast shutdown: none of the work below is needed. */
+		return(FALSE);
+	}
+
+	/* ALTER TABLE in MySQL requires on Unix that the table handler
+	can drop tables lazily after there no longer are SELECT
+	queries to them. */
+	srv_main_thread_op_info = "doing background drop tables";
+	const ulint	n_tables_to_drop
+		= row_drop_tables_for_mysql_in_background();
+
+	/* Make sure that there is enough reusable space in the redo
+	log files. */
+	srv_main_thread_op_info = "checking free log space";
+	log_free_check();
+
+	/* The ibuf merge and the log flush are skipped for a normal
+	(srv_fast_shutdown == 1) shutdown. */
+	if (srv_fast_shutdown != 1) {
+		srv_main_thread_op_info = "doing insert buffer merge";
+		n_bytes_merged = ibuf_contract_in_background(0, TRUE);
+
+		/* Flush logs if needed */
+		srv_sync_log_buffer_in_background();
+	}
+
+	/* A checkpoint is requested on every pass. */
+	srv_main_thread_op_info = "making checkpoint";
+	log_checkpoint(TRUE, FALSE, FALSE);
+
+	/* Report progress; the callee limits itself to one message per
+	60 seconds. */
+	if (srv_shutdown_state > 0 && srv_print_verbose_log) {
+		srv_shutdown_print_master_pending(
+			last_print_time, n_tables_to_drop, n_bytes_merged);
+	}
+
+	return(n_bytes_merged || n_tables_to_drop);
+}
+
+/*********************************************************************//**
+Puts the master thread to sleep. The master thread polls: it sleeps for
+one second and then checks the state of the server again.
+srv_main_thread_op_info reads "sleeping" for the duration of the nap and
+is reset to the empty string afterwards. */
+static
+void
+srv_master_sleep(void)
+/*==================*/
+{
+	/* Length of one nap: 1000000, i.e. the one second mentioned
+	above expressed in microseconds. */
+	const ulint	nap_usec = 1000000;
+
+	srv_main_thread_op_info = "sleeping";
+
+	os_thread_sleep(nap_usec);
+
+	srv_main_thread_op_info = "";
+}
+
+/*********************************************************************//**
+The master thread controlling the server.
+After registering itself and reserving its slot, the thread repeats the
+following: while srv_shutdown_state is SRV_SHUTDOWN_NONE it sleeps via
+srv_master_sleep() and then runs the active or the idle task set,
+depending on whether srv_check_activity() reports activity since the
+last recorded activity count. Once shutdown has begun it runs
+srv_master_do_shutdown_tasks() until that returns FALSE, suspends itself
+and waits on its slot event. On wakeup it exits if the shutdown state is
+SRV_SHUTDOWN_EXIT_THREADS, otherwise it starts over.
+@return a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(srv_master_thread)(
+/*==============================*/
+ void* arg __attribute__((unused)))
+ /*!< in: a dummy parameter required by
+ os_thread_create */
+{
+ srv_slot_t* slot;
+ ulint old_activity_count = srv_get_activity_count();
+ ib_time_t last_print_time;
+
+ ut_ad(!srv_read_only_mode);
+
+ /* Record this thread's id and apply the configured scheduling
+ priority for the master thread. */
+ srv_master_tid = os_thread_get_tid();
+
+ os_thread_set_priority(srv_master_tid, srv_sched_priority_master);
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+ fprintf(stderr, "Master thread starts, id %lu\n",
+ os_thread_pf(os_thread_get_curr_id()));
+#endif /* UNIV_DEBUG_THREAD_CREATION */
+
+#ifdef UNIV_PFS_THREAD
+ pfs_register_thread(srv_master_thread_key);
+#endif /* UNIV_PFS_THREAD */
+
+ srv_main_thread_process_no = os_proc_get_number();
+ srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
+
+ /* The master thread is required to occupy the first entry of
+ srv_sys->sys_threads. */
+ slot = srv_reserve_slot(SRV_MASTER);
+ ut_a(slot == srv_sys->sys_threads);
+
+ last_print_time = ut_time();
+loop:
+ /* With this level of forced recovery no background work is done at
+ all: go straight to the suspended state. */
+ if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
+ goto suspend_thread;
+ }
+
+ while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
+
+ srv_master_sleep();
+
+ MONITOR_INC(MONITOR_MASTER_THREAD_SLEEP);
+
+ srv_current_thread_priority = srv_master_thread_priority;
+
+ /* Activity since the last recorded count selects the active
+ task set; otherwise the idle task set is run. */
+ if (srv_check_activity(old_activity_count)) {
+ old_activity_count = srv_get_activity_count();
+ srv_master_do_active_tasks();
+ } else {
+ srv_master_do_idle_tasks();
+ }
+ }
+
+ /* Shutdown has started: keep running the shutdown tasks for as
+ long as they report that work was done. */
+ while (srv_master_do_shutdown_tasks(&last_print_time)) {
+
+ /* Shouldn't loop here in case of very fast shutdown */
+ ut_ad(srv_fast_shutdown < 2);
+ }
+
+suspend_thread:
+ srv_main_thread_op_info = "suspending";
+
+ srv_suspend_thread(slot);
+
+ /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql()
+ waits for database activity to die down when converting < 4.1.x
+ databases, and relies on this string being exactly as it is. InnoDB
+ manual also mentions this string in several places. */
+ srv_main_thread_op_info = "waiting for server activity";
+
+ os_event_wait(slot->event);
+
+ /* Woken up: either terminate for good or resume the main loop. */
+ if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) {
+ os_thread_exit(NULL);
+ }
+
+ goto loop;
+
+ OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
+}
+
+/*********************************************************************//**
+Decide from the current shutdown state whether purge should stop.
+@return true if purge should exit, false if it should carry on */
+static
+bool
+srv_purge_should_exit(
+/*==============*/
+	ulint	n_purged)	/*!< in: pages purged in last batch */
+{
+	bool	exit_now = false;
+
+	switch (srv_shutdown_state) {
+	case SRV_SHUTDOWN_NONE:
+		/* No shutdown in progress: keep purging. */
+		break;
+
+	case SRV_SHUTDOWN_CLEANUP:
+	case SRV_SHUTDOWN_EXIT_THREADS:
+		/* Stop right away unless this is a slow shutdown
+		(srv_fast_shutdown == 0); a slow shutdown stops once
+		the last batch purged nothing. */
+		exit_now = (srv_fast_shutdown != 0 || n_purged == 0);
+		break;
+
+	case SRV_SHUTDOWN_LAST_PHASE:
+	case SRV_SHUTDOWN_FLUSH_PHASE:
+		/* This function must not be reached in these states. */
+		ut_error;
+	}
+
+	return(exit_now);
+}
+
+/*********************************************************************//**
+Take the first task, if any, off the server task queue and run it.
+The queue is only touched while holding srv_sys->tasks_mutex; the task
+itself runs after the mutex has been released.
+@return true if a task was executed, false if the queue was empty */
+static
+bool
+srv_task_execute(void)
+/*==================*/
+{
+	que_thr_t*	task = NULL;
+
+	ut_ad(!srv_read_only_mode);
+	ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
+
+	mutex_enter(&srv_sys->tasks_mutex);
+
+	if (UT_LIST_GET_LEN(srv_sys->tasks) > 0) {
+
+		task = UT_LIST_GET_FIRST(srv_sys->tasks);
+
+		/* Only purge tasks are expected on this queue. */
+		ut_a(que_node_get_type(task->child) == QUE_NODE_PURGE);
+
+		UT_LIST_REMOVE(queue, srv_sys->tasks, task);
+	}
+
+	mutex_exit(&srv_sys->tasks_mutex);
+
+	if (task == NULL) {
+		/* Nothing was queued. */
+		return(false);
+	}
+
+	que_run_threads(task);
+
+	/* Account for the finished task and register server activity. */
+	os_atomic_inc_ulint(
+		&purge_sys->bh_mutex, &purge_sys->n_completed, 1);
+
+	srv_inc_activity_count();
+
+	return(true);
+}
+
+/* Counter that every starting worker thread increments atomically; the
+value obtained is used by the worker as its index into srv_purge_tids[].
+(Index 0 of that array is written by the purge coordinator thread.) */
+static ulint purge_tid_i = 0;
+
+/*********************************************************************//**
+Worker thread that reads tasks from the work queue and executes them.
+The thread repeatedly suspends itself, waits on its slot event, and on
+wakeup tries to execute one queued task; it leaves the loop once
+purge_sys->state is PURGE_STATE_EXIT.
+@return a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(srv_worker_thread)(
+/*==============================*/
+ void* arg __attribute__((unused))) /*!< in: a dummy parameter
+ required by os_thread_create */
+{
+ srv_slot_t* slot;
+ /* NOTE(review): presumably os_atomic_increment_ulint() returns the
+ incremented value, so the first worker gets index 1 - confirm. */
+ ulint tid_i = os_atomic_increment_ulint(&purge_tid_i, 1);
+
+ ut_ad(tid_i < srv_n_purge_threads);
+ ut_ad(!srv_read_only_mode);
+ ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
+
+ /* Record this thread's id and apply the configured purge scheduling
+ priority to it. */
+ srv_purge_tids[tid_i] = os_thread_get_tid();
+ os_thread_set_priority(srv_purge_tids[tid_i],
+ srv_sched_priority_purge);
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: worker thread starting, id %lu\n",
+ os_thread_pf(os_thread_get_curr_id()));
+#endif /* UNIV_DEBUG_THREAD_CREATION */
+
+ slot = srv_reserve_slot(SRV_WORKER);
+
+ /* Worker threads only exist when more than one purge thread is
+ configured. */
+ ut_a(srv_n_purge_threads > 1);
+
+ srv_sys_mutex_enter();
+
+ ut_a(srv_sys->n_threads_active[SRV_WORKER] < srv_n_purge_threads);
+
+ srv_sys_mutex_exit();
+
+ /* We need to ensure that the worker threads exit after the
+ purge coordinator thread. Otherwise the purge coordinator can
+ end up waiting forever in trx_purge_wait_for_workers_to_complete() */
+
+ do {
+ srv_suspend_thread(slot);
+
+ os_event_wait(slot->event);
+
+ srv_current_thread_priority = srv_purge_thread_priority;
+
+ if (srv_task_execute()) {
+
+ /* If there are tasks in the queue, wakeup
+ the purge coordinator thread. */
+
+ srv_wake_purge_thread_if_not_active();
+ }
+
+ /* Note: we are checking the state without holding the
+ purge_sys->latch here. */
+ } while (purge_sys->state != PURGE_STATE_EXIT);
+
+ srv_free_slot(slot);
+
+ /* Under the purge latch, verify that purge has stopped for good and
+ that the server is indeed shutting down. */
+ rw_lock_x_lock(&purge_sys->latch);
+
+ ut_a(!purge_sys->running);
+ ut_a(purge_sys->state == PURGE_STATE_EXIT);
+ ut_a(srv_shutdown_state > SRV_SHUTDOWN_NONE);
+
+ rw_lock_x_unlock(&purge_sys->latch);
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: Purge worker thread exiting, id %lu\n",
+ os_thread_pf(os_thread_get_curr_id()));
+#endif /* UNIV_DEBUG_THREAD_CREATION */
+
+ /* We count the number of threads in os_thread_exit(). A created
+ thread should always use that to exit and not use return() to exit. */
+ os_thread_exit(NULL);
+
+ OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
+}
+
+/*********************************************************************//**
+Do the actual purge operation. Runs purge batches in a loop, adapting the
+number of threads used between 1 and n_threads, until a batch purges
+nothing, the history list is empty, or srv_purge_should_exit() says stop.
+The thread count in use, the batch counter and the last history list
+snapshot are kept in function-static variables and therefore persist
+across calls.
+@return length of history list before the last purge batch. */
+static
+ulint
+srv_do_purge(
+/*=========*/
+ ulint n_threads, /*!< in: number of threads to use */
+ ulint* n_total_purged) /*!< in/out: total pages purged */
+{
+ ulint n_pages_purged;
+
+ /* Number of batches run so far, over all calls. */
+ static ulint count = 0;
+ /* Number of threads currently used for a batch; 0 until first use. */
+ static ulint n_use_threads = 0;
+ /* History list length snapshot taken before the latest batch. */
+ static ulint rseg_history_len = 0;
+ ulint old_activity_count = srv_get_activity_count();
+
+ ut_a(n_threads > 0);
+ ut_ad(!srv_read_only_mode);
+
+ /* Purge until there are no more records to purge and there is
+ no change in configuration or server state. If the user has
+ configured more than one purge thread then we treat that as a
+ pool of threads and only use the extra threads if purge can't
+ keep up with updates. */
+
+ /* First call: start with all the configured threads. */
+ if (n_use_threads == 0) {
+ n_use_threads = n_threads;
+ }
+
+ do {
+ srv_current_thread_priority = srv_purge_thread_priority;
+
+ if (trx_sys->rseg_history_len > rseg_history_len
+ || (srv_max_purge_lag > 0
+ && rseg_history_len > srv_max_purge_lag)) {
+
+ /* History length is now longer than what it was
+ when we took the last snapshot, or the snapshot
+ exceeds the configured maximum purge lag. Use more
+ threads. */
+
+ if (n_use_threads < n_threads) {
+ ++n_use_threads;
+ }
+
+ } else if (srv_check_activity(old_activity_count)
+ && n_use_threads > 1) {
+
+ /* History length same or smaller since last snapshot,
+ use fewer threads. */
+
+ --n_use_threads;
+
+ old_activity_count = srv_get_activity_count();
+ }
+
+ /* Ensure that the number of purge threads in use is at
+ least one and no more than what was configured. */
+
+ ut_a(n_use_threads > 0);
+ ut_a(n_use_threads <= n_threads);
+
+ /* Take a snapshot of the history list before purge. */
+ if ((rseg_history_len = trx_sys->rseg_history_len) == 0) {
+ break;
+ }
+
+ n_pages_purged = trx_purge(
+ n_use_threads, srv_purge_batch_size, false);
+
+ /* On the first batch and then on every TRX_SYS_N_RSEGS-th
+ batch, run an additional single-threaded pass with the last
+ argument set to true. */
+ if (!(count++ % TRX_SYS_N_RSEGS)) {
+ /* Force a truncate of the history list. */
+ n_pages_purged += trx_purge(
+ 1, srv_purge_batch_size, true);
+ }
+
+ *n_total_purged += n_pages_purged;
+
+ } while (!srv_purge_should_exit(n_pages_purged) && n_pages_purged > 0);
+
+ return(rseg_history_len);
+}
+
+/*********************************************************************//**
+Suspend the purge coordinator thread. The thread marks purge as not
+running and waits on its slot event, either with a timeout of
+SRV_PURGE_MAX_TIMEOUT or, when purge has been stopped or there appears
+to be nothing new to purge, without a timeout. The loop ends when the
+thread has been woken up and purge is not in the PURGE_STATE_STOP state;
+on return the slot is marked as not suspended. */
+static
+void
+srv_purge_coordinator_suspend(
+/*==========================*/
+ srv_slot_t* slot, /*!< in/out: Purge coordinator
+ thread slot */
+ ulint rseg_history_len) /*!< in: history list length
+ before last purge */
+{
+ ut_ad(!srv_read_only_mode);
+ ut_a(slot->type == SRV_PURGE);
+
+ /* true when the next wait must be the untimed one. */
+ bool stop = false;
+
+ /** Maximum wait time on the purge event, in micro-seconds. */
+ static const ulint SRV_PURGE_MAX_TIMEOUT = 10000;
+
+ /* The value returned here is handed to the event waits below. */
+ ib_int64_t sig_count = srv_suspend_thread(slot);
+
+ do {
+ ulint ret;
+
+ rw_lock_x_lock(&purge_sys->latch);
+
+ purge_sys->running = false;
+
+ rw_lock_x_unlock(&purge_sys->latch);
+
+ /* We don't wait right away on the non-timed wait because
+ we want to signal the thread that wants to suspend purge. */
+
+ if (stop) {
+ os_event_wait_low(slot->event, sig_count);
+ ret = 0;
+ } else if (rseg_history_len <= trx_sys->rseg_history_len) {
+ ret = os_event_wait_time_low(
+ slot->event, SRV_PURGE_MAX_TIMEOUT, sig_count);
+ } else {
+ /* We don't want to waste time waiting, if the
+ history list increased by the time we got here,
+ unless purge has been stopped.
+ NOTE(review): this branch is taken when the current
+ trx_sys->rseg_history_len is smaller than the
+ rseg_history_len passed in, which does not match the
+ wording above - confirm the intent. */
+ ret = 0;
+ }
+
+ srv_sys_mutex_enter();
+
+ /* The thread can be in state !suspended after the timeout
+ but before this check if another thread sent a wakeup signal. */
+
+ if (slot->suspended) {
+ slot->suspended = FALSE;
+ ++srv_sys->n_threads_active[slot->type];
+ ut_a(srv_sys->n_threads_active[slot->type] == 1);
+ }
+
+ srv_sys_mutex_exit();
+
+ /* Suspend again and fetch a fresh value for the next wait, in
+ case the loop goes around once more. */
+ sig_count = srv_suspend_thread(slot);
+
+ rw_lock_x_lock(&purge_sys->latch);
+
+ stop = (purge_sys->state == PURGE_STATE_STOP);
+
+ if (!stop) {
+ ut_a(purge_sys->n_stop == 0);
+ purge_sys->running = true;
+ } else {
+ ut_a(purge_sys->n_stop > 0);
+
+ /* Signal that we are suspended. */
+ os_event_set(purge_sys->event);
+ }
+
+ rw_lock_x_unlock(&purge_sys->latch);
+
+ if (ret == OS_SYNC_TIME_EXCEEDED) {
+
+ /* No new records added since wait started then simply
+ wait for new records. The magic number 5000 is an
+ approximation for the case where we have cached UNDO
+ log records which prevent truncate of the UNDO
+ segments. */
+
+ if (rseg_history_len == trx_sys->rseg_history_len
+ && trx_sys->rseg_history_len < 5000) {
+
+ stop = true;
+ }
+ }
+
+ } while (stop);
+
+ /* Undo the suspension done at the end of the last iteration, so
+ that the slot is marked as not suspended when we return. */
+ srv_sys_mutex_enter();
+
+ if (slot->suspended) {
+ slot->suspended = FALSE;
+ ++srv_sys->n_threads_active[slot->type];
+ ut_a(srv_sys->n_threads_active[slot->type] == 1);
+ }
+
+ srv_sys_mutex_exit();
+}
+
+/*********************************************************************//**
+Purge coordinator thread that schedules the purge tasks.
+It moves purge from PURGE_STATE_INIT to PURGE_STATE_RUN, then alternates
+between purging (srv_do_purge()) and suspending itself when the last
+round purged nothing or purge has been stopped. When
+srv_purge_should_exit() reports that purge must end, it performs the
+final purge passes, sets PURGE_STATE_EXIT, releases the worker threads
+so that they can exit too, and terminates.
+@return a dummy parameter */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(srv_purge_coordinator_thread)(
+/*=========================================*/
+ void* arg __attribute__((unused))) /*!< in: a dummy parameter
+ required by os_thread_create */
+{
+ srv_slot_t* slot;
+ /* Starts as ULINT_UNDEFINED rather than 0 so that the "purged
+ nothing" test below does not suspend the thread before the first
+ purge round (presumably ULINT_UNDEFINED is non-zero - confirm). */
+ ulint n_total_purged = ULINT_UNDEFINED;
+
+ ut_ad(!srv_read_only_mode);
+ ut_a(srv_n_purge_threads >= 1);
+ ut_a(trx_purge_state() == PURGE_STATE_INIT);
+ ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
+
+ /* The coordinator records its thread id in entry 0 of
+ srv_purge_tids[] and applies the purge scheduling priority. */
+ srv_purge_tids[0] = os_thread_get_tid();
+ os_thread_set_priority(srv_purge_tids[0], srv_sched_priority_purge);
+
+ rw_lock_x_lock(&purge_sys->latch);
+
+ purge_sys->running = true;
+ purge_sys->state = PURGE_STATE_RUN;
+
+ rw_lock_x_unlock(&purge_sys->latch);
+
+#ifdef UNIV_PFS_THREAD
+ pfs_register_thread(srv_purge_thread_key);
+#endif /* UNIV_PFS_THREAD */
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: Purge coordinator thread created, id %lu\n",
+ os_thread_pf(os_thread_get_curr_id()));
+#endif /* UNIV_DEBUG_THREAD_CREATION */
+
+ slot = srv_reserve_slot(SRV_PURGE);
+
+ ulint rseg_history_len = trx_sys->rseg_history_len;
+
+ do {
+ /* If there are no records to purge or the last
+ purge didn't purge any records then wait for activity. */
+
+ if (purge_sys->state == PURGE_STATE_STOP
+ || n_total_purged == 0) {
+
+ srv_purge_coordinator_suspend(slot, rseg_history_len);
+ }
+
+ /* Re-check the exit condition, which may have become true
+ while the thread was suspended. */
+ if (srv_purge_should_exit(n_total_purged)) {
+ ut_a(!slot->suspended);
+ break;
+ }
+
+ n_total_purged = 0;
+
+ srv_current_thread_priority = srv_purge_thread_priority;
+
+ rseg_history_len = srv_do_purge(
+ srv_n_purge_threads, &n_total_purged);
+
+ srv_inc_activity_count();
+
+ } while (!srv_purge_should_exit(n_total_purged));
+
+ /* Ensure that we don't jump out of the loop unless the
+ exit condition is satisfied. */
+
+ ut_a(srv_purge_should_exit(n_total_purged));
+
+ /* Non-zero initial value so that the loop below runs at least once
+ on a slow shutdown. */
+ ulint n_pages_purged = ULINT_MAX;
+
+ /* Ensure that all records are purged if it is not a fast shutdown.
+ This covers the case where a record can be added after we exit the
+ loop above. */
+ while (srv_fast_shutdown == 0 && n_pages_purged > 0) {
+ n_pages_purged = trx_purge(1, srv_purge_batch_size, false);
+ }
+
+ /* Force a truncate of the history list. */
+ n_pages_purged = trx_purge(1, srv_purge_batch_size, true);
+ ut_a(n_pages_purged == 0 || srv_fast_shutdown != 0);
+
+ /* The task queue should always be empty, independent of fast
+ shutdown state. */
+ ut_a(srv_get_task_queue_length() == 0);
+
+ srv_free_slot(slot);
+
+ /* Note that we are shutting down. */
+ rw_lock_x_lock(&purge_sys->latch);
+
+ purge_sys->state = PURGE_STATE_EXIT;
+
+ purge_sys->running = false;
+
+ rw_lock_x_unlock(&purge_sys->latch);
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: Purge coordinator exiting, id %lu\n",
+ os_thread_pf(os_thread_get_curr_id()));
+#endif /* UNIV_DEBUG_THREAD_CREATION */
+
+ /* Ensure that all the worker threads quit. The workers are woken
+ only after PURGE_STATE_EXIT has been set above, which is the state
+ their loop waits for. */
+ if (srv_n_purge_threads > 1) {
+ srv_release_threads(SRV_WORKER, srv_n_purge_threads - 1);
+ }
+
+ /* We count the number of threads in os_thread_exit(). A created
+ thread should always use that to exit and not use return() to exit. */
+ os_thread_exit(NULL);
+
+ OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */
+}
+
+/**********************************************************************//**
+Appends a query thread to the end of the server task queue and then
+releases one worker thread, if a suspended one exists, to pick it up. */
+UNIV_INTERN
+void
+srv_que_task_enqueue_low(
+/*=====================*/
+	que_thr_t*	thr)	/*!< in: query thread */
+{
+	ut_ad(!srv_read_only_mode);
+
+	/* The queue is protected by srv_sys->tasks_mutex. */
+	mutex_enter(&srv_sys->tasks_mutex);
+	UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr);
+	mutex_exit(&srv_sys->tasks_mutex);
+
+	/* Wake a worker only after the mutex has been released. */
+	srv_release_threads(SRV_WORKER, 1);
+}
+
+/**********************************************************************//**
+Reads the length of the server task queue while holding
+srv_sys->tasks_mutex.
+@return number of tasks in queue */
+UNIV_INTERN
+ulint
+srv_get_task_queue_length(void)
+/*===========================*/
+{
+	ut_ad(!srv_read_only_mode);
+
+	mutex_enter(&srv_sys->tasks_mutex);
+
+	const ulint	queue_len = UT_LIST_GET_LEN(srv_sys->tasks);
+
+	mutex_exit(&srv_sys->tasks_mutex);
+
+	return(queue_len);
+}
+
+/**********************************************************************//**
+Wakeup the purge threads: the coordinator and, when more than one purge
+thread is configured, the srv_n_purge_threads - 1 workers as well.
+Does nothing when srv_force_recovery is at SRV_FORCE_NO_BACKGROUND or
+above. */
+UNIV_INTERN
+void
+srv_purge_wakeup(void)
+/*==================*/
+{
+	ut_ad(!srv_read_only_mode);
+
+	if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) {
+		/* Background threads are disabled at this recovery
+		level, so there is nobody to wake up. */
+		return;
+	}
+
+	srv_release_threads(SRV_PURGE, 1);
+
+	if (srv_n_purge_threads > 1) {
+		/* All the threads besides the coordinator are workers. */
+		srv_release_threads(SRV_WORKER, srv_n_purge_threads - 1);
+	}
+}
+
diff --git a/storage/xtradb/srv/srv0start.c b/storage/xtradb/srv/srv0start.c
deleted file mode 100644
index f4567c49ca0..00000000000
--- a/storage/xtradb/srv/srv0start.c
+++ /dev/null
@@ -1,2552 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved.
-Copyright (c) 2008, Google Inc.
-Copyright (c) 2009, Percona Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Google, Inc. Those modifications are gratefully acknowledged and are described
-briefly in the InnoDB documentation. The contributions by Google are
-incorporated with their permission, and subject to the conditions contained in
-the file COPYING.Google.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-/********************************************************************//**
-@file srv/srv0start.c
-Starts the InnoDB database server
-
-Created 2/16/1996 Heikki Tuuri
-*************************************************************************/
-
-#include "ut0mem.h"
-#include "mem0mem.h"
-#include "data0data.h"
-#include "data0type.h"
-#include "dict0dict.h"
-#include "buf0buf.h"
-#include "os0file.h"
-#include "os0thread.h"
-#include "fil0fil.h"
-#include "fsp0fsp.h"
-#include "rem0rec.h"
-#include "mtr0mtr.h"
-#include "log0log.h"
-#include "log0online.h"
-#include "log0recv.h"
-#include "page0page.h"
-#include "page0cur.h"
-#include "trx0trx.h"
-#include "trx0sys.h"
-#include "btr0btr.h"
-#include "btr0cur.h"
-#include "rem0rec.h"
-#include "ibuf0ibuf.h"
-#include "srv0start.h"
-#include "srv0srv.h"
-#ifndef UNIV_HOTBACKUP
-# include "os0proc.h"
-# include "sync0sync.h"
-# include "buf0flu.h"
-# include "buf0rea.h"
-# include "dict0boot.h"
-# include "dict0load.h"
-# include "que0que.h"
-# include "usr0sess.h"
-# include "lock0lock.h"
-# include "trx0roll.h"
-# include "trx0purge.h"
-# include "lock0lock.h"
-# include "pars0pars.h"
-# include "btr0sea.h"
-# include "rem0cmp.h"
-# include "dict0crea.h"
-# include "row0ins.h"
-# include "row0sel.h"
-# include "row0upd.h"
-# include "row0row.h"
-# include "row0mysql.h"
-# include "btr0pcur.h"
-# include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */
-# include "zlib.h" /* for ZLIB_VERSION */
-# include "buf0lru.h" /* for buf_LRU_file_restore() */
-# include "os0stacktrace.h"
-
-/** Log sequence number immediately after startup */
-UNIV_INTERN ib_uint64_t srv_start_lsn;
-/** Log sequence number at shutdown */
-UNIV_INTERN ib_uint64_t srv_shutdown_lsn;
-
-#ifdef HAVE_DARWIN_THREADS
-# include <sys/utsname.h>
-/** TRUE if the F_FULLFSYNC option is available */
-UNIV_INTERN ibool srv_have_fullfsync = FALSE;
-#endif
-
-/** TRUE if a raw partition is in use */
-UNIV_INTERN ibool srv_start_raw_disk_in_use = FALSE;
-
-/** TRUE if the server is being started, before rolling back any
-incomplete transactions */
-UNIV_INTERN ibool srv_startup_is_before_trx_rollback_phase = FALSE;
-/** TRUE if the server is being started */
-UNIV_INTERN ibool srv_is_being_started = FALSE;
-/** TRUE if the server was successfully started */
-UNIV_INTERN ibool srv_was_started = FALSE;
-/** TRUE if innobase_start_or_create_for_mysql() has been called */
-static ibool srv_start_has_been_called = FALSE;
-
-/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
-SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
-UNIV_INTERN enum srv_shutdown_state srv_shutdown_state = SRV_SHUTDOWN_NONE;
-
-/** Files comprising the system tablespace */
-static os_file_t files[1000];
-
-/** io_handler_thread parameters for thread identification */
-static ulint n[SRV_MAX_N_IO_THREADS + 8];
-/** io_handler_thread identifiers */
-static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 8];
-
-/** We use this mutex to test the return value of pthread_mutex_trylock
- on successful locking. HP-UX does NOT return 0, though Linux et al do. */
-static os_fast_mutex_t srv_os_test_mutex;
-
-/** Name of srv_monitor_file */
-static char* srv_monitor_file_name;
-#endif /* !UNIV_HOTBACKUP */
-
-/** */
-#define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD
-#define SRV_MAX_N_PENDING_SYNC_IOS 100
-
-#ifdef UNIV_PFS_THREAD
-/* Keys to register InnoDB threads with performance schema */
-UNIV_INTERN mysql_pfs_key_t io_handler_thread_key;
-UNIV_INTERN mysql_pfs_key_t srv_lock_timeout_thread_key;
-UNIV_INTERN mysql_pfs_key_t srv_error_monitor_thread_key;
-UNIV_INTERN mysql_pfs_key_t srv_monitor_thread_key;
-UNIV_INTERN mysql_pfs_key_t srv_master_thread_key;
-UNIV_INTERN mysql_pfs_key_t srv_purge_thread_key;
-UNIV_INTERN mysql_pfs_key_t srv_log_tracking_thread_key;
-#endif /* UNIV_PFS_THREAD */
-
-/*********************************************************************//**
-Convert a numeric string that optionally ends in G or M, to a number
-containing megabytes.
-@return next character in string */
-static
-char*
-srv_parse_megabytes(
-/*================*/
- char* str, /*!< in: string containing a quantity in bytes */
- ulint* megs) /*!< out: the number in megabytes */
-{
- char* endp;
- ulint size;
-
- size = strtoul(str, &endp, 10);
-
- str = endp;
-
- switch (*str) {
- case 'G': case 'g':
- size *= 1024;
- /* fall through */
- case 'M': case 'm':
- str++;
- break;
- default:
- size /= 1024 * 1024;
- break;
- }
-
- *megs = size;
- return(str);
-}
-
-/*********************************************************************//**
-Reads the data files and their sizes from a character string given in
-the .cnf file. The string is scanned twice: the first pass only checks the
-syntax and counts the file definitions, the second pass stores the values
-in the global arrays srv_data_file_names, srv_data_file_sizes and
-srv_data_file_is_raw_partition. The second pass overwrites the ':' that
-ends each path with '\0', so the stored names point into str itself.
-@return TRUE if ok, FALSE on parse error or if the arrays cannot be
-allocated */
-UNIV_INTERN
-ibool
-srv_parse_data_file_paths_and_sizes(
-/*================================*/
-	char*	str)	/*!< in/out: the data file path string */
-{
-	char*	input_str;
-	char*	path;
-	ulint	size;
-	ulint	i	= 0;
-
-	srv_auto_extend_last_data_file = FALSE;
-	srv_last_file_size_max = 0;
-	srv_data_file_names = NULL;
-	srv_data_file_sizes = NULL;
-	srv_data_file_is_raw_partition = NULL;
-
-	input_str = str;
-
-	/* First calculate the number of data files and check syntax:
-	path:size[M | G];path:size[M | G]... . Note that a Windows path may
-	contain a drive name and a ':'. */
-
-	while (*str != '\0') {
-		/* Skip the path: a ':' followed by '\\', '/' or ':' is
-		treated as part of the path, not as its terminator */
-
-		while ((*str != ':' && *str != '\0')
-		       || (*str == ':'
-			   && (*(str + 1) == '\\' || *(str + 1) == '/'
-			       || *(str + 1) == ':'))) {
-			str++;
-		}
-
-		if (*str == '\0') {
-			return(FALSE);
-		}
-
-		str++;
-
-		str = srv_parse_megabytes(str, &size);
-
-		if (0 == strncmp(str, ":autoextend",
-				 (sizeof ":autoextend") - 1)) {
-
-			str += (sizeof ":autoextend") - 1;
-
-			if (0 == strncmp(str, ":max:",
-					 (sizeof ":max:") - 1)) {
-
-				str += (sizeof ":max:") - 1;
-
-				/* NOTE(review): size is reused here, so
-				when ":max:" is given the size == 0 check
-				below tests the maximum, not the initial
-				file size. Left as is; confirm whether this
-				is intended. */
-				str = srv_parse_megabytes(str, &size);
-			}
-
-			/* ":autoextend" is accepted only at the very end
-			of the string, i.e. for the last data file */
-			if (*str != '\0') {
-
-				return(FALSE);
-			}
-		}
-
-		if (strlen(str) >= 6
-		    && *str == 'n'
-		    && *(str + 1) == 'e'
-		    && *(str + 2) == 'w') {
-			str += 3;
-		}
-
-		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
-			str += 3;
-		}
-
-		if (size == 0) {
-			return(FALSE);
-		}
-
-		i++;
-
-		if (*str == ';') {
-			str++;
-		} else if (*str != '\0') {
-
-			return(FALSE);
-		}
-	}
-
-	if (i == 0) {
-		/* If innodb_data_file_path was defined it must contain
-		at least one data file definition */
-
-		return(FALSE);
-	}
-
-	srv_data_file_names = malloc(i * sizeof *srv_data_file_names);
-	srv_data_file_sizes = malloc(i * sizeof *srv_data_file_sizes);
-	srv_data_file_is_raw_partition = malloc(
-		i * sizeof *srv_data_file_is_raw_partition);
-
-	if (srv_data_file_names == NULL
-	    || srv_data_file_sizes == NULL
-	    || srv_data_file_is_raw_partition == NULL) {
-		/* Out of memory: release whatever was obtained so that
-		the second pass never writes through a NULL pointer */
-
-		free(srv_data_file_names);
-		srv_data_file_names = NULL;
-		free(srv_data_file_sizes);
-		srv_data_file_sizes = NULL;
-		free(srv_data_file_is_raw_partition);
-		srv_data_file_is_raw_partition = NULL;
-
-		return(FALSE);
-	}
-
-	srv_n_data_files = i;
-
-	/* Then store the actual values to our arrays */
-
-	str = input_str;
-	i = 0;
-
-	while (*str != '\0') {
-		path = str;
-
-		/* Note that we must step over the ':' in a Windows path;
-		a Windows path normally looks like C:\ibdata\ibdata1:1G, but
-		a Windows raw partition may have a specification like
-		\\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */
-
-		while ((*str != ':' && *str != '\0')
-		       || (*str == ':'
-			   && (*(str + 1) == '\\' || *(str + 1) == '/'
-			       || *(str + 1) == ':'))) {
-			str++;
-		}
-
-		if (*str == ':') {
-			/* Make path a null-terminated string */
-			*str = '\0';
-			str++;
-		}
-
-		str = srv_parse_megabytes(str, &size);
-
-		srv_data_file_names[i] = path;
-		srv_data_file_sizes[i] = size;
-
-		if (0 == strncmp(str, ":autoextend",
-				 (sizeof ":autoextend") - 1)) {
-
-			srv_auto_extend_last_data_file = TRUE;
-
-			str += (sizeof ":autoextend") - 1;
-
-			if (0 == strncmp(str, ":max:",
-					 (sizeof ":max:") - 1)) {
-
-				str += (sizeof ":max:") - 1;
-
-				str = srv_parse_megabytes(
-					str, &srv_last_file_size_max);
-			}
-
-			if (*str != '\0') {
-
-				return(FALSE);
-			}
-		}
-
-		(srv_data_file_is_raw_partition)[i] = 0;
-
-		if (strlen(str) >= 6
-		    && *str == 'n'
-		    && *(str + 1) == 'e'
-		    && *(str + 2) == 'w') {
-			str += 3;
-			(srv_data_file_is_raw_partition)[i] = SRV_NEW_RAW;
-		}
-
-		if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
-			str += 3;
-
-			/* Keep SRV_NEW_RAW if "new" preceded "raw" */
-			if ((srv_data_file_is_raw_partition)[i] == 0) {
-				(srv_data_file_is_raw_partition)[i] = SRV_OLD_RAW;
-			}
-		}
-
-		i++;
-
-		if (*str == ';') {
-			str++;
-		}
-	}
-
-	return(TRUE);
-}
-
-/*********************************************************************//**
-Reads log group home directories from a character string given in
-the .cnf file. The string must hold exactly one path (an optional
-trailing ';' is tolerated). A ';' that ends the path is overwritten with
-'\0', so the entry stored in srv_log_group_home_dirs points into str.
-@return TRUE if ok, FALSE on parse error or if the array cannot be
-allocated */
-UNIV_INTERN
-ibool
-srv_parse_log_group_home_dirs(
-/*==========================*/
-	char*	str)	/*!< in/out: character string */
-{
-	char*	input_str;
-	char*	path;
-	ulint	i	= 0;
-
-	srv_log_group_home_dirs = NULL;
-
-	input_str = str;
-
-	/* First calculate the number of directories and check syntax:
-	path;path;... */
-
-	while (*str != '\0') {
-		while (*str != ';' && *str != '\0') {
-			str++;
-		}
-
-		i++;
-
-		/* The scan above can only stop at ';' or at the end of
-		the string, so stepping over a ';' is all that is needed */
-		if (*str == ';') {
-			str++;
-		}
-	}
-
-	if (i != 1) {
-		/* If innodb_log_group_home_dir was defined it must
-		contain exactly one path definition under current MySQL */
-
-		return(FALSE);
-	}
-
-	srv_log_group_home_dirs = malloc(i * sizeof *srv_log_group_home_dirs);
-
-	if (srv_log_group_home_dirs == NULL) {
-		/* Out of memory: do not store through a NULL pointer */
-
-		return(FALSE);
-	}
-
-	/* Then store the actual values to our array */
-
-	str = input_str;
-	i = 0;
-
-	while (*str != '\0') {
-		path = str;
-
-		while (*str != ';' && *str != '\0') {
-			str++;
-		}
-
-		if (*str == ';') {
-			/* Make path a null-terminated string */
-			*str = '\0';
-			str++;
-		}
-
-		srv_log_group_home_dirs[i] = path;
-
-		i++;
-	}
-
-	return(TRUE);
-}
-
-/*********************************************************************//**
-Releases the arrays allocated by srv_parse_data_file_paths_and_sizes()
-and srv_parse_log_group_home_dirs(), and resets the global pointers to
-NULL. */
-UNIV_INTERN
-void
-srv_free_paths_and_sizes(void)
-/*==========================*/
-{
-	/* Arrays describing the data files */
-	free(srv_data_file_names);
-	free(srv_data_file_sizes);
-	free(srv_data_file_is_raw_partition);
-
-	/* Array of log group home directories */
-	free(srv_log_group_home_dirs);
-
-	/* Clear the pointers so that nothing refers to freed memory */
-	srv_data_file_names = NULL;
-	srv_data_file_sizes = NULL;
-	srv_data_file_is_raw_partition = NULL;
-	srv_log_group_home_dirs = NULL;
-}
-
-#ifndef UNIV_HOTBACKUP
-/********************************************************************//**
-I/o-handler thread function. Calls fil_aio_wait() for its segment in a
-loop until srv_shutdown_state becomes SRV_SHUTDOWN_EXIT_THREADS.
-@return OS_THREAD_DUMMY_RETURN */
-static
-os_thread_ret_t
-io_handler_thread(
-/*==============*/
-	void*	arg)	/*!< in: pointer to the number of the segment in
-			the aio array */
-{
-	ulint	segment;
-
-	segment = *((ulint*)arg);
-
-#ifdef UNIV_DEBUG_THREAD_CREATION
-	/* Cast explicitly to match the %lu conversions, as the other
-	diagnostic messages in this file do */
-	fprintf(stderr, "Io handler thread %lu starts, id %lu\n",
-		(ulong) segment,
-		(ulong) os_thread_pf(os_thread_get_curr_id()));
-#endif
-
-#ifdef UNIV_PFS_THREAD
-	pfs_register_thread(io_handler_thread_key);
-#endif /* UNIV_PFS_THREAD */
-
-	while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
-		fil_aio_wait(segment);
-	}
-
-	/* We count the number of threads in os_thread_exit(). A created
-	thread should always use that to exit and not use return() to exit.
-	The thread actually never comes here because it is exited in an
-	os_event_wait(). */
-
-	os_thread_exit(NULL);
-
-	OS_THREAD_DUMMY_RETURN;
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
-Normalizes a directory path for Windows by replacing every '/' with '\'
-in place. When __WIN__ is not defined the function does nothing. */
-UNIV_INTERN
-void
-srv_normalize_path_for_win(
-/*=======================*/
-	char*	str __attribute__((unused)))	/*!< in/out: null-terminated
-						character string */
-{
-#ifdef __WIN__
-	while (*str != '\0') {
-
-		if (*str == '/') {
-			*str = '\\';
-		}
-
-		str++;
-	}
-#endif
-}
-
-#ifndef UNIV_HOTBACKUP
-/*********************************************************************//**
-Converts a file size given as a number of database pages to bytes and
-returns the low 32 bits of the result.
-@return low 32 bits of file size when expressed in bytes */
-static
-ulint
-srv_calc_low32(
-/*===========*/
-	ulint	file_size)	/*!< in: file size in database pages */
-{
-	/* Pages to bytes: shift by the page size exponent */
-	const ulint	size_in_bytes = file_size << UNIV_PAGE_SIZE_SHIFT;
-
-	return(size_in_bytes & 0xFFFFFFFFUL);
-}
-
-/*********************************************************************//**
-Converts a file size given as a number of database pages to bytes and
-returns the part of the result above the low 32 bits.
-@return high 32 bits of file size when expressed in bytes */
-static
-ulint
-srv_calc_high32(
-/*============*/
-	ulint	file_size)	/*!< in: file size in database pages */
-{
-	/* Multiplying by the page size and dividing by 2^32 amounts to
-	a single right shift by the difference of the exponents */
-	const ulint	shift = 32 - UNIV_PAGE_SIZE_SHIFT;
-
-	return(file_size >> shift);
-}
-
-/*********************************************************************//**
-Creates or opens the log files and closes them.
-The file name is built from srv_log_group_home_dirs[k], a path separator
-if one is needed, and "ib_logfile" followed by i. The function first tries
-to create the file; if it already exists it is opened instead and its
-size is compared with srv_log_file_size. In both cases the file is closed
-again and registered with fil_node_create(); for i == 0 the file space and
-the log group are created as well.
-@return DB_SUCCESS or error code */
-static
-ulint
-open_or_create_log_file(
-/*====================*/
- ibool create_new_db, /*!< in: TRUE if we should create a
- new database */
- ibool* log_file_created, /*!< out: TRUE if new log file
- created */
- ibool log_file_has_been_opened,/*!< in: TRUE if a log file has been
- opened before: then it is an error
- to try to create another log file */
- ulint k, /*!< in: log group number */
- ulint i) /*!< in: log file number in group */
-{
- ibool ret;
- ulint size;
- ulint size_high;
- char name[10000];
- ulint dirnamelen;
-
- /* create_new_db is not used by this function */
- UT_NOT_USED(create_new_db);
-
- *log_file_created = FALSE;
-
- srv_normalize_path_for_win(srv_log_group_home_dirs[k]);
-
- dirnamelen = strlen(srv_log_group_home_dirs[k]);
- /* Make sure the directory, "ib_logfile" and the file number fit
- in name */
- ut_a(dirnamelen < (sizeof name) - 10 - sizeof "ib_logfile");
- memcpy(name, srv_log_group_home_dirs[k], dirnamelen);
-
- /* Add a path separator if needed. */
- if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
- name[dirnamelen++] = SRV_PATH_SEPARATOR;
- }
-
- sprintf(name + dirnamelen, "%s%lu", "ib_logfile", (ulong) i);
-
- /* First try to create the file; ret is set to FALSE if that fails */
- files[i] = os_file_create(innodb_file_log_key, name,
- OS_FILE_CREATE, OS_FILE_NORMAL,
- OS_LOG_FILE, &ret);
- if (ret == FALSE) {
- /* Any failure other than "file already exists" is fatal */
- if (os_file_get_last_error(FALSE) != OS_FILE_ALREADY_EXISTS
-#ifdef UNIV_AIX
- /* AIX 5.1 after security patch ML7 may have errno set
- to 0 here, which causes our function to return 100;
- work around that AIX problem */
- && os_file_get_last_error(FALSE) != 100
-#endif
- ) {
- fprintf(stderr,
- "InnoDB: Error in creating"
- " or opening %s\n", name);
-
- return(DB_ERROR);
- }
-
- /* The file exists already: open it */
- files[i] = os_file_create(innodb_file_log_key, name,
- OS_FILE_OPEN, OS_FILE_AIO,
- OS_LOG_FILE, &ret);
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in opening %s\n", name);
-
- return(DB_ERROR);
- }
-
- ret = os_file_get_size(files[i], &size, &size_high);
- ut_a(ret);
-
- /* A size mismatch only produces a warning; the size found
- on disk then replaces the configured srv_log_file_size */
- if (size != srv_calc_low32(srv_log_file_size)
- || size_high != srv_calc_high32(srv_log_file_size)) {
-
- fprintf(stderr,
- "InnoDB: Warning: log file %s is"
- " of different size %lu %lu bytes\n"
- "InnoDB: than specified in the .cnf"
- " file %lu %lu bytes!\n",
- name, (ulong) size_high, (ulong) size,
- (ulong) srv_calc_high32(srv_log_file_size),
- (ulong) srv_calc_low32(srv_log_file_size));
-
- /* Combine the two halves into a byte count and convert
- it to pages */
- srv_log_file_size= ((size +
- (((longlong) size_high) << 32)) /
- UNIV_PAGE_SIZE);
- }
- } else {
- /* The file did not exist and has just been created */
- *log_file_created = TRUE;
-
- ut_print_timestamp(stderr);
-
- fprintf(stderr,
- " InnoDB: Log file %s did not exist:"
- " new to be created\n",
- name);
- /* Creating a log file after another one was opened is an
- error */
- if (log_file_has_been_opened) {
-
- return(DB_ERROR);
- }
-
- fprintf(stderr, "InnoDB: Setting log file %s size to %lu MB\n",
- name, (ulong) srv_log_file_size
- >> (20 - UNIV_PAGE_SIZE_SHIFT));
-
- fprintf(stderr,
- "InnoDB: Database physically writes the file"
- " full: wait...\n");
-
- ret = os_file_set_size(name, files[i],
- srv_calc_low32(srv_log_file_size),
- srv_calc_high32(srv_log_file_size));
- if (!ret) {
- fprintf(stderr,
- "InnoDB: Error in creating %s:"
- " probably out of disk space\n",
- name);
-
- return(DB_ERROR);
- }
- }
-
- /* The handle is closed here; only the name and the size are
- registered below */
- ret = os_file_close(files[i]);
- ut_a(ret);
-
- if (i == 0) {
- /* Create in memory the file space object
- which is for this log group */
-
- fil_space_create(name,
- 2 * k + SRV_LOG_SPACE_FIRST_ID, 0, FIL_LOG);
- }
-
- ut_a(fil_validate());
-
- fil_node_create(name, srv_log_file_size,
- 2 * k + SRV_LOG_SPACE_FIRST_ID, FALSE);
-#ifdef UNIV_LOG_ARCHIVE
- /* If this is the first log group, create the file space object
- for archived logs.
- Under MySQL, no archiving ever done. */
-
- if (k == 0 && i == 0) {
- arch_space_id = 2 * k + 1 + SRV_LOG_SPACE_FIRST_ID;
-
- fil_space_create("arch_log_space", arch_space_id, 0, FIL_LOG);
- } else {
- arch_space_id = ULINT_UNDEFINED;
- }
-#endif /* UNIV_LOG_ARCHIVE */
- /* The log group itself is initialized once, with its first file */
- if (i == 0) {
- log_group_init(k, srv_n_log_files,
- srv_log_file_size * UNIV_PAGE_SIZE,
- 2 * k + SRV_LOG_SPACE_FIRST_ID,
- SRV_LOG_SPACE_FIRST_ID + 1); /* dummy arch
- space id */
- }
-
- return(DB_SUCCESS);
-}
-
-/*********************************************************************//**
-Creates or opens database data files and closes them.
-For each of the srv_n_data_files configured files the name is built from
-srv_data_home and srv_data_file_names[i]. Ordinary files are created if
-they do not exist and opened otherwise; raw partitions are always opened.
-An existing file has its size checked against the configured size and its
-first page read with fil_read_first_page(); a new file is set to the
-configured size. Every file is closed again and registered in space 0.
-If srv_doublewrite_file is set, that file is handled the same way and
-registered in the space TRX_DOUBLEWRITE_SPACE.
-@return DB_SUCCESS or error code */
-static
-ulint
-open_or_create_data_files(
-/*======================*/
-	ibool*		create_new_db,	/*!< out: TRUE if new database should be
-					created */
-	ibool*		create_new_doublewrite_file,
-					/*!< out: TRUE if the file named by
-					srv_doublewrite_file did not exist and
-					was created here */
-#ifdef UNIV_LOG_ARCHIVE
-	ulint*		min_arch_log_no,/*!< out: min of archived log
-					numbers in data files */
-	ulint*		max_arch_log_no,/*!< out: max of archived log
-					numbers in data files */
-#endif /* UNIV_LOG_ARCHIVE */
-	ib_uint64_t*	min_flushed_lsn,/*!< out: min of flushed lsn
-					values in data files */
-	ib_uint64_t*	max_flushed_lsn,/*!< out: max of flushed lsn
-					values in data files */
-	ulint*		sum_of_new_sizes)/*!< out: sum of sizes of the
-					new files added */
-{
-	ibool	ret;
-	ulint	i;
-	ibool	one_opened	= FALSE;
-	ibool	one_created	= FALSE;
-	ulint	size;
-	ulint	size_high;
-	ulint	flags;
-	ulint	rounded_size_pages;
-	char	name[10000];
-
-	if (srv_n_data_files >= 1000) {
-		fprintf(stderr, "InnoDB: can only have < 1000 data files\n"
-			"InnoDB: you have defined %lu\n",
-			(ulong) srv_n_data_files);
-		return(DB_ERROR);
-	}
-
-	*sum_of_new_sizes = 0;
-
-	*create_new_db = FALSE;
-	*create_new_doublewrite_file = FALSE;
-
-	srv_normalize_path_for_win(srv_data_home);
-
-	for (i = 0; i < srv_n_data_files; i++) {
-		ulint	dirnamelen;
-
-		srv_normalize_path_for_win(srv_data_file_names[i]);
-		dirnamelen = strlen(srv_data_home);
-
-		/* Directory, optional separator, file name and the
-		terminating '\0' must fit in name */
-		ut_a(dirnamelen + strlen(srv_data_file_names[i])
-		     < (sizeof name) - 1);
-		memcpy(name, srv_data_home, dirnamelen);
-		/* Add a path separator if needed. */
-		if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
-			name[dirnamelen++] = SRV_PATH_SEPARATOR;
-		}
-
-		strcpy(name + dirnamelen, srv_data_file_names[i]);
-
-		if (srv_data_file_is_raw_partition[i] == 0) {
-
-			/* First we try to create the file: if it already
-			exists, ret will get value FALSE */
-
-			files[i] = os_file_create(innodb_file_data_key,
-						  name, OS_FILE_CREATE,
-						  OS_FILE_NORMAL,
-						  OS_DATA_FILE, &ret);
-
-			if (ret == FALSE && os_file_get_last_error(FALSE)
-			    != OS_FILE_ALREADY_EXISTS
-#ifdef UNIV_AIX
-			    /* AIX 5.1 after security patch ML7 may have
-			    errno set to 0 here, which causes our function
-			    to return 100; work around that AIX problem */
-			    && os_file_get_last_error(FALSE) != 100
-#endif
-			    ) {
-				fprintf(stderr,
-					"InnoDB: Error in creating"
-					" or opening %s\n",
-					name);
-
-				return(DB_ERROR);
-			}
-		} else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
-			/* The partition is opened, not created; then it is
-			written over */
-
-			srv_start_raw_disk_in_use = TRUE;
-			srv_created_new_raw = TRUE;
-
-			files[i] = os_file_create(innodb_file_data_key,
-						  name, OS_FILE_OPEN_RAW,
-						  OS_FILE_NORMAL,
-						  OS_DATA_FILE, &ret);
-			if (!ret) {
-				fprintf(stderr,
-					"InnoDB: Error in opening %s\n", name);
-
-				return(DB_ERROR);
-			}
-		} else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
-			srv_start_raw_disk_in_use = TRUE;
-
-			/* Take the "open an existing file" branch below */
-			ret = FALSE;
-		} else {
-			ut_a(0);
-		}
-
-		if (ret == FALSE) {
-			const char*	check_msg;
-			/* We open the data file */
-
-			if (one_created) {
-				fprintf(stderr,
-					"InnoDB: Error: data files can only"
-					" be added at the end\n");
-				fprintf(stderr,
-					"InnoDB: of a tablespace, but"
-					" data file %s existed beforehand.\n",
-					name);
-				return(DB_ERROR);
-			}
-
-			if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
-				files[i] = os_file_create(
-					innodb_file_data_key,
-					name, OS_FILE_OPEN_RAW,
-					OS_FILE_NORMAL, OS_DATA_FILE, &ret);
-			} else if (i == 0) {
-				files[i] = os_file_create(
-					innodb_file_data_key,
-					name, OS_FILE_OPEN_RETRY,
-					OS_FILE_NORMAL, OS_DATA_FILE, &ret);
-			} else {
-				files[i] = os_file_create(
-					innodb_file_data_key,
-					name, OS_FILE_OPEN, OS_FILE_NORMAL,
-					OS_DATA_FILE, &ret);
-			}
-
-			if (!ret) {
-				fprintf(stderr,
-					"InnoDB: Error in opening %s\n", name);
-				os_file_get_last_error(TRUE);
-
-				return(DB_ERROR);
-			}
-
-			if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
-
-				goto skip_size_check;
-			}
-
-			ret = os_file_get_size(files[i], &size, &size_high);
-			ut_a(ret);
-			/* Round size downward to megabytes */
-
-			rounded_size_pages
-				= (size / (1024 * 1024) + 4096 * size_high)
-					<< (20 - UNIV_PAGE_SIZE_SHIFT);
-
-			if (i == srv_n_data_files - 1
-			    && srv_auto_extend_last_data_file) {
-
-				/* The auto-extending file may have grown
-				beyond its initial size, but not beyond a
-				non-zero maximum */
-				if (srv_data_file_sizes[i] > rounded_size_pages
-				    || (srv_last_file_size_max > 0
-					&& srv_last_file_size_max
-					< rounded_size_pages)) {
-
-					fprintf(stderr,
-						"InnoDB: Error: auto-extending"
-						" data file %s is"
-						" of a different size\n"
-						"InnoDB: %lu pages (rounded"
-						" down to MB) than specified"
-						" in the .cnf file:\n"
-						"InnoDB: initial %lu pages,"
-						" max %lu (relevant if"
-						" non-zero) pages!\n",
-						name,
-						(ulong) rounded_size_pages,
-						(ulong) srv_data_file_sizes[i],
-						(ulong)
-						srv_last_file_size_max);
-
-					return(DB_ERROR);
-				}
-
-				srv_data_file_sizes[i] = rounded_size_pages;
-			}
-
-			if (rounded_size_pages != srv_data_file_sizes[i]) {
-
-				fprintf(stderr,
-					"InnoDB: Error: data file %s"
-					" is of a different size\n"
-					"InnoDB: %lu pages"
-					" (rounded down to MB)\n"
-					"InnoDB: than specified"
-					" in the .cnf file %lu pages!\n",
-					name,
-					(ulong) rounded_size_pages,
-					(ulong) srv_data_file_sizes[i]);
-
-				return(DB_ERROR);
-			}
-skip_size_check:
-			check_msg = fil_read_first_page(
-				files[i], one_opened, &flags,
-#ifdef UNIV_LOG_ARCHIVE
-				min_arch_log_no, max_arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
-				min_flushed_lsn, max_flushed_lsn);
-
-			if (check_msg) {
-				fprintf(stderr,
-					"InnoDB: Error: %s in data file %s\n",
-					check_msg, name);
-				return(DB_ERROR);
-			}
-
-			/* The page size is checked only for the first
-			file that is opened */
-			if (!one_opened
-			    && UNIV_PAGE_SIZE
-			       != fsp_flags_get_page_size(flags)) {
-
-				ut_print_timestamp(stderr);
-				fprintf(stderr,
-					" InnoDB: Error: data file %s"
-					" uses page size %lu,\n",
-					name,
-					(ulong) fsp_flags_get_page_size(flags));
-				ut_print_timestamp(stderr);
-				fprintf(stderr,
-					" InnoDB: but the only supported"
-					" page size in this release is=%lu\n",
-					(ulong) UNIV_PAGE_SIZE);
-
-				return(DB_ERROR);
-			}
-
-			one_opened = TRUE;
-		} else {
-			/* We created the data file and now write it full of
-			zeros */
-
-			one_created = TRUE;
-
-			if (i > 0) {
-				ut_print_timestamp(stderr);
-				fprintf(stderr,
-					" InnoDB: Data file %s did not"
-					" exist: new to be created\n",
-					name);
-			} else {
-				fprintf(stderr,
-					"InnoDB: The first specified"
-					" data file %s did not exist:\n"
-					"InnoDB: a new database"
-					" to be created!\n", name);
-				*create_new_db = TRUE;
-			}
-
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				" InnoDB: Setting file %s size to %lu MB\n",
-				name,
-				(ulong) (srv_data_file_sizes[i]
-					 >> (20 - UNIV_PAGE_SIZE_SHIFT)));
-
-			fprintf(stderr,
-				"InnoDB: Database physically writes the"
-				" file full: wait...\n");
-
-			ret = os_file_set_size(
-				name, files[i],
-				srv_calc_low32(srv_data_file_sizes[i]),
-				srv_calc_high32(srv_data_file_sizes[i]));
-
-			if (!ret) {
-				fprintf(stderr,
-					"InnoDB: Error in creating %s:"
-					" probably out of disk space\n", name);
-
-				return(DB_ERROR);
-			}
-
-			*sum_of_new_sizes = *sum_of_new_sizes
-				+ srv_data_file_sizes[i];
-		}
-
-		ret = os_file_close(files[i]);
-		ut_a(ret);
-
-		if (i == 0) {
-			fil_space_create(name, 0, 0, FIL_TABLESPACE);
-		}
-
-		ut_a(fil_validate());
-
-		fil_node_create(name, srv_data_file_sizes[i], 0,
-				srv_data_file_is_raw_partition[i] != 0);
-	}
-
-	/* special file for doublewrite buffer */
-	if (srv_doublewrite_file) {
-		srv_normalize_path_for_win(srv_doublewrite_file);
-
-		fprintf(stderr,
-			"InnoDB: Note: The innodb_doublewrite_file option has been specified.\n"
-			"InnoDB: This option is for experts only. Don't use it unless you understand WELL what it is.\n"
-			"InnoDB: ### Don't specify a file older than the last checkpoint. ###\n"
-			"InnoDB: Otherwise, the older doublewrite buffer will break your data during recovery!\n");
-
-		/* The option value is copied into a fixed-size buffer:
-		assert that it fits, as is done for the data file names */
-		ut_a(strlen(srv_doublewrite_file) < sizeof name);
-		strcpy(name, srv_doublewrite_file);
-
-		/* First we try to create the file: if it already
-		exists, ret will get value FALSE */
-
-		files[i] = os_file_create(innodb_file_data_key, name, OS_FILE_CREATE,
-					  OS_FILE_NORMAL,
-					  OS_DATA_FILE, &ret);
-
-		if (ret == FALSE && os_file_get_last_error(FALSE)
-		    != OS_FILE_ALREADY_EXISTS
-#ifdef UNIV_AIX
-		    /* AIX 5.1 after security patch ML7 may have
-		    errno set to 0 here, which causes our function
-		    to return 100; work around that AIX problem */
-		    && os_file_get_last_error(FALSE) != 100
-#endif
-		    ) {
-			fprintf(stderr,
-				"InnoDB: Error in creating"
-				" or opening %s\n",
-				name);
-
-			return(DB_ERROR);
-		}
-
-		if (ret == FALSE) {
-
-			const char*	check_msg;
-
-			/* We open the data file */
-
-			files[i] = os_file_create(innodb_file_data_key,
-				name, OS_FILE_OPEN, OS_FILE_NORMAL,
-				OS_DATA_FILE, &ret);
-
-			if (!ret) {
-				fprintf(stderr,
-					"InnoDB: Error in opening %s\n", name);
-				os_file_get_last_error(TRUE);
-
-				return(DB_ERROR);
-			}
-
-			ret = os_file_get_size(files[i], &size, &size_high);
-			ut_a(ret);
-			/* Round size downward to megabytes */
-
-			rounded_size_pages
-				= (size / (1024 * 1024) + 4096 * size_high)
-					<< (20 - UNIV_PAGE_SIZE_SHIFT);
-
-			/* A size mismatch is only reported as a warning */
-			if (rounded_size_pages != TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9) {
-
-				fprintf(stderr,
-					"InnoDB: Warning: doublewrite buffer file %s"
-					" is of a different size\n"
-					"InnoDB: %lu pages"
-					" (rounded down to MB)\n"
-					"InnoDB: than intended size"
-					" %lu pages...\n",
-					name,
-					(ulong) rounded_size_pages,
-					(ulong) TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9);
-			}
-
-			check_msg = fil_read_first_page(
-				files[i], one_opened, &flags,
-#ifdef UNIV_LOG_ARCHIVE
-				min_arch_log_no, max_arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
-				min_flushed_lsn, max_flushed_lsn);
-
-			if (check_msg) {
-				fprintf(stderr,
-					"InnoDB: Error: %s in doublewrite "
-					"buffer file %s\n", check_msg, name);
-				return(DB_ERROR);
-			}
-
-			one_opened = TRUE;
-		} else {
-			/* We created the data file and now write it full of
-			zeros */
-
-			*create_new_doublewrite_file = TRUE;
-
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				" InnoDB: Doublewrite buffer file %s did not"
-				" exist. It will be be created.\n",
-				name);
-
-			if (*create_new_db == FALSE) {
-				fprintf(stderr,
-					"InnoDB: Notice: Previous version's ibdata files may cause crash.\n"
-					" If you use that, please use the ibdata files of this version.\n");
-			}
-
-			ut_print_timestamp(stderr);
-			fprintf(stderr,
-				" InnoDB: Setting file %s size to %lu MB\n",
-				name,
-				(ulong) ((TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9)
-					 >> (20 - UNIV_PAGE_SIZE_SHIFT)));
-
-			fprintf(stderr,
-				"InnoDB: Database physically writes the"
-				" file full: wait...\n");
-
-			ret = os_file_set_size(
-				name, files[i],
-				srv_calc_low32(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9),
-				srv_calc_high32(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9));
-
-			if (!ret) {
-				fprintf(stderr,
-					"InnoDB: Error in creating %s:"
-					" probably out of disk space\n", name);
-
-				return(DB_ERROR);
-			}
-		}
-
-		ret = os_file_close(files[i]);
-		ut_a(ret);
-
-		fil_space_create(name, TRX_DOUBLEWRITE_SPACE, 0, FIL_TABLESPACE);
-
-		ut_a(fil_validate());
-
-		fil_node_create(name, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, TRX_DOUBLEWRITE_SPACE, FALSE);
-
-		i++;
-	}
-
-	return(DB_SUCCESS);
-}
-
-/*********************************************************************//**
-Initializes the log tracking subsystem and starts its thread. Tracking is
-switched off when srv_force_recovery is non-zero, and nothing is started
-when srv_track_changed_pages is not set. */
-static
-void
-init_log_online(void)
-/*=================*/
-{
-	if (UNIV_UNLIKELY(srv_force_recovery > 0)) {
-		srv_track_changed_pages = FALSE;
-		return;
-	}
-
-	if (!srv_track_changed_pages) {
-		/* Changed page tracking was not requested */
-		return;
-	}
-
-	log_online_read_init();
-
-	/* Create the thread that follows the redo log to output the
-	changed page bitmap */
-	os_thread_create(&srv_redo_log_follow_thread, NULL,
-			 thread_ids + 5 + SRV_MAX_N_IO_THREADS);
-}
-
-/********************************************************************
-Starts InnoDB and creates a new database if database files
-are not found and the user wants.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-int
-innobase_start_or_create_for_mysql(void)
-/*====================================*/
-{
- ibool create_new_db;
- ibool create_new_doublewrite_file;
- ibool log_file_created;
- ibool log_created = FALSE;
- ibool log_opened = FALSE;
- ib_uint64_t min_flushed_lsn;
- ib_uint64_t max_flushed_lsn;
-#ifdef UNIV_LOG_ARCHIVE
- ulint min_arch_log_no;
- ulint max_arch_log_no;
-#endif /* UNIV_LOG_ARCHIVE */
- ulint sum_of_new_sizes;
- ulint sum_of_data_file_sizes;
- ulint tablespace_size_in_header;
- ulint err;
- ulint i;
- ulint io_limit;
- my_bool srv_file_per_table_original_value
- = srv_file_per_table;
- mtr_t mtr;
-#ifdef HAVE_DARWIN_THREADS
-# ifdef F_FULLFSYNC
- /* This executable has been compiled on Mac OS X 10.3 or later.
- Assume that F_FULLFSYNC is available at run-time. */
- srv_have_fullfsync = TRUE;
-# else /* F_FULLFSYNC */
- /* This executable has been compiled on Mac OS X 10.2
- or earlier. Determine if the executable is running
- on Mac OS X 10.3 or later. */
- struct utsname utsname;
- if (uname(&utsname)) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: cannot determine Mac OS X version!\n", stderr);
- } else {
- srv_have_fullfsync = strcmp(utsname.release, "7.") >= 0;
- }
- if (!srv_have_fullfsync) {
- ut_print_timestamp(stderr);
- fputs(" InnoDB: On Mac OS X, fsync() may be "
- "broken on internal drives,\n", stderr);
- ut_print_timestamp(stderr);
- fputs(" InnoDB: making transactions unsafe!\n", stderr);
- }
-# endif /* F_FULLFSYNC */
-#endif /* HAVE_DARWIN_THREADS */
-
- if (sizeof(ulint) != sizeof(void*)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: size of InnoDB's ulint is %lu, "
- "but size of void*\n", (ulong) sizeof(ulint));
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: is %lu. The sizes should be the same "
- "so that on a 64-bit\n",
- (ulong) sizeof(void*));
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: platforms you can allocate more than 4 GB "
- "of memory.\n");
- }
-
- /* If stacktrace is used we set up signal handler for SIGUSR2 signal
- here. If signal handler set fails we report that and disable
- stacktrace feature. */
-
- if (srv_use_stacktrace) {
-#ifdef __linux__
- struct sigaction sigact;
-
- sigact.sa_sigaction = os_stacktrace_print;
- sigact.sa_flags = SA_RESTART | SA_SIGINFO;
-
- if (sigaction(SIGUSR2, &sigact, (struct sigaction *)NULL) != 0)
- {
- fprintf(stderr, " InnoDB:error setting signal handler for %d (%s)\n",
- SIGUSR2, strsignal(SIGUSR2));
- srv_use_stacktrace = FALSE;
-
- }
-#endif /* __linux__ */
- }
-
-
- /* System tables are created in tablespace 0. Thus, we must
- temporarily clear srv_file_per_table. This is ok, because the
- server will not accept connections (which could modify
- innodb_file_per_table) until this function has returned. */
- srv_file_per_table = FALSE;
-#ifdef UNIV_DEBUG
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!\n");
-#endif
-
-#ifdef UNIV_IBUF_DEBUG
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!\n");
-# ifdef UNIV_IBUF_COUNT_DEBUG
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: !!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on "
- "!!!!!!!!!\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG\n");
-# endif
-#endif
-
-#ifdef UNIV_BLOB_DEBUG
- fprintf(stderr,
- "InnoDB: !!!!!!!! UNIV_BLOB_DEBUG switched on !!!!!!!!!\n"
- "InnoDB: Server restart may fail with UNIV_BLOB_DEBUG\n");
-#endif /* UNIV_BLOB_DEBUG */
-
-#ifdef UNIV_SYNC_DEBUG
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!\n");
-#endif
-
-#ifdef UNIV_SEARCH_DEBUG
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: !!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!\n");
-#endif
-
-#ifdef UNIV_LOG_LSN_DEBUG
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: !!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!\n");
-#endif /* UNIV_LOG_LSN_DEBUG */
-#ifdef UNIV_MEM_DEBUG
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!\n");
-#endif
-
- if (UNIV_LIKELY(srv_use_sys_malloc)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: The InnoDB memory heap is disabled\n");
- }
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: " IB_ATOMICS_STARTUP_MSG "\n", stderr);
-
- ut_print_timestamp(stderr);
- fputs(" InnoDB: Compressed tables use zlib " ZLIB_VERSION
-#ifdef UNIV_ZIP_DEBUG
- " with validation"
-#endif /* UNIV_ZIP_DEBUG */
- "\n" , stderr);
-#ifdef UNIV_ZIP_COPY
- ut_print_timestamp(stderr);
- fputs(" InnoDB: and extra copying\n", stderr);
-#endif /* UNIV_ZIP_COPY */
-
- /* Since InnoDB does not currently clean up all its internal data
- structures in MySQL Embedded Server Library server_end(), we
- print an error message if someone tries to start up InnoDB a
- second time during the process lifetime. */
-
- if (srv_start_has_been_called) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: startup called second time "
- "during the process\n");
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: lifetime. In the MySQL Embedded "
- "Server Library you\n");
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: cannot call server_init() more "
- "than once during the\n");
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: process lifetime.\n");
- }
-
- srv_start_has_been_called = TRUE;
-
-#ifdef UNIV_DEBUG
- log_do_write = TRUE;
-#endif /* UNIV_DEBUG */
- /* yydebug = TRUE; */
-
- srv_is_being_started = TRUE;
- srv_startup_is_before_trx_rollback_phase = TRUE;
-
-#ifdef __WIN__
- switch (os_get_os_version()) {
- case OS_WIN95:
- case OS_WIN31:
- case OS_WINNT:
- srv_use_native_conditions = FALSE;
- /* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
- and NT use simulated aio. In NT Windows provides async i/o,
- but when run in conjunction with InnoDB Hot Backup, it seemed
- to corrupt the data files. */
-
- srv_use_native_aio = FALSE;
- break;
-
- case OS_WIN2000:
- case OS_WINXP:
- /* On 2000 and XP, async IO is available, but no condition variables. */
- srv_use_native_aio = TRUE;
- srv_use_native_conditions = FALSE;
- break;
-
- default:
- /* Vista and later have both async IO and condition variables */
- srv_use_native_aio = TRUE;
- srv_use_native_conditions = TRUE;
- break;
- }
-
-#elif defined(LINUX_NATIVE_AIO)
-
- if (srv_use_native_aio) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Using Linux native AIO\n");
- }
-#else
- /* Currently native AIO is supported only on windows and linux
- and that also when the support is compiled in. In all other
- cases, we ignore the setting of innodb_use_native_aio. */
- srv_use_native_aio = FALSE;
-
-#endif
-
- if (srv_file_flush_method_str == NULL) {
- /* These are the default options */
-
- srv_unix_file_flush_method = SRV_UNIX_FSYNC;
-
- srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
- srv_unix_file_flush_method = SRV_UNIX_FSYNC;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
- srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
- srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
- srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
- srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
- srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
-#ifdef _WIN32
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
- srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
- srv_use_native_aio = FALSE;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
- srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
- srv_use_native_aio = FALSE;
-
- } else if (0 == ut_strcmp(srv_file_flush_method_str,
- "async_unbuffered")) {
- srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
- srv_use_native_aio = TRUE;
-#endif
- } else {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Unrecognized value %s for"
- " innodb_flush_method\n",
- srv_file_flush_method_str);
- return(DB_ERROR);
- }
-
- /* Note that the call srv_boot() also changes the values of
- some variables to the units used by InnoDB internally */
-
- /* Set the maximum number of threads which can wait for a semaphore
- inside InnoDB: this is the 'sync wait array' size, as well as the
- maximum number of threads that can wait in the 'srv_conc array' for
- their time to enter InnoDB. */
-
- if (srv_buf_pool_size >= 1000 * 1024 * 1024) {
- /* If buffer pool is less than 1000 MB,
- assume fewer threads. Also use only one
- buffer pool instance */
- srv_max_n_threads = 50000;
-
- } else if (srv_buf_pool_size >= 8 * 1024 * 1024) {
-
- srv_buf_pool_instances = 1;
- srv_max_n_threads = 10000;
- } else {
- srv_buf_pool_instances = 1;
- srv_max_n_threads = 1000; /* saves several MB of memory,
- especially in 64-bit
- computers */
- }
-
- err = srv_boot();
-
- if (err != DB_SUCCESS) {
-
- return((int) err);
- }
-
- mutex_create(srv_monitor_file_mutex_key,
- &srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK);
-
- if (srv_innodb_status) {
- srv_monitor_file_name = mem_alloc(
- strlen(fil_path_to_mysql_datadir)
- + 20 + sizeof "/innodb_status.");
- sprintf(srv_monitor_file_name, "%s/innodb_status.%lu",
- fil_path_to_mysql_datadir, os_proc_get_number());
- srv_monitor_file = fopen(srv_monitor_file_name, "w+");
- if (!srv_monitor_file) {
- fprintf(stderr, "InnoDB: unable to create %s: %s\n",
- srv_monitor_file_name, strerror(errno));
- return(DB_ERROR);
- }
- } else {
- srv_monitor_file_name = NULL;
- srv_monitor_file = os_file_create_tmpfile();
- if (!srv_monitor_file) {
- return(DB_ERROR);
- }
- }
-
- mutex_create(srv_dict_tmpfile_mutex_key,
- &srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION);
-
- srv_dict_tmpfile = os_file_create_tmpfile();
- if (!srv_dict_tmpfile) {
- return(DB_ERROR);
- }
-
- mutex_create(srv_misc_tmpfile_mutex_key,
- &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH);
-
- srv_misc_tmpfile = os_file_create_tmpfile();
- if (!srv_misc_tmpfile) {
- return(DB_ERROR);
- }
-
- /* If user has set the value of innodb_file_io_threads then
- we'll emit a message telling the user that this parameter
- is now deprecated. */
- if (srv_n_file_io_threads != 4) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Warning:"
- " innodb_file_io_threads is deprecated."
- " Please use innodb_read_io_threads and"
- " innodb_write_io_threads instead\n");
- }
-
- /* Now overwrite the value on srv_n_file_io_threads */
- srv_n_file_io_threads = 2 + srv_n_read_io_threads
- + srv_n_write_io_threads;
-
- ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
-
- io_limit = 8 * SRV_N_PENDING_IOS_PER_THREAD;
-
- /* On Windows when using native aio the number of aio requests
- that a thread can handle at a given time is limited to 32
- i.e.: SRV_N_PENDING_IOS_PER_THREAD */
-# ifdef __WIN__
- if (srv_use_native_aio) {
- io_limit = SRV_N_PENDING_IOS_PER_THREAD;
- }
-# endif /* __WIN__ */
-
- if (!os_aio_init(io_limit,
- srv_n_read_io_threads,
- srv_n_write_io_threads,
- SRV_MAX_N_PENDING_SYNC_IOS)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Fatal error: cannot initialize AIO"
- " sub-system\n");
-#if defined(LINUX_NATIVE_AIO)
- fprintf(stderr, "You can try increasing system fs.aio-max-nr to 1048576 or larger or setting innodb_use_native_aio = 0 in my.cnf\n");
-#endif
- return(DB_ERROR);
- }
-
- fil_init(srv_file_per_table ? 50000 : 5000,
- srv_max_n_open_files);
-
- /* Print time to initialize the buffer pool */
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Initializing buffer pool, size =");
-
- if (srv_buf_pool_size >= 1024 * 1024 * 1024) {
- fprintf(stderr,
- " %.1fG\n",
- ((double) srv_buf_pool_size) / (1024 * 1024 * 1024));
- } else {
- fprintf(stderr,
- " %.1fM\n",
- ((double) srv_buf_pool_size) / (1024 * 1024));
- }
-
- err = buf_pool_init(srv_buf_pool_size, (ibool) srv_buf_pool_populate,
- srv_buf_pool_instances);
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Completed initialization of buffer pool\n");
-
- if (err != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Fatal error: cannot allocate memory"
- " for the buffer pool\n");
-
- return(DB_ERROR);
- }
-
-#ifdef UNIV_DEBUG
- /* We have observed deadlocks with a 5MB buffer pool but
- the actual lower limit could very well be a little higher. */
-
- if (srv_buf_pool_size <= 5 * 1024 * 1024) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Warning: Small buffer pool size "
- "(%luM), the flst_validate() debug function "
- "can cause a deadlock if the buffer pool fills up.\n",
- srv_buf_pool_size / 1024 / 1024);
- }
-#endif
-
- fsp_init();
- log_init();
-
- lock_sys_create(srv_lock_table_size);
-
- /* Create i/o-handler threads: */
-
- for (i = 0; i < srv_n_file_io_threads; i++) {
- n[i] = i;
-
- os_thread_create(io_handler_thread, n + i, thread_ids + i);
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- if (0 != ut_strcmp(srv_log_group_home_dirs[0], srv_arch_dir)) {
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: Error: you must set the log group home dir in my.cnf\n");
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: the same as log arch dir.\n");
-
- return(DB_ERROR);
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- if (sizeof(ulint) == 4
- && srv_n_log_files * srv_log_file_size
- >= ((ulint)1 << (32 - UNIV_PAGE_SIZE_SHIFT))) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: combined size of log files"
- " must be < 4 GB on 32-bit systems\n");
-
- return(DB_ERROR);
- }
-
- sum_of_new_sizes = 0;
-
- for (i = 0; i < srv_n_data_files; i++) {
-#ifndef __WIN__
- if (sizeof(off_t) < 5 && srv_data_file_sizes[i] >= ((ulint)1 << (32 - UNIV_PAGE_SIZE_SHIFT))) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: file size must be < 4 GB"
- " with this MySQL binary\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: and operating system combination,"
- " in some OS's < 2 GB\n");
-
- return(DB_ERROR);
- }
-#endif
- sum_of_new_sizes += srv_data_file_sizes[i];
- }
-
- if (sum_of_new_sizes < 10485760 / UNIV_PAGE_SIZE) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: tablespace size must be"
- " at least 10 MB\n");
-
- return(DB_ERROR);
- }
-
- err = open_or_create_data_files(&create_new_db,
- &create_new_doublewrite_file,
-#ifdef UNIV_LOG_ARCHIVE
- &min_arch_log_no, &max_arch_log_no,
-#endif /* UNIV_LOG_ARCHIVE */
- &min_flushed_lsn, &max_flushed_lsn,
- &sum_of_new_sizes);
- if (err != DB_SUCCESS) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Could not open or create data files.\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: If you tried to add new data files,"
- " and it failed here,\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: you should now edit innodb_data_file_path"
- " in my.cnf back\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: to what it was, and remove the"
- " new ibdata files InnoDB created\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: in this failed attempt. InnoDB only wrote"
- " those files full of\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: zeros, but did not yet use them in any way."
- " But be careful: do not\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: remove old data files"
- " which contain your precious data!\n");
-
- return((int) err);
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- srv_normalize_path_for_win(srv_arch_dir);
- srv_arch_dir = srv_add_path_separator_if_needed(srv_arch_dir);
-#endif /* UNIV_LOG_ARCHIVE */
-
- for (i = 0; i < srv_n_log_files; i++) {
- err = open_or_create_log_file(create_new_db, &log_file_created,
- log_opened, 0, i);
- if (err != DB_SUCCESS) {
-
- return((int) err);
- }
-
- if (log_file_created) {
- log_created = TRUE;
- } else {
- log_opened = TRUE;
- }
- if ((log_opened && create_new_db)
- || (log_opened && log_created)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: all log files must be"
- " created at the same time.\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: All log files must be"
- " created also in database creation.\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: If you want bigger or smaller"
- " log files, shut down the\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: database and make sure there"
- " were no errors in shutdown.\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Then delete the existing log files."
- " Edit the .cnf file\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: and start the database again.\n");
-
- return(DB_ERROR);
- }
- }
-
- /* Open all log files and data files in the system tablespace: we
- keep them open until database shutdown */
-
- fil_open_log_and_system_tablespace_files();
-
- if (log_created && !create_new_db
-#ifdef UNIV_LOG_ARCHIVE
- && !srv_archive_recovery
-#endif /* UNIV_LOG_ARCHIVE */
- ) {
- if (max_flushed_lsn != min_flushed_lsn
-#ifdef UNIV_LOG_ARCHIVE
- || max_arch_log_no != min_arch_log_no
-#endif /* UNIV_LOG_ARCHIVE */
- ) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Cannot initialize created"
- " log files because\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: data files were not in sync"
- " with each other\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: or the data files are corrupt.\n");
-
- return(DB_ERROR);
- }
-
- if (max_flushed_lsn < (ib_uint64_t) 1000) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Cannot initialize created"
- " log files because\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: data files are corrupt,"
- " or new data files were\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: created when the database"
- " was started previous\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: time but the database"
- " was not shut down\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: normally after that.\n");
-
- return(DB_ERROR);
- }
-
- mutex_enter(&(log_sys->mutex));
-
-#ifdef UNIV_LOG_ARCHIVE
- /* Do not + 1 arch_log_no because we do not use log
- archiving */
- recv_reset_logs(max_flushed_lsn, max_arch_log_no, TRUE);
-#else
- recv_reset_logs(max_flushed_lsn, TRUE);
-#endif /* UNIV_LOG_ARCHIVE */
-
- mutex_exit(&(log_sys->mutex));
- }
-
- trx_sys_file_format_init();
-
- if (create_new_db) {
- init_log_online();
-
- mtr_start(&mtr);
-
- fsp_header_init(0, sum_of_new_sizes, &mtr);
-
- mtr_commit(&mtr);
-
- /* To maintain backward compatibility we create only
- the first rollback segment before the double write buffer.
- All the remaining rollback segments will be created later,
- after the double write buffer has been created. */
- trx_sys_create();
-
- if (create_new_doublewrite_file) {
- mtr_start(&mtr);
- fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr);
- mtr_commit(&mtr);
-
- trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE);
- }
-
- dict_create();
-
- srv_startup_is_before_trx_rollback_phase = FALSE;
-
-#ifdef UNIV_LOG_ARCHIVE
- } else if (srv_archive_recovery) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Starting archive"
- " recovery from a backup...\n");
- err = recv_recovery_from_archive_start(
- min_flushed_lsn, srv_archive_recovery_limit_lsn,
- min_arch_log_no);
- if (err != DB_SUCCESS) {
-
- return(DB_ERROR);
- }
- /* Since ibuf init is in dict_boot, and ibuf is needed
- in any disk i/o, first call dict_boot */
-
- dict_boot();
-
- trx_sys_init_at_db_start();
-
- srv_startup_is_before_trx_rollback_phase = FALSE;
-
- /* Initialize the fsp free limit global variable in the log
- system */
- fsp_header_get_free_limit();
-
- recv_recovery_from_archive_finish();
-#endif /* UNIV_LOG_ARCHIVE */
- } else {
- char* save_srv_doublewrite_file = NULL;
-
- if (create_new_doublewrite_file) {
- /* doublewrite_file cannot be used for recovery yet. */
- save_srv_doublewrite_file = srv_doublewrite_file;
- srv_doublewrite_file = NULL;
- }
-
- /* Check if we support the max format that is stamped
- on the system tablespace.
- Note: We are NOT allowed to make any modifications to
- the TRX_SYS_PAGE_NO page before recovery because this
- page also contains the max_trx_id etc. important system
- variables that are required for recovery. We need to
- ensure that we return the system to a state where normal
- recovery is guaranteed to work. We do this by
- invalidating the buffer cache, this will force the
- reread of the page and restoration to its last known
- consistent state, this is REQUIRED for the recovery
- process to work. */
- err = trx_sys_file_format_max_check(
- srv_max_file_format_at_startup);
-
- if (err != DB_SUCCESS) {
- return(err);
- }
-
- /* Invalidate the buffer pool to ensure that we reread
- the page that we read above, during recovery.
- Note that this is not as heavy weight as it seems. At
- this point there will be only ONE page in the buf_LRU
- and there must be no page in the buf_flush list. */
- buf_pool_invalidate();
-
- /* We always try to do a recovery, even if the database had
- been shut down normally: this is the normal startup path */
-
- err = recv_recovery_from_checkpoint_start(LOG_CHECKPOINT,
- IB_ULONGLONG_MAX,
- min_flushed_lsn,
- max_flushed_lsn);
- if (err != DB_SUCCESS) {
-
- return(DB_ERROR);
- }
-
- init_log_online();
-
- /* Since the insert buffer init is in dict_boot, and the
- insert buffer is needed in any disk i/o, first we call
- dict_boot(). Note that trx_sys_init_at_db_start() only needs
- to access space 0, and the insert buffer at this stage already
- works for space 0. */
-
- dict_boot();
- trx_sys_init_at_db_start();
-
- /* Initialize the fsp free limit global variable in the log
- system */
- fsp_header_get_free_limit();
-
- /* recv_recovery_from_checkpoint_finish needs trx lists which
- are initialized in trx_sys_init_at_db_start(). */
-
- recv_recovery_from_checkpoint_finish();
- if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
- /* The following call is necessary for the insert
- buffer to work with multiple tablespaces. We must
- know the mapping between space id's and .ibd file
- names.
-
- In a crash recovery, we check that the info in data
- dictionary is consistent with what we already know
- about space id's from the call of
- fil_load_single_table_tablespaces().
-
- In a normal startup, we create the space objects for
- every table in the InnoDB data dictionary that has
- an .ibd file.
-
- We also determine the maximum tablespace id used. */
-
- dict_check_tablespaces_and_store_max_id(
- recv_needed_recovery);
- }
-
- srv_startup_is_before_trx_rollback_phase = FALSE;
- recv_recovery_rollback_active();
-
- /* It is possible that file_format tag has never
- been set. In this case we initialize it to minimum
- value. Important to note that we can do it ONLY after
- we have finished the recovery process so that the
- image of TRX_SYS_PAGE_NO is not stale. */
- trx_sys_file_format_tag_init();
-
- if (create_new_doublewrite_file) {
- /* restore the value */
- srv_doublewrite_file = save_srv_doublewrite_file;
-
- mtr_start(&mtr);
- fsp_header_init(TRX_DOUBLEWRITE_SPACE, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 9, &mtr);
- mtr_commit(&mtr);
-
- trx_sys_dummy_create(TRX_DOUBLEWRITE_SPACE);
- }
-
- if (UNIV_UNLIKELY(!dict_verify_xtradb_sys_stats())) {
- fprintf(stderr, "InnoDB: Warning: "
- "SYS_STATS table corrupted, recreating\n");
- dict_recreate_xtradb_sys_stats();
- }
- }
-
- if (!create_new_db && sum_of_new_sizes > 0) {
- /* New data file(s) were added */
- mtr_start(&mtr);
-
- fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
-
- mtr_commit(&mtr);
-
- /* Immediately write the log record about increased tablespace
- size to disk, so that it is durable even if mysqld would crash
- quickly */
-
- log_buffer_flush_to_disk();
- }
-
-#ifdef UNIV_LOG_ARCHIVE
- /* Archiving is always off under MySQL */
- if (!srv_log_archive_on) {
- ut_a(DB_SUCCESS == log_archive_noarchivelog());
- } else {
- mutex_enter(&(log_sys->mutex));
-
- start_archive = FALSE;
-
- if (log_sys->archiving_state == LOG_ARCH_OFF) {
- start_archive = TRUE;
- }
-
- mutex_exit(&(log_sys->mutex));
-
- if (start_archive) {
- ut_a(DB_SUCCESS == log_archive_archivelog());
- }
- }
-#endif /* UNIV_LOG_ARCHIVE */
-
- /* fprintf(stderr, "Max allowed record size %lu\n",
- page_get_free_space_of_empty() / 2); */
-
- if (trx_doublewrite == NULL) {
- /* Create the doublewrite buffer to a new tablespace */
-
- trx_sys_create_doublewrite_buf();
- }
-
- /* Here the double write buffer has already been created and so
- any new rollback segments will be allocated after the double
- write buffer. The default segment should already exist.
- We create the new segments only if it's a new database or
- the database was shutdown cleanly. */
-
- /* Note: When creating the extra rollback segments during an upgrade
- we violate the latching order, even if the change buffer is empty.
- We make an exception in sync0sync.c and check srv_is_being_started
- for that violation. It cannot create a deadlock because we are still
- running in single threaded mode essentially. Only the IO threads
- should be running at this stage. */
-
- trx_sys_create_rsegs(TRX_SYS_N_RSEGS - 1);
-
- /* Create the thread which watches the timeouts for lock waits */
- os_thread_create(&srv_lock_timeout_thread, NULL,
- thread_ids + 2 + SRV_MAX_N_IO_THREADS);
-
- /* Create the thread which warns of long semaphore waits */
- os_thread_create(&srv_error_monitor_thread, NULL,
- thread_ids + 3 + SRV_MAX_N_IO_THREADS);
-
- /* Create the thread which prints InnoDB monitor info */
- os_thread_create(&srv_monitor_thread, NULL,
- thread_ids + 4 + SRV_MAX_N_IO_THREADS);
-
- /* Create the thread which automatically dumps/restores the buffer pool */
- os_thread_create(&srv_LRU_dump_restore_thread, NULL,
- thread_ids + 5 + SRV_MAX_N_IO_THREADS);
-
- /* If srv_blocking_lru_restore is TRUE, load buffer pool contents
- synchronously */
- if (srv_auto_lru_dump && srv_blocking_lru_restore)
- buf_LRU_file_restore();
-
- srv_is_being_started = FALSE;
-
- err = dict_create_or_check_foreign_constraint_tables();
-
- if (err != DB_SUCCESS) {
- return((int)DB_ERROR);
- }
-
- /* Create the master thread which does purge and other utility
- operations */
-
- os_thread_create(&srv_master_thread, NULL, thread_ids
- + (1 + SRV_MAX_N_IO_THREADS));
-
- /* Currently we allow only a single purge thread. */
- ut_a(srv_n_purge_threads == 0 || srv_n_purge_threads == 1);
-
- /* If the user has requested a separate purge thread then
- start the purge thread. */
- if (srv_n_purge_threads == 1) {
- os_thread_create(&srv_purge_thread, NULL, NULL);
- }
-
- /* Wait for the purge and master thread to startup. */
-
- while (srv_shutdown_state == SRV_SHUTDOWN_NONE) {
- if (srv_thread_has_reserved_slot(SRV_MASTER) == ULINT_UNDEFINED
- || (srv_n_purge_threads == 1
- && srv_thread_has_reserved_slot(SRV_WORKER)
- == ULINT_UNDEFINED)) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr, " InnoDB: "
- "Waiting for the background threads to "
- "start\n");
- os_thread_sleep(1000000);
- } else {
- break;
- }
- }
-
-#ifdef UNIV_DEBUG
- /* buf_debug_prints = TRUE; */
-#endif /* UNIV_DEBUG */
- sum_of_data_file_sizes = 0;
-
- for (i = 0; i < srv_n_data_files; i++) {
- sum_of_data_file_sizes += srv_data_file_sizes[i];
- }
-
- tablespace_size_in_header = fsp_header_get_tablespace_size();
-
- if (!srv_auto_extend_last_data_file
- && sum_of_data_file_sizes != tablespace_size_in_header) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: tablespace size"
- " stored in header is %lu pages, but\n",
- (ulong) tablespace_size_in_header);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- "InnoDB: the sum of data file sizes is %lu pages\n",
- (ulong) sum_of_data_file_sizes);
-
- if (srv_force_recovery == 0
- && sum_of_data_file_sizes < tablespace_size_in_header) {
- /* This is a fatal error, the tail of a tablespace is
- missing */
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Cannot start InnoDB."
- " The tail of the system tablespace is\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: missing. Have you edited"
- " innodb_data_file_path in my.cnf in an\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: inappropriate way, removing"
- " ibdata files from there?\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: You can set innodb_force_recovery=1"
- " in my.cnf to force\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: a startup if you are trying"
- " to recover a badly corrupt database.\n");
-
- return(DB_ERROR);
- }
- }
-
- if (srv_auto_extend_last_data_file
- && sum_of_data_file_sizes < tablespace_size_in_header) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: tablespace size stored in header"
- " is %lu pages, but\n",
- (ulong) tablespace_size_in_header);
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: the sum of data file sizes"
- " is only %lu pages\n",
- (ulong) sum_of_data_file_sizes);
-
- if (srv_force_recovery == 0) {
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Cannot start InnoDB. The tail of"
- " the system tablespace is\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: missing. Have you edited"
- " innodb_data_file_path in my.cnf in an\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: inappropriate way, removing"
- " ibdata files from there?\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: You can set innodb_force_recovery=1"
- " in my.cnf to force\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: a startup if you are trying to"
- " recover a badly corrupt database.\n");
-
- return(DB_ERROR);
- }
- }
-
- /* Check that os_fast_mutexes work as expected */
- os_fast_mutex_init(&srv_os_test_mutex);
-
- if (0 != os_fast_mutex_trylock(&srv_os_test_mutex)) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Error: pthread_mutex_trylock returns"
- " an unexpected value on\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: success! Cannot continue.\n");
- exit(1);
- }
-
- os_fast_mutex_unlock(&srv_os_test_mutex);
-
- os_fast_mutex_lock(&srv_os_test_mutex);
-
- os_fast_mutex_unlock(&srv_os_test_mutex);
-
- os_fast_mutex_free(&srv_os_test_mutex);
-
- if (!srv_file_per_table_original_value
- && srv_pass_corrupt_table) {
- fprintf(stderr, "InnoDB: Warning:"
- " The option innodb_file_per_table is disabled,"
- " so using the option innodb_pass_corrupt_table doesn't make sense.\n");
- }
-
- if (srv_print_verbose_log) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " Percona XtraDB (http://www.percona.com) %s started; "
- "log sequence number %llu\n",
- INNODB_VERSION_STR, srv_start_lsn);
- }
-
- if (srv_force_recovery > 0) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: !!! innodb_force_recovery"
- " is set to %lu !!!\n",
- (ulong) srv_force_recovery);
- }
-
- fflush(stderr);
-
- if (trx_doublewrite_must_reset_space_ids) {
- /* Actually, we did not change the undo log format between
- 4.0 and 4.1.1, and we would not need to run purge to
- completion. Note also that the purge algorithm in 4.1.1
- can process the history list again even after a full
- purge, because our algorithm does not cut the end of the
- history list in all cases so that it would become empty
- after a full purge. That means that we may purge 4.0 type
- undo log even after this phase.
-
- The insert buffer record format changed between 4.0 and
- 4.1.1. It is essential that the insert buffer is emptied
- here! */
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: You are upgrading to an"
- " InnoDB version which allows multiple\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: tablespaces. Wait that purge"
- " and insert buffer merge run to\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: completion...\n");
- for (;;) {
- os_thread_sleep(1000000);
-
- if (0 == strcmp(srv_main_thread_op_info,
- "waiting for server activity")) {
-
- ut_a(ibuf_is_empty());
-
- break;
- }
- }
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Full purge and insert buffer merge"
- " completed.\n");
-
- trx_sys_mark_upgraded_to_multiple_tablespaces();
-
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: You have now successfully upgraded"
- " to the multiple tablespaces\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: format. You should NOT DOWNGRADE"
- " to an earlier version of\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: InnoDB! But if you absolutely need to"
- " downgrade, see\n");
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: " REFMAN "multiple-tablespaces.html\n"
- " InnoDB: for instructions.\n");
- }
-
- if (srv_force_recovery == 0) {
- /* In the insert buffer we may have even bigger tablespace
- id's, because we may have dropped those tablespaces, but
- insert buffer merge has not had time to clean the records from
- the ibuf tree. */
-
- ibuf_update_max_tablespace_id();
- }
-
- srv_file_per_table = srv_file_per_table_original_value;
-
- srv_was_started = TRUE;
-
- return((int) DB_SUCCESS);
-}
-
-/****************************************************************//**
-Shuts down the InnoDB database.
-If srv_was_started is not set, nothing is torn down: a warning is printed
-when srv_is_being_started is still set, and DB_SUCCESS is returned at once.
-Otherwise logs_empty_and_mark_files_at_shutdown() is run (step 1), the
-threads created by InnoDB are woken so that they can exit (step 2), the
-monitor and temporary files are closed, and the remaining subsystems,
-synchronization objects and memory are released (steps 3 - 5).
-Both return statements in this function return DB_SUCCESS; the problems
-detected along the way are only reported on stderr.
-@return DB_SUCCESS or error code */
-UNIV_INTERN
-int
-innobase_shutdown_for_mysql(void)
-/*=============================*/
-{
- ulint i; /* pass counter of the thread-exit polling loop below */
- if (!srv_was_started) { /* startup did not complete: nothing to tear down */
- if (srv_is_being_started) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Warning: shutting down"
- " a not properly started\n"
- "InnoDB: or created database!\n");
- }
-
- return(DB_SUCCESS);
- }
-
- /* 1. Flush the buffer pool to disk, write the current lsn to
- the tablespace header(s), and copy all log data to archive.
- The step 1 is the real InnoDB shutdown. The remaining steps 2 - ...
- just free data structures after the shutdown. */
-
- logs_empty_and_mark_files_at_shutdown();
-
- if (srv_conc_n_threads != 0) { /* diagnostic only: shutdown continues */
- fprintf(stderr,
- "InnoDB: Warning: query counter shows %ld queries"
- " still\n"
- "InnoDB: inside InnoDB at shutdown\n",
- srv_conc_n_threads);
- }
-
- /* 2. Make all threads created by InnoDB to exit */
-
- srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
-
- /* All threads end up waiting for certain events. Put those events
- to the signaled state. Then the threads will exit themselves after
- os_event_wait().
- The wake-ups are repeated on every pass of the loop, which gives up
- after 1000 passes if os_thread_count has not dropped to zero. */
-
- for (i = 0; i < 1000; i++) {
- /* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
- HERE OR EARLIER */
-
- /* a. Let the lock timeout thread exit */
- os_event_set(srv_lock_timeout_thread_event);
-
- /* b. srv error monitor thread exits automatically, no need
- to do anything here */
-
- /* c. We wake the master thread so that it exits */
- srv_wake_master_thread();
-
- /* d. We wake the purge thread so that it exits */
- srv_wake_purge_thread();
-
- /* e. Exit the i/o threads */
-
- os_aio_wake_all_threads_at_shutdown();
-
- os_mutex_enter(os_sync_mutex); /* held while os_thread_count is read */
-
- if (os_thread_count == 0) {
- /* All the threads have exited or are just exiting;
- NOTE that the threads may not have completed their
- exit yet. Should we use pthread_join() to make sure
- they have exited? If we did, we would have to
- remove the pthread_detach() from
- os_thread_exit(). Now we just sleep 0.1
- seconds and hope that is enough! */
-
- os_mutex_exit(os_sync_mutex);
-
- os_thread_sleep(100000);
-
- break;
- }
-
- os_mutex_exit(os_sync_mutex);
-
- os_thread_sleep(100000); /* threads remain: pause before the next pass */
- }
-
- if (i == 1000) { /* the loop never saw os_thread_count == 0 */
- fprintf(stderr,
- "InnoDB: Warning: %lu threads created by InnoDB"
- " had not exited at shutdown!\n",
- (ulong) os_thread_count);
- }
-
- if (srv_monitor_file) {
- fclose(srv_monitor_file);
- srv_monitor_file = 0;
- if (srv_monitor_file_name) { /* non-NULL when a named status file was created at startup */
- unlink(srv_monitor_file_name); /* return value is not checked */
- mem_free(srv_monitor_file_name);
- }
- }
- if (srv_dict_tmpfile) {
- fclose(srv_dict_tmpfile);
- srv_dict_tmpfile = 0;
- }
-
- if (srv_misc_tmpfile) {
- fclose(srv_misc_tmpfile);
- srv_misc_tmpfile = 0;
- }
-
- /* This must be disabled before closing the buffer pool
- and closing the data dictionary. */
- btr_search_disable();
-
- ibuf_close();
- log_shutdown();
- lock_sys_close();
- trx_sys_file_format_close();
- trx_sys_close();
-
- mutex_free(&srv_monitor_file_mutex); /* the three file mutexes created during startup */
- mutex_free(&srv_dict_tmpfile_mutex);
- mutex_free(&srv_misc_tmpfile_mutex);
- dict_close();
- btr_search_sys_free();
-
- /* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
- them */
- os_aio_free();
- sync_close();
- srv_free();
- fil_close();
-
- /* 4. Free the os_conc_mutex and all os_events and os_mutexes */
-
- os_sync_free();
-
- /* 5. Free all allocated memory */
-
- pars_lexer_close();
- log_mem_free();
- buf_pool_free(srv_buf_pool_instances);
- mem_close();
-
- /* ut_free_all_mem() frees all allocated memory not freed yet
- in shutdown, and it will also free the ut_list_mutex, so it
- should be the last one for all operation */
- ut_free_all_mem();
-
- if (os_thread_count != 0 /* leak report: any nonzero counter triggers the warning */
- || os_event_count != 0
- || os_mutex_count != 0
- || os_fast_mutex_count != 0) {
- fprintf(stderr,
- "InnoDB: Warning: some resources were not"
- " cleaned up in shutdown:\n"
- "InnoDB: threads %lu, events %lu,"
- " os_mutexes %lu, os_fast_mutexes %lu\n",
- (ulong) os_thread_count, (ulong) os_event_count,
- (ulong) os_mutex_count, (ulong) os_fast_mutex_count);
- }
-
- if (dict_foreign_err_file) { /* closed here, but the pointer is not reset */
- fclose(dict_foreign_err_file);
- }
- if (lock_latest_err_file) { /* closed here, but the pointer is not reset */
- fclose(lock_latest_err_file);
- }
-
- if (srv_print_verbose_log) {
- ut_print_timestamp(stderr);
- fprintf(stderr,
- " InnoDB: Shutdown completed;"
- " log sequence number %llu\n",
- srv_shutdown_lsn);
- }
-
- srv_was_started = FALSE; /* a repeated call now takes the early return at the top */
- srv_start_has_been_called = FALSE;
-
- return((int) DB_SUCCESS);
-}
-#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc
new file mode 100644
index 00000000000..3ddfd9ab3a4
--- /dev/null
+++ b/storage/xtradb/srv/srv0start.cc
@@ -0,0 +1,3234 @@
+/*****************************************************************************
+
+Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved.
+Copyright (c) 2008, Google Inc.
+Copyright (c) 2009, Percona Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/********************************************************************//**
+@file srv/srv0start.cc
+Starts the InnoDB database server
+
+Created 2/16/1996 Heikki Tuuri
+*************************************************************************/
+
+#include "ut0mem.h"
+#include "mem0mem.h"
+#include "data0data.h"
+#include "data0type.h"
+#include "dict0dict.h"
+#include "buf0buf.h"
+#include "buf0dump.h"
+#include "os0file.h"
+#include "os0thread.h"
+#include "fil0fil.h"
+#include "fsp0fsp.h"
+#include "rem0rec.h"
+#include "mtr0mtr.h"
+#include "log0log.h"
+#include "log0online.h"
+#include "log0recv.h"
+#include "page0page.h"
+#include "page0cur.h"
+#include "trx0trx.h"
+#include "trx0sys.h"
+#include "btr0btr.h"
+#include "btr0cur.h"
+#include "rem0rec.h"
+#include "ibuf0ibuf.h"
+#include "srv0start.h"
+#include "srv0srv.h"
+#ifndef UNIV_HOTBACKUP
+# include "trx0rseg.h"
+# include "os0proc.h"
+# include "sync0sync.h"
+# include "buf0flu.h"
+# include "buf0rea.h"
+# include "dict0boot.h"
+# include "dict0load.h"
+# include "dict0stats_bg.h"
+# include "que0que.h"
+# include "usr0sess.h"
+# include "lock0lock.h"
+# include "trx0roll.h"
+# include "trx0purge.h"
+# include "lock0lock.h"
+# include "pars0pars.h"
+# include "btr0sea.h"
+# include "rem0cmp.h"
+# include "dict0crea.h"
+# include "row0ins.h"
+# include "row0sel.h"
+# include "row0upd.h"
+# include "row0row.h"
+# include "row0mysql.h"
+# include "btr0pcur.h"
+# include "os0sync.h"
+# include "zlib.h"
+# include "ut0crc32.h"
+# include "os0stacktrace.h"
+
+/** Log sequence number immediately after startup */
+UNIV_INTERN lsn_t srv_start_lsn;
+/** Log sequence number at shutdown */
+UNIV_INTERN lsn_t srv_shutdown_lsn;
+
+#ifdef HAVE_DARWIN_THREADS
+# include <sys/utsname.h>
+/** TRUE if the F_FULLFSYNC option is available */
+UNIV_INTERN ibool srv_have_fullfsync = FALSE;
+#endif
+
+/** TRUE if a raw partition is in use */
+UNIV_INTERN ibool srv_start_raw_disk_in_use = FALSE;
+
+/** TRUE if the server is being started, before rolling back any
+incomplete transactions */
+UNIV_INTERN ibool srv_startup_is_before_trx_rollback_phase = FALSE;
+/** TRUE if the server is being started */
+UNIV_INTERN ibool srv_is_being_started = FALSE;
+/** TRUE if the server was successfully started */
+UNIV_INTERN ibool srv_was_started = FALSE;
+/** TRUE if innobase_start_or_create_for_mysql() has been called */
+static ibool srv_start_has_been_called = FALSE;
+
+/** At a shutdown this value climbs from SRV_SHUTDOWN_NONE to
+SRV_SHUTDOWN_CLEANUP and then to SRV_SHUTDOWN_LAST_PHASE, and so on */
+UNIV_INTERN enum srv_shutdown_state srv_shutdown_state = SRV_SHUTDOWN_NONE;
+
+/** Files comprising the system tablespace */
+static os_file_t files[1000];
+
+/** io_handler_thread parameters for thread identification */
+static ulint n[SRV_MAX_N_IO_THREADS + 6];
+/** io_handler_thread identifiers, 32 is the maximum number of purge threads */
+static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6
+ + SRV_MAX_N_PURGE_THREADS];
+
+/** We use this mutex to test the return value of pthread_mutex_trylock
+ on successful locking. HP-UX does NOT return 0, though Linux et al do. */
+static os_fast_mutex_t srv_os_test_mutex;
+
+/** Name of srv_monitor_file */
+static char* srv_monitor_file_name;
+#endif /* !UNIV_HOTBACKUP */
+
+/** Default undo tablespace size in UNIV_PAGEs count (10MB). */
+static const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES =
+ ((1024 * 1024) * 10) / UNIV_PAGE_SIZE_DEF;
+
+/** */
+#define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD
+#define SRV_MAX_N_PENDING_SYNC_IOS 100
+
+#ifdef UNIV_PFS_THREAD
+/* Keys to register InnoDB threads with performance schema */
+UNIV_INTERN mysql_pfs_key_t io_handler_thread_key;
+UNIV_INTERN mysql_pfs_key_t srv_lock_timeout_thread_key;
+UNIV_INTERN mysql_pfs_key_t srv_error_monitor_thread_key;
+UNIV_INTERN mysql_pfs_key_t srv_monitor_thread_key;
+UNIV_INTERN mysql_pfs_key_t srv_master_thread_key;
+UNIV_INTERN mysql_pfs_key_t srv_purge_thread_key;
+UNIV_INTERN mysql_pfs_key_t srv_log_tracking_thread_key;
+#endif /* UNIV_PFS_THREAD */
+
+/*********************************************************************//**
+Parse a decimal size with an optional G/g or M/m suffix into megabytes;
+a number without a suffix is taken as bytes and rounded down to megabytes.
+@return pointer to the first character after the number and its suffix */
+static
+char*
+srv_parse_megabytes(
+/*================*/
+ char* str, /*!< in: size string, e.g. "12M", "1G" or a byte count */
+ ulint* megs) /*!< out: the number in megabytes */
+{
+ char* endp;
+ ulint size;
+
+ size = strtoul(str, &endp, 10);
+
+ str = endp;
+
+ switch (*str) {
+ case 'G': case 'g':
+ size *= 1024;
+ /* fall through to step over the suffix character */
+ case 'M': case 'm':
+ str++;
+ break;
+ default:
+ size /= 1024 * 1024;
+ break;
+ }
+
+ *megs = size;
+ return(str);
+}
+
+/*********************************************************************//**
+Check that a file, if it exists, is a regular file with usable permissions.
+@return true if it doesn't exist or is a regular file with stat.rw_perm set */
+static
+bool
+srv_file_check_mode(
+/*================*/
+ const char* name) /*!< in: filename to check */
+{
+ os_file_stat_t stat;
+
+ memset(&stat, 0x0, sizeof(stat));
+
+ dberr_t err = os_file_get_status(name, &stat, true);
+
+ if (err == DB_FAIL) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "os_file_get_status() failed on '%s'. Can't determine "
+ "file permissions", name);
+
+ return(false);
+
+ } else if (err == DB_SUCCESS) {
+
+ /* Note: stat.rw_perm is only valid for regular files */
+
+ if (stat.type == OS_FILE_TYPE_FILE) {
+ if (!stat.rw_perm) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "%s can't be opened in %s mode",
+ name,
+ srv_read_only_mode
+ ? "read" : "read-write");
+
+ return(false);
+ }
+ } else {
+ /* Not a regular file, bail out. */
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "'%s' not a regular file.", name);
+
+ return(false);
+ }
+ } else {
+
+ /* A missing file is OK. If creating it later fails on RO
+ media, there is nothing we can do about that here. */
+
+ ut_a(err == DB_NOT_FOUND);
+ }
+
+ return(true);
+}
+
+/*********************************************************************//**
+Reads the data files and their sizes from a character string given in
+the .cnf file.
+@return TRUE if ok, FALSE on parse error */
+UNIV_INTERN
+ibool
+srv_parse_data_file_paths_and_sizes(
+/*================================*/
+ char* str) /*!< in/out: the data file path string */
+{
+ char* input_str;
+ char* path;
+ ulint size;
+ ulint i = 0;
+
+ srv_auto_extend_last_data_file = FALSE;
+ srv_last_file_size_max = 0;
+ srv_data_file_names = NULL;
+ srv_data_file_sizes = NULL;
+ srv_data_file_is_raw_partition = NULL;
+
+ input_str = str;
+
+ /* First calculate the number of data files and check syntax:
+ path:size[M | G];path:size[M | G]... . Note that a Windows path may
+ contain a drive name and a ':'. */
+
+ while (*str != '\0') {
+ path = str;
+
+ while ((*str != ':' && *str != '\0')
+ || (*str == ':'
+ && (*(str + 1) == '\\' || *(str + 1) == '/'
+ || *(str + 1) == ':'))) {
+ str++;
+ }
+
+ if (*str == '\0') {
+ return(FALSE);
+ }
+
+ str++;
+
+ str = srv_parse_megabytes(str, &size);
+
+ if (0 == strncmp(str, ":autoextend",
+ (sizeof ":autoextend") - 1)) {
+
+ str += (sizeof ":autoextend") - 1;
+
+ if (0 == strncmp(str, ":max:",
+ (sizeof ":max:") - 1)) {
+
+ str += (sizeof ":max:") - 1;
+
+ str = srv_parse_megabytes(str, &size);
+ }
+
+ if (*str != '\0') {
+
+ return(FALSE);
+ }
+ }
+
+ if (strlen(str) >= 6
+ && *str == 'n'
+ && *(str + 1) == 'e'
+ && *(str + 2) == 'w') {
+ str += 3;
+ }
+
+ if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
+ str += 3;
+ }
+
+ if (size == 0) {
+ return(FALSE);
+ }
+
+ i++;
+
+ if (*str == ';') {
+ str++;
+ } else if (*str != '\0') {
+
+ return(FALSE);
+ }
+ }
+
+ if (i == 0) {
+ /* If innodb_data_file_path was defined it must contain
+ at least one data file definition */
+
+ return(FALSE);
+ }
+
+ srv_data_file_names = static_cast<char**>(
+ malloc(i * sizeof *srv_data_file_names));
+
+ srv_data_file_sizes = static_cast<ulint*>(
+ malloc(i * sizeof *srv_data_file_sizes));
+
+ srv_data_file_is_raw_partition = static_cast<ulint*>(
+ malloc(i * sizeof *srv_data_file_is_raw_partition));
+
+ srv_n_data_files = i;
+
+ /* Then store the actual values to our arrays */
+
+ str = input_str;
+ i = 0;
+
+ while (*str != '\0') {
+ path = str;
+
+ /* Note that we must step over the ':' in a Windows path;
+ a Windows path normally looks like C:\ibdata\ibdata1:1G, but
+ a Windows raw partition may have a specification like
+ \\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */
+
+ while ((*str != ':' && *str != '\0')
+ || (*str == ':'
+ && (*(str + 1) == '\\' || *(str + 1) == '/'
+ || *(str + 1) == ':'))) {
+ str++;
+ }
+
+ if (*str == ':') {
+ /* Make path a null-terminated string */
+ *str = '\0';
+ str++;
+ }
+
+ str = srv_parse_megabytes(str, &size);
+
+ srv_data_file_names[i] = path;
+ srv_data_file_sizes[i] = size;
+
+ if (0 == strncmp(str, ":autoextend",
+ (sizeof ":autoextend") - 1)) {
+
+ srv_auto_extend_last_data_file = TRUE;
+
+ str += (sizeof ":autoextend") - 1;
+
+ if (0 == strncmp(str, ":max:",
+ (sizeof ":max:") - 1)) {
+
+ str += (sizeof ":max:") - 1;
+
+ str = srv_parse_megabytes(
+ str, &srv_last_file_size_max);
+ }
+
+ if (*str != '\0') {
+
+ return(FALSE);
+ }
+ }
+
+ (srv_data_file_is_raw_partition)[i] = 0;
+
+ if (strlen(str) >= 6
+ && *str == 'n'
+ && *(str + 1) == 'e'
+ && *(str + 2) == 'w') {
+ str += 3;
+ (srv_data_file_is_raw_partition)[i] = SRV_NEW_RAW;
+ }
+
+ if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') {
+ str += 3;
+
+ if ((srv_data_file_is_raw_partition)[i] == 0) {
+ (srv_data_file_is_raw_partition)[i] = SRV_OLD_RAW;
+ }
+ }
+
+ i++;
+
+ if (*str == ';') {
+ str++;
+ }
+ }
+
+ return(TRUE);
+}
+
+/*********************************************************************//**
+Frees the three arrays allocated by srv_parse_data_file_paths_and_sizes()
+and resets the pointers to NULL, so that a repeated call is harmless. */
+UNIV_INTERN
+void
+srv_free_paths_and_sizes(void)
+/*==========================*/
+{
+ free(srv_data_file_names);
+ srv_data_file_names = NULL;
+ free(srv_data_file_sizes);
+ srv_data_file_sizes = NULL;
+ free(srv_data_file_is_raw_partition);
+ srv_data_file_is_raw_partition = NULL;
+}
+
+#ifndef UNIV_HOTBACKUP
+
+static ulint io_tid_i = 0;
+
+/********************************************************************//**
+I/o-handler thread function.
+@return OS_THREAD_DUMMY_RETURN */
+extern "C" UNIV_INTERN
+os_thread_ret_t
+DECLARE_THREAD(io_handler_thread)(
+/*==============================*/
+ void* arg) /*!< in: pointer to the number of the segment in
+ the aio array */
+{
+ ulint segment;
+ ulint tid_i = os_atomic_increment_ulint(&io_tid_i, 1) - 1;
+
+ ut_ad(tid_i < srv_n_file_io_threads);
+
+ segment = *((ulint*) arg);
+
+ srv_io_tids[tid_i] = os_thread_get_tid();
+ os_thread_set_priority(srv_io_tids[tid_i], srv_sched_priority_io);
+
+#ifdef UNIV_DEBUG_THREAD_CREATION
+ fprintf(stderr, "Io handler thread %lu starts, id %lu\n", segment,
+ os_thread_pf(os_thread_get_curr_id()));
+#endif
+
+#ifdef UNIV_PFS_THREAD
+ pfs_register_thread(io_handler_thread_key);
+#endif /* UNIV_PFS_THREAD */
+
+ while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
+ srv_current_thread_priority = srv_io_thread_priority;
+ fil_aio_wait(segment);
+ }
+
+ /* We count the number of threads in os_thread_exit(). A created
+ thread should always use that to exit and not use return() to exit.
+ The thread actually never comes here because it is exited in an
+ os_event_wait(). */
+
+ os_thread_exit(NULL);
+
+ OS_THREAD_DUMMY_RETURN;
+}
+#endif /* !UNIV_HOTBACKUP */
+
+/*********************************************************************//**
+Converts '/' to '\' in place when built with __WIN__; otherwise does nothing. */
+UNIV_INTERN
+void
+srv_normalize_path_for_win(
+/*=======================*/
+ char* str __attribute__((unused))) /*!< in/out: null-terminated
+ path; unused unless __WIN__ is defined */
+{
+#ifdef __WIN__
+ for (; *str; str++) {
+
+ if (*str == '/') {
+ *str = '\\';
+ }
+ }
+#endif
+}
+
+#ifndef UNIV_HOTBACKUP
+/*********************************************************************//**
+Creates a log file.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+create_log_file(
+/*============*/
+ os_file_t* file, /*!< out: file handle */
+ const char* name) /*!< in: log file name */
+{
+ ibool ret;
+
+ *file = os_file_create(
+ innodb_file_log_key, name,
+ OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL,
+ OS_LOG_FILE, &ret);
+
+ if (!ret) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create %s", name);
+ return(DB_ERROR);
+ }
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Setting log file %s size to %lu MB",
+ name, (ulong) srv_log_file_size
+ >> (20 - UNIV_PAGE_SIZE_SHIFT));
+
+ ret = os_file_set_size(name, *file,
+ (os_offset_t) srv_log_file_size
+ << UNIV_PAGE_SIZE_SHIFT);
+ if (!ret) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "Cannot set log file"
+ " %s to size %lu MB", name, (ulong) srv_log_file_size
+ >> (20 - UNIV_PAGE_SIZE_SHIFT));
+ return(DB_ERROR);
+ }
+
+ ret = os_file_close(*file);
+ ut_a(ret);
+
+ return(DB_SUCCESS);
+}
+
+/** Initial number of the first redo log file */
+#define INIT_LOG_FILE0 (SRV_N_LOG_FILES_MAX + 1)
+
+#ifdef DBUG_OFF
+# define RECOVERY_CRASH(x) do {} while(0)
+#else
+# define RECOVERY_CRASH(x) do { \
+ if (srv_force_recovery_crash == x) { \
+ fprintf(stderr, "innodb_force_recovery_crash=%lu\n", \
+ srv_force_recovery_crash); \
+ fflush(stderr); \
+ exit(3); \
+ } \
+} while (0)
+#endif
+
+/*********************************************************************//**
+Creates all log files.
+@return DB_SUCCESS or error code */
+static
+dberr_t
+create_log_files(
+/*=============*/
+ bool create_new_db, /*!< in: TRUE if new database is being
+ created */
+ char* logfilename, /*!< in/out: buffer for log file name */
+ size_t dirnamelen, /*!< in: length of the directory path */
+ lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
+ char*& logfile0) /*!< out: name of the first log file */
+{
+ if (srv_read_only_mode) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create log files in read-only mode");
+ return(DB_READ_ONLY);
+ }
+
+ /* We prevent system tablespace creation with existing files in
+ data directory. So we do not delete log files when creating new system
+ tablespace */
+ if (!create_new_db) {
+ /* Remove any old log files. */
+ for (unsigned i = 0; i <= INIT_LOG_FILE0; i++) {
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
+
+ /* Ignore errors about non-existent files or files
+ that cannot be removed. The create_log_file() will
+ return an error when the file exists. */
+#ifdef __WIN__
+ DeleteFile((LPCTSTR) logfilename);
+#else
+ unlink(logfilename);
+#endif
+ /* Crashing after deleting the first
+ file should be recoverable. The buffer
+ pool was clean, and we can simply create
+ all log files from the scratch. */
+ RECOVERY_CRASH(6);
+ }
+ }
+
+ ut_ad(!buf_pool_check_no_pending_io());
+
+ RECOVERY_CRASH(7);
+
+ for (unsigned i = 0; i < srv_n_log_files; i++) {
+ sprintf(logfilename + dirnamelen,
+ "ib_logfile%u", i ? i : INIT_LOG_FILE0);
+
+ dberr_t err = create_log_file(&files[i], logfilename);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ }
+
+ RECOVERY_CRASH(8);
+
+ /* We did not create the first log file initially as
+ ib_logfile0, so that crash recovery cannot find it until it
+ has been completed and renamed. */
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", INIT_LOG_FILE0);
+
+ fil_space_create(
+ logfilename, SRV_LOG_SPACE_FIRST_ID,
+ fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
+ FIL_LOG);
+ ut_a(fil_validate());
+
+ logfile0 = fil_node_create(
+ logfilename, (ulint) srv_log_file_size,
+ SRV_LOG_SPACE_FIRST_ID, FALSE);
+ ut_a(logfile0);
+
+ for (unsigned i = 1; i < srv_n_log_files; i++) {
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", i);
+
+ if (!fil_node_create(
+ logfilename,
+ (ulint) srv_log_file_size,
+ SRV_LOG_SPACE_FIRST_ID, FALSE)) {
+ ut_error;
+ }
+ }
+
+#ifdef UNIV_LOG_ARCHIVE
+ /* Create the file space object for archived logs. */
+ fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1,
+ 0, FIL_LOG);
+#endif
+ log_group_init(0, srv_n_log_files,
+ srv_log_file_size * UNIV_PAGE_SIZE,
+ SRV_LOG_SPACE_FIRST_ID,
+ SRV_LOG_SPACE_FIRST_ID + 1);
+
+ fil_open_log_and_system_tablespace_files();
+
+ /* Create a log checkpoint. */
+ mutex_enter(&log_sys->mutex);
+ ut_d(recv_no_log_write = FALSE);
+ recv_reset_logs(
+#ifdef UNIV_LOG_ARCHIVE
+ UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no,
+ TRUE,
+#endif
+ lsn);
+ mutex_exit(&log_sys->mutex);
+
+ return(DB_SUCCESS);
+}
+
+/*********************************************************************//**
+Renames the first log file. */
+static
+void
+create_log_files_rename(
+/*====================*/
+ char* logfilename, /*!< in/out: buffer for log file name */
+ size_t dirnamelen, /*!< in: length of the directory path */
+ lsn_t lsn, /*!< in: FIL_PAGE_FILE_FLUSH_LSN value */
+ char* logfile0) /*!< in/out: name of the first log file */
+{
+ /* If innodb_flush_method=O_DSYNC,
+ we need to explicitly flush the log buffers. */
+ fil_flush(SRV_LOG_SPACE_FIRST_ID);
+ /* Close the log files, so that we can rename
+ the first one. */
+ fil_close_log_files(false);
+
+ /* Rename the first log file, now that a log
+ checkpoint has been created. */
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
+
+ RECOVERY_CRASH(9);
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Renaming log file %s to %s", logfile0, logfilename);
+
+ mutex_enter(&log_sys->mutex);
+ ut_ad(strlen(logfile0) == 2 + strlen(logfilename));
+ ibool success = os_file_rename(
+ innodb_file_log_key, logfile0, logfilename);
+ ut_a(success);
+
+ RECOVERY_CRASH(10);
+
+ /* Replace the first file with ib_logfile0. */
+ strcpy(logfile0, logfilename);
+ mutex_exit(&log_sys->mutex);
+
+ fil_open_log_and_system_tablespace_files();
+
+ ib_logf(IB_LOG_LEVEL_WARN, "New log files created, LSN=" LSN_PF, lsn);
+}
+
+/*********************************************************************//**
+Opens a log file.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+open_log_file(
+/*==========*/
+ os_file_t* file, /*!< out: file handle */
+ const char* name, /*!< in: log file name */
+ os_offset_t* size) /*!< out: file size */
+{
+ ibool ret;
+
+ *file = os_file_create(innodb_file_log_key, name,
+ OS_FILE_OPEN, OS_FILE_AIO,
+ OS_LOG_FILE, &ret);
+ if (!ret) {
+ ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name);
+ return(DB_ERROR);
+ }
+
+ *size = os_file_get_size(*file);
+
+ ret = os_file_close(*file);
+ ut_a(ret);
+ return(DB_SUCCESS);
+}
+
+/*********************************************************************//**
+Creates or opens database data files and closes them.
+@return DB_SUCCESS or error code */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+open_or_create_data_files(
+/*======================*/
+ ibool* create_new_db, /*!< out: TRUE if new database should be
+ created */
+#ifdef UNIV_LOG_ARCHIVE
+ lsn_t* min_arch_log_no,/*!< out: min of archived log
+ numbers in data files */
+ lsn_t* max_arch_log_no,/*!< out: max of archived log
+ numbers in data files */
+#endif /* UNIV_LOG_ARCHIVE */
+ lsn_t* min_flushed_lsn,/*!< out: min of flushed lsn
+ values in data files */
+ lsn_t* max_flushed_lsn,/*!< out: max of flushed lsn
+ values in data files */
+ ulint* sum_of_new_sizes)/*!< out: sum of sizes of the
+ new files added */
+{
+ ibool ret;
+ ulint i;
+ ibool one_opened = FALSE;
+ ibool one_created = FALSE;
+ os_offset_t size;
+ ulint flags;
+ ulint space;
+ ulint rounded_size_pages;
+ char name[10000];
+
+ if (srv_n_data_files >= 1000) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Can only have < 1000 data files, you have "
+ "defined %lu", (ulong) srv_n_data_files);
+
+ return(DB_ERROR);
+ }
+
+ *sum_of_new_sizes = 0;
+
+ *create_new_db = FALSE;
+
+ srv_normalize_path_for_win(srv_data_home);
+
+ for (i = 0; i < srv_n_data_files; i++) {
+ ulint dirnamelen;
+
+ srv_normalize_path_for_win(srv_data_file_names[i]);
+ dirnamelen = strlen(srv_data_home);
+
+ ut_a(dirnamelen + strlen(srv_data_file_names[i])
+ < (sizeof name) - 1);
+
+ memcpy(name, srv_data_home, dirnamelen);
+
+ /* Add a path separator if needed. */
+ if (dirnamelen && name[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
+ name[dirnamelen++] = SRV_PATH_SEPARATOR;
+ }
+
+ strcpy(name + dirnamelen, srv_data_file_names[i]);
+
+ /* Note: It will return true if the file doesn't exist. */
+
+ if (!srv_file_check_mode(name)) {
+
+ return(DB_FAIL);
+
+ } else if (srv_data_file_is_raw_partition[i] == 0) {
+
+ /* First we try to create the file: if it already
+ exists, ret will get value FALSE */
+
+ files[i] = os_file_create(
+ innodb_file_data_key, name, OS_FILE_CREATE,
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+
+ if (srv_read_only_mode) {
+
+ if (ret) {
+ goto size_check;
+ }
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Opening %s failed!", name);
+
+ return(DB_ERROR);
+
+ } else if (!ret
+ && os_file_get_last_error(false)
+ != OS_FILE_ALREADY_EXISTS
+#ifdef UNIV_AIX
+ /* AIX 5.1 after security patch ML7 may have
+ errno set to 0 here, which causes our
+ function to return 100; work around that
+ AIX problem */
+ && os_file_get_last_error(false) != 100
+#endif /* UNIV_AIX */
+ ) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Creating or opening %s failed!",
+ name);
+
+ return(DB_ERROR);
+ }
+
+ } else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
+
+ ut_a(!srv_read_only_mode);
+
+ /* The partition is opened, not created; then it is
+ written over */
+
+ srv_start_raw_disk_in_use = TRUE;
+ srv_created_new_raw = TRUE;
+
+ files[i] = os_file_create(
+ innodb_file_data_key, name, OS_FILE_OPEN_RAW,
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+
+ if (!ret) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Error in opening %s", name);
+
+ return(DB_ERROR);
+ }
+ } else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
+ srv_start_raw_disk_in_use = TRUE;
+
+ ret = FALSE;
+ } else {
+ ut_a(0);
+ }
+
+ if (ret == FALSE) {
+ const char* check_msg;
+ /* We open the data file */
+
+ if (one_created) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Data files can only be added at "
+ "the end of a tablespace, but "
+ "data file %s existed beforehand.",
+ name);
+ return(DB_ERROR);
+ }
+ if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
+ ut_a(!srv_read_only_mode);
+ files[i] = os_file_create(
+ innodb_file_data_key,
+ name, OS_FILE_OPEN_RAW,
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+ } else if (i == 0) {
+ files[i] = os_file_create(
+ innodb_file_data_key,
+ name, OS_FILE_OPEN_RETRY,
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+ } else {
+ files[i] = os_file_create(
+ innodb_file_data_key,
+ name, OS_FILE_OPEN, OS_FILE_NORMAL,
+ OS_DATA_FILE, &ret);
+ }
+
+ if (!ret) {
+
+ os_file_get_last_error(true);
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Can't open '%s'", name);
+
+ return(DB_ERROR);
+ }
+
+ if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
+
+ goto skip_size_check;
+ }
+
+size_check:
+ size = os_file_get_size(files[i]);
+ ut_a(size != (os_offset_t) -1);
+ /* Round size downward to whole pages */
+
+ rounded_size_pages = (ulint)
+ (size >> UNIV_PAGE_SIZE_SHIFT);
+
+ if (i == srv_n_data_files - 1
+ && srv_auto_extend_last_data_file) {
+
+ if (srv_data_file_sizes[i] > rounded_size_pages
+ || (srv_last_file_size_max > 0
+ && srv_last_file_size_max
+ < rounded_size_pages)) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "auto-extending "
+ "data file %s is "
+ "of a different size "
+ "%lu pages (rounded "
+ "down to MB) than specified "
+ "in the .cnf file: "
+ "initial %lu pages, "
+ "max %lu (relevant if "
+ "non-zero) pages!",
+ name,
+ (ulong) rounded_size_pages,
+ (ulong) srv_data_file_sizes[i],
+ (ulong)
+ srv_last_file_size_max);
+
+ return(DB_ERROR);
+ }
+
+ srv_data_file_sizes[i] = rounded_size_pages;
+ }
+
+ if (rounded_size_pages != srv_data_file_sizes[i]) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Data file %s is of a different "
+ "size %lu pages (rounded down to MB) "
+ "than specified in the .cnf file "
+ "%lu pages!",
+ name,
+ (ulong) rounded_size_pages,
+ (ulong) srv_data_file_sizes[i]);
+
+ return(DB_ERROR);
+ }
+skip_size_check:
+ check_msg = fil_read_first_page(
+ files[i], one_opened, &flags, &space,
+ min_flushed_lsn, max_flushed_lsn);
+
+ if (check_msg) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "%s in data file %s",
+ check_msg, name);
+ return(DB_ERROR);
+ }
+
+ /* The first file of the system tablespace must
+ have space ID = TRX_SYS_SPACE. The FSP_SPACE_ID
+ field in files greater than ibdata1 are unreliable. */
+ ut_a(one_opened || space == TRX_SYS_SPACE);
+
+ /* Check the flags for the first system tablespace
+ file only. */
+ if (!one_opened
+ && UNIV_PAGE_SIZE
+ != fsp_flags_get_page_size(flags)) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Data file \"%s\" uses page size %lu,"
+ "but the start-up parameter "
+ "is --innodb-page-size=%lu",
+ name,
+ fsp_flags_get_page_size(flags),
+ UNIV_PAGE_SIZE);
+
+ return(DB_ERROR);
+ }
+
+ one_opened = TRUE;
+ } else if (!srv_read_only_mode) {
+ /* We created the data file and now write it full of
+ zeros */
+
+ one_created = TRUE;
+
+ if (i > 0) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Data file %s did not"
+ " exist: new to be created",
+ name);
+ } else {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "The first specified "
+ "data file %s did not exist: "
+ "a new database to be created!",
+ name);
+
+ *create_new_db = TRUE;
+ }
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Setting file %s size to %lu MB",
+ name,
+ (ulong) (srv_data_file_sizes[i]
+ >> (20 - UNIV_PAGE_SIZE_SHIFT)));
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Database physically writes the"
+ " file full: wait...");
+
+ ret = os_file_set_size(
+ name, files[i],
+ (os_offset_t) srv_data_file_sizes[i]
+ << UNIV_PAGE_SIZE_SHIFT);
+
+ if (!ret) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Error in creating %s: "
+ "probably out of disk space",
+ name);
+
+ return(DB_ERROR);
+ }
+
+ *sum_of_new_sizes += srv_data_file_sizes[i];
+ }
+
+ ret = os_file_close(files[i]);
+ ut_a(ret);
+
+ if (i == 0) {
+ flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE);
+ fil_space_create(name, 0, flags, FIL_TABLESPACE);
+ }
+
+ ut_a(fil_validate());
+
+ if (!fil_node_create(name, srv_data_file_sizes[i], 0,
+ srv_data_file_is_raw_partition[i] != 0)) {
+ return(DB_ERROR);
+ }
+ }
+
+ return(DB_SUCCESS);
+}
+
+/*********************************************************************//**
+Create (or, in read-only mode, open) an undo tablespace file and close it.
+@return DB_SUCCESS, or DB_ERROR if creating or sizing the file failed */
+static
+dberr_t
+srv_undo_tablespace_create(
+/*=======================*/
+ const char* name, /*!< in: tablespace name */
+ ulint size) /*!< in: tablespace size in pages */
+{
+ os_file_t fh;
+ ibool ret;
+ dberr_t err = DB_SUCCESS;
+
+ os_file_create_subdirs_if_needed(name);
+
+ fh = os_file_create(
+ innodb_file_data_key,
+ name,
+ srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE,
+ OS_FILE_NORMAL, OS_DATA_FILE, &ret);
+
+ if (srv_read_only_mode && ret) {
+ ib_logf(IB_LOG_LEVEL_INFO, "%s opened in read-only mode", name);
+ os_file_close(fh);
+ } else if (ret == FALSE) {
+ if (os_file_get_last_error(false) != OS_FILE_ALREADY_EXISTS
+#ifdef UNIV_AIX
+ /* AIX 5.1 after security patch ML7 may have
+ errno set to 0 here, which causes our function
+ to return 100; work around that AIX problem */
+ && os_file_get_last_error(false) != 100
+#endif /* UNIV_AIX */
+ ) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Can't create UNDO tablespace %s", name);
+ } else {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Creating system tablespace with"
+ " existing undo tablespaces is not"
+ " supported. Please delete all undo"
+ " tablespaces before creating new"
+ " system tablespace.");
+ }
+ err = DB_ERROR;
+ } else {
+ ut_a(!srv_read_only_mode);
+
+ /* We created the data file and now write it full of zeros */
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Data file %s did not exist: new to be created",
+ name);
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Setting file %s size to %lu MB",
+ name, (ulong) (size >> (20 - UNIV_PAGE_SIZE_SHIFT)));
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Database physically writes the file full: wait...");
+
+ ret = os_file_set_size(name, fh, (os_offset_t) size << UNIV_PAGE_SIZE_SHIFT);
+
+ if (!ret) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Error in creating %s: probably out of "
+ "disk space", name);
+
+ err = DB_ERROR;
+ }
+
+ os_file_close(fh);
+ }
+
+ return(err);
+}
+
+/*********************************************************************//**
+Open an undo tablespace file and register it as tablespace 'space'.
+@return DB_SUCCESS or DB_ERROR (bad permissions, open or node create failure) */
+static
+dberr_t
+srv_undo_tablespace_open(
+/*=====================*/
+ const char* name, /*!< in: tablespace name */
+ ulint space) /*!< in: tablespace id */
+{
+ os_file_t fh;
+ dberr_t err = DB_ERROR;
+ ibool ret;
+ ulint flags;
+
+ if (!srv_file_check_mode(name)) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "UNDO tablespaces must be %s!",
+ srv_read_only_mode ? "readable" : "writable");
+
+ return(DB_ERROR);
+ }
+
+ fh = os_file_create(
+ innodb_file_data_key, name,
+ OS_FILE_OPEN_RETRY
+ | OS_FILE_ON_ERROR_NO_EXIT
+ | OS_FILE_ON_ERROR_SILENT,
+ OS_FILE_NORMAL,
+ OS_DATA_FILE,
+ &ret);
+
+ /* If the file open was successful then load the tablespace. */
+
+ if (ret) {
+ os_offset_t size;
+
+ size = os_file_get_size(fh);
+ ut_a(size != (os_offset_t) -1);
+
+ ret = os_file_close(fh);
+ ut_a(ret);
+
+ /* Load the tablespace into InnoDB's internal
+ data structures. */
+
+ /* We set the biggest space id to the undo tablespace
+ because InnoDB hasn't opened any other tablespace apart
+ from the system tablespace. */
+
+ fil_set_max_space_id_if_bigger(space);
+
+ /* Set the compressed page size to 0 (non-compressed) */
+ flags = fsp_flags_set_page_size(0, UNIV_PAGE_SIZE);
+ fil_space_create(name, space, flags, FIL_TABLESPACE);
+
+ ut_a(fil_validate());
+
+ os_offset_t n_pages = size / UNIV_PAGE_SIZE;
+
+ /* On 64 bit Windows ulint can be 32 bit and os_offset_t
+ is 64 bit. It is OK to cast the n_pages to ulint because
+ the unit has been scaled to pages and they are always
+ 32 bit. */
+ if (fil_node_create(name, (ulint) n_pages, space, FALSE)) {
+ err = DB_SUCCESS;
+ }
+ }
+
+ return(err);
+}
+
+/********************************************************************
+Opens the configured number of undo tablespaces.
+
+When a new database is being created, the undo tablespace files
+undo001 .. undoNNN are first created under srv_undo_dir. After that the
+undo tablespaces that are in use are opened (a failure here is returned
+to the caller), followed by any further undo tablespaces with
+consecutive ids that can be opened (the first failure there only ends
+the scan).
+@return DB_SUCCESS or error code */
+static
+dberr_t
+srv_undo_tablespaces_init(
+/*======================*/
+	ibool		create_new_db,		/*!< in: TRUE if new db being
+						created */
+	const ulint	n_conf_tablespaces,	/*!< in: configured undo
+						tablespaces */
+	ulint*		n_opened)		/*!< out: number of UNDO
+						tablespaces successfully
+						discovered and opened */
+{
+	ulint		i;
+	dberr_t		err = DB_SUCCESS;
+	ulint		prev_space_id = 0;
+	ulint		n_undo_tablespaces;
+	ulint		undo_tablespace_ids[TRX_SYS_N_RSEGS + 1];
+
+	*n_opened = 0;
+
+	ut_a(n_conf_tablespaces <= TRX_SYS_N_RSEGS);
+
+	memset(undo_tablespace_ids, 0x0, sizeof(undo_tablespace_ids));
+
+	/* Create the undo spaces only if we are creating a new
+	instance. We don't allow creating of new undo tablespaces
+	in an existing instance (yet). This restriction exists because
+	we check in several places for SYSTEM tablespaces to be less than
+	the min of user defined tablespace ids. Once we implement saving
+	the location of the undo tablespaces and their space ids this
+	restriction will/should be lifted. */
+
+	for (i = 0; create_new_db && i < n_conf_tablespaces; ++i) {
+		char	name[OS_FILE_MAX_PATH];
+
+		ut_snprintf(
+			name, sizeof(name),
+			"%s%cundo%03lu",
+			srv_undo_dir, SRV_PATH_SEPARATOR, i + 1);
+
+		/* Undo space ids start from 1. */
+		err = srv_undo_tablespace_create(
+			name, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES);
+
+		if (err != DB_SUCCESS) {
+
+			ib_logf(IB_LOG_LEVEL_ERROR,
+				"Could not create undo tablespace '%s'.",
+				name);
+
+			return(err);
+		}
+	}
+
+	/* Get the tablespace ids of all the undo segments excluding
+	the system tablespace (0). If we are creating a new instance then
+	we build the undo_tablespace_ids ourselves since they don't
+	already exist. */
+
+	if (!create_new_db) {
+		n_undo_tablespaces = trx_rseg_get_n_undo_tablespaces(
+			undo_tablespace_ids);
+	} else {
+		n_undo_tablespaces = n_conf_tablespaces;
+
+		for (i = 1; i <= n_undo_tablespaces; ++i) {
+			undo_tablespace_ids[i - 1] = i;
+		}
+
+		/* Terminate the list directly after the last id. The loop
+		above leaves i == n_undo_tablespaces + 1, so indexing with i
+		would skip one slot and, when n_undo_tablespaces ==
+		TRX_SYS_N_RSEGS, write past the end of the array. The array
+		has TRX_SYS_N_RSEGS + 1 elements and n_undo_tablespaces is
+		at most TRX_SYS_N_RSEGS (asserted above), so this index is
+		always in bounds. */
+		undo_tablespace_ids[n_undo_tablespaces] = ULINT_UNDEFINED;
+	}
+
+	/* Open all the undo tablespaces that are currently in use. If we
+	fail to open any of these it is a fatal error. The tablespace ids
+	should be contiguous. It is a fatal error because they are required
+	for recovery and are referenced by the UNDO logs (a.k.a. RBS). */
+
+	for (i = 0; i < n_undo_tablespaces; ++i) {
+		char	name[OS_FILE_MAX_PATH];
+
+		ut_snprintf(
+			name, sizeof(name),
+			"%s%cundo%03lu",
+			srv_undo_dir, SRV_PATH_SEPARATOR,
+			undo_tablespace_ids[i]);
+
+		/* Should be no gaps in undo tablespace ids. */
+		ut_a(prev_space_id + 1 == undo_tablespace_ids[i]);
+
+		/* The system space id should not be in this array. */
+		ut_a(undo_tablespace_ids[i] != 0);
+		ut_a(undo_tablespace_ids[i] != ULINT_UNDEFINED);
+
+		/* Undo space ids start from 1. */
+
+		err = srv_undo_tablespace_open(name, undo_tablespace_ids[i]);
+
+		if (err != DB_SUCCESS) {
+
+			ib_logf(IB_LOG_LEVEL_ERROR,
+				"Unable to open undo tablespace '%s'.", name);
+
+			return(err);
+		}
+
+		prev_space_id = undo_tablespace_ids[i];
+
+		++*n_opened;
+	}
+
+	/* Open any extra unused undo tablespaces. These must be contiguous.
+	We stop at the first failure. These are undo tablespaces that are
+	not in use and therefore not required by recovery. We only check
+	that there are no gaps. Note that err keeps the status of the last
+	open attempt made here; it is only consulted by the mismatch check
+	below. */
+
+	for (i = prev_space_id + 1; i < TRX_SYS_N_RSEGS; ++i) {
+		char	name[OS_FILE_MAX_PATH];
+
+		ut_snprintf(
+			name, sizeof(name),
+			"%s%cundo%03lu", srv_undo_dir, SRV_PATH_SEPARATOR, i);
+
+		/* Undo space ids start from 1. */
+		err = srv_undo_tablespace_open(name, i);
+
+		if (err != DB_SUCCESS) {
+			break;
+		}
+
+		++n_undo_tablespaces;
+
+		++*n_opened;
+	}
+
+	/* If the user says that there are fewer than what we find we
+	tolerate that discrepancy but not the inverse. Because there could
+	be unused undo tablespaces for future use. */
+
+	if (n_conf_tablespaces > n_undo_tablespaces) {
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+			" InnoDB: Expected to open %lu undo "
+			"tablespaces but was able\n",
+			n_conf_tablespaces);
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+			" InnoDB: to find only %lu undo "
+			"tablespaces.\n", n_undo_tablespaces);
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+			" InnoDB: Set the "
+			"innodb_undo_tablespaces parameter to "
+			"the\n");
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+			" InnoDB: correct value and retry. Suggested "
+			"value is %lu\n", n_undo_tablespaces);
+
+		/* Always report a failure here, even if the last open
+		attempt happened to succeed. */
+		return(err != DB_SUCCESS ? err : DB_ERROR);
+
+	} else if (n_undo_tablespaces > 0) {
+
+		ib_logf(IB_LOG_LEVEL_INFO, "Opened %lu undo tablespaces",
+			n_undo_tablespaces);
+
+		if (n_conf_tablespaces == 0) {
+			ib_logf(IB_LOG_LEVEL_WARN,
+				"Using the system tablespace for all UNDO "
+				"logging because innodb_undo_tablespaces=0");
+		}
+	}
+
+	if (create_new_db) {
+		mtr_t	mtr;
+
+		mtr_start(&mtr);
+
+		/* Initialize the file space header of every undo
+		tablespace, all within a single mini-transaction. */
+		for (i = 1; i <= n_undo_tablespaces; ++i) {
+
+			fsp_header_init(
+				i, SRV_UNDO_TABLESPACE_SIZE_IN_PAGES, &mtr);
+		}
+
+		mtr_commit(&mtr);
+	}
+
+	return(DB_SUCCESS);
+}
+
+/********************************************************************
+Blocks until purge has left its initial state. The wait is abandoned
+as soon as a shutdown has been initiated or when srv_force_recovery is
+SRV_FORCE_NO_BACKGROUND or higher. */
+static
+void
+srv_start_wait_for_purge_to_start()
+/*===============================*/
+{
+	purge_state_t	purge_state = trx_purge_state();
+
+	/* Purge must not be disabled when this is called. */
+	ut_a(purge_state != PURGE_STATE_DISABLED);
+
+	for (;;) {
+		/* Nothing (more) to wait for. */
+		if (srv_shutdown_state != SRV_SHUTDOWN_NONE
+		    || srv_force_recovery >= SRV_FORCE_NO_BACKGROUND
+		    || purge_state != PURGE_STATE_INIT) {
+
+			return;
+		}
+
+		purge_state = trx_purge_state();
+
+		if (purge_state == PURGE_STATE_INIT) {
+			/* Still starting up: report it, sleep and
+			poll the state again. */
+			ib_logf(IB_LOG_LEVEL_INFO,
+				"Waiting for purge to start");
+
+			os_thread_sleep(50000);
+
+		} else if (purge_state == PURGE_STATE_EXIT
+			   || purge_state == PURGE_STATE_DISABLED) {
+			/* Purge must not exit or get disabled while we
+			are waiting for it to start. */
+			ut_error;
+		}
+
+		/* PURGE_STATE_RUN and PURGE_STATE_STOP need no action:
+		the check at the top of the loop ends the wait. */
+	}
+}
+
+/*********************************************************************//**
+Sets up changed page tracking: initializes the online log reader and
+creates the thread that follows the redo log. Tracking is switched off
+instead when force recovery or read-only mode is in effect. */
+static
+void
+init_log_online(void)
+/*=================*/
+{
+	if (UNIV_UNLIKELY(srv_force_recovery > 0 || srv_read_only_mode)) {
+
+		/* Changed page tracking is turned off in these modes. */
+		srv_track_changed_pages = FALSE;
+
+	} else if (srv_track_changed_pages) {
+
+		log_online_read_init();
+
+		/* Start the thread that follows the redo log and outputs
+		the changed page bitmap. */
+		os_thread_create(&srv_redo_log_follow_thread, NULL,
+				 thread_ids + 5 + SRV_MAX_N_IO_THREADS);
+	}
+}
+
+/********************************************************************
+Starts InnoDB and creates a new database if database files
+are not found and the user wants.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+innobase_start_or_create_for_mysql(void)
+/*====================================*/
+{
+ ibool create_new_db;
+ lsn_t min_flushed_lsn;
+ lsn_t max_flushed_lsn;
+#ifdef UNIV_LOG_ARCHIVE
+ lsn_t min_arch_log_no;
+ lsn_t max_arch_log_no;
+#endif /* UNIV_LOG_ARCHIVE */
+ ulint sum_of_new_sizes;
+ ulint sum_of_data_file_sizes;
+ ulint tablespace_size_in_header;
+ dberr_t err;
+ unsigned i;
+ ulint srv_n_log_files_found = srv_n_log_files;
+ ulint io_limit;
+ mtr_t mtr;
+ ib_bh_t* ib_bh;
+ ulint n_recovered_trx;
+ char logfilename[10000];
+ char* logfile0 = NULL;
+ size_t dirnamelen;
+
+ if (srv_read_only_mode) {
+ ib_logf(IB_LOG_LEVEL_INFO, "Started in read only mode");
+ }
+
+#ifdef HAVE_DARWIN_THREADS
+# ifdef F_FULLFSYNC
+ /* This executable has been compiled on Mac OS X 10.3 or later.
+ Assume that F_FULLFSYNC is available at run-time. */
+ srv_have_fullfsync = TRUE;
+# else /* F_FULLFSYNC */
+ /* This executable has been compiled on Mac OS X 10.2
+ or earlier. Determine if the executable is running
+ on Mac OS X 10.3 or later. */
+ struct utsname utsname;
+ if (uname(&utsname)) {
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: cannot determine Mac OS X version!\n", stderr);
+ } else {
+ srv_have_fullfsync = strcmp(utsname.release, "7.") >= 0;
+ }
+ if (!srv_have_fullfsync) {
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: On Mac OS X, fsync() may be "
+ "broken on internal drives,\n", stderr);
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: making transactions unsafe!\n", stderr);
+ }
+# endif /* F_FULLFSYNC */
+#endif /* HAVE_DARWIN_THREADS */
+
+ if (sizeof(ulint) != sizeof(void*)) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error: size of InnoDB's ulint is %lu, "
+ "but size of void*\n", (ulong) sizeof(ulint));
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: is %lu. The sizes should be the same "
+ "so that on a 64-bit\n",
+ (ulong) sizeof(void*));
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: platforms you can allocate more than 4 GB "
+ "of memory.\n");
+ }
+
+ /* If stacktrace is used we set up signal handler for SIGUSR2 signal
+ here. If signal handler set fails we report that and disable
+ stacktrace feature. */
+
+ if (srv_use_stacktrace) {
+#ifdef __linux__
+ struct sigaction sigact;
+
+ sigact.sa_sigaction = os_stacktrace_print;
+ sigact.sa_flags = SA_RESTART | SA_SIGINFO;
+
+ if (sigaction(SIGUSR2, &sigact, (struct sigaction *)NULL) != 0)
+ {
+ fprintf(stderr, " InnoDB:error setting signal handler for %d (%s)\n",
+ SIGUSR2, strsignal(SIGUSR2));
+ srv_use_stacktrace = FALSE;
+
+ }
+#endif /* __linux__ */
+ }
+
+#ifdef UNIV_DEBUG
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!\n");
+#endif
+
+#ifdef UNIV_IBUF_DEBUG
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!\n");
+# ifdef UNIV_IBUF_COUNT_DEBUG
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: !!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on "
+ "!!!!!!!!!\n");
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG\n");
+# endif
+#endif
+
+#ifdef UNIV_BLOB_DEBUG
+ fprintf(stderr,
+ "InnoDB: !!!!!!!! UNIV_BLOB_DEBUG switched on !!!!!!!!!\n"
+ "InnoDB: Server restart may fail with UNIV_BLOB_DEBUG\n");
+#endif /* UNIV_BLOB_DEBUG */
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!\n");
+#endif
+
+#ifdef UNIV_SEARCH_DEBUG
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: !!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!\n");
+#endif
+
+#ifdef UNIV_LOG_LSN_DEBUG
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: !!!!!!!! UNIV_LOG_LSN_DEBUG switched on !!!!!!!!!\n");
+#endif /* UNIV_LOG_LSN_DEBUG */
+#ifdef UNIV_MEM_DEBUG
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!\n");
+#endif
+
+ if (srv_use_sys_malloc) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "The InnoDB memory heap is disabled");
+ }
+
+#if defined(COMPILER_HINTS_ENABLED)
+ ib_logf(IB_LOG_LEVEL_INFO,
+ " InnoDB: Compiler hints enabled.");
+#endif /* defined(COMPILER_HINTS_ENABLED) */
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "" IB_ATOMICS_STARTUP_MSG "");
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Compressed tables use zlib " ZLIB_VERSION
+#ifdef UNIV_ZIP_DEBUG
+ " with validation"
+#endif /* UNIV_ZIP_DEBUG */
+ );
+#ifdef UNIV_ZIP_COPY
+ ib_logf(IB_LOG_LEVEL_INFO, "and extra copying");
+#endif /* UNIV_ZIP_COPY */
+
+
+ /* Since InnoDB does not currently clean up all its internal data
+ structures in MySQL Embedded Server Library server_end(), we
+ print an error message if someone tries to start up InnoDB a
+ second time during the process lifetime. */
+
+ if (srv_start_has_been_called) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: Error: startup called second time "
+ "during the process\n");
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: lifetime. In the MySQL Embedded "
+ "Server Library you\n");
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: cannot call server_init() more "
+ "than once during the\n");
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: process lifetime.\n");
+ }
+
+ srv_start_has_been_called = TRUE;
+
+#ifdef UNIV_DEBUG
+ log_do_write = TRUE;
+#endif /* UNIV_DEBUG */
+ /* yydebug = TRUE; */
+
+ srv_is_being_started = TRUE;
+ srv_startup_is_before_trx_rollback_phase = TRUE;
+
+#ifdef __WIN__
+ switch (os_get_os_version()) {
+ case OS_WIN95:
+ case OS_WIN31:
+ case OS_WINNT:
+ srv_use_native_conditions = FALSE;
+ /* On Win 95, 98, ME, Win32 subsystem for Windows 3.1,
+ and NT use simulated aio. In NT Windows provides async i/o,
+ but when run in conjunction with InnoDB Hot Backup, it seemed
+ to corrupt the data files. */
+
+ srv_use_native_aio = FALSE;
+ break;
+
+ case OS_WIN2000:
+ case OS_WINXP:
+ /* On 2000 and XP, async IO is available, but no condition variables. */
+ srv_use_native_aio = TRUE;
+ srv_use_native_conditions = FALSE;
+ break;
+
+ default:
+ /* Vista and later have both async IO and condition variables */
+ srv_use_native_aio = TRUE;
+ srv_use_native_conditions = TRUE;
+ break;
+ }
+
+#elif defined(LINUX_NATIVE_AIO)
+
+ if (srv_use_native_aio) {
+ ib_logf(IB_LOG_LEVEL_INFO, "Using Linux native AIO");
+ }
+#else
+ /* Currently native AIO is supported only on windows and linux
+ and that also when the support is compiled in. In all other
+ cases, we ignore the setting of innodb_use_native_aio. */
+ srv_use_native_aio = FALSE;
+#endif /* __WIN__ */
+
+ if (srv_file_flush_method_str == NULL) {
+ /* These are the default options */
+
+ srv_unix_file_flush_method = SRV_UNIX_FSYNC;
+
+ srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
+ } else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) {
+ srv_unix_file_flush_method = SRV_UNIX_FSYNC;
+
+ } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) {
+ srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
+
+ } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) {
+ srv_unix_file_flush_method = SRV_UNIX_O_DIRECT;
+
+ } else if (0 == ut_strcmp(srv_file_flush_method_str, "ALL_O_DIRECT")) {
+ srv_unix_file_flush_method = SRV_UNIX_ALL_O_DIRECT;
+
+ } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT_NO_FSYNC")) {
+ srv_unix_file_flush_method = SRV_UNIX_O_DIRECT_NO_FSYNC;
+
+ } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) {
+ srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
+
+ } else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) {
+ srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
+#ifdef _WIN32
+ } else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) {
+ srv_win_file_flush_method = SRV_WIN_IO_NORMAL;
+ srv_use_native_aio = FALSE;
+
+ } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) {
+ srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
+ srv_use_native_aio = FALSE;
+
+ } else if (0 == ut_strcmp(srv_file_flush_method_str,
+ "async_unbuffered")) {
+ srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
+ srv_use_native_aio = TRUE;
+#endif /* __WIN__ */
+ } else {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unrecognized value %s for innodb_flush_method",
+ srv_file_flush_method_str);
+ return(DB_ERROR);
+ }
+
+ /* Note that the call srv_boot() also changes the values of
+ some variables to the units used by InnoDB internally */
+
+ /* Set the maximum number of threads which can wait for a semaphore
+ inside InnoDB: this is the 'sync wait array' size, as well as the
+ maximum number of threads that can wait in the 'srv_conc array' for
+ their time to enter InnoDB. */
+
+ if (srv_buf_pool_size >= 1000 * 1024 * 1024) {
+ /* The buffer pool is at least 1000 MB: allow the most
+ threads. Smaller pools (handled below) assume fewer
+ threads and also use only one buffer pool instance */
+ srv_max_n_threads = 50000;
+
+ } else if (srv_buf_pool_size >= 8 * 1024 * 1024) {
+
+ srv_buf_pool_instances = 1;
+ srv_max_n_threads = 10000;
+ } else {
+ srv_buf_pool_instances = 1;
+
+ /* Saves several MB of memory, especially in
+ 64-bit computers */
+
+ srv_max_n_threads = 1000;
+ }
+
+ srv_boot();
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "%s CPU crc32 instructions",
+ ut_crc32_sse2_enabled ? "Using" : "Not using");
+
+ if (!srv_read_only_mode) {
+
+ mutex_create(srv_monitor_file_mutex_key,
+ &srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK);
+
+ if (srv_innodb_status) {
+
+ srv_monitor_file_name = static_cast<char*>(
+ mem_alloc(
+ strlen(fil_path_to_mysql_datadir)
+ + 20 + sizeof "/innodb_status."));
+
+ sprintf(srv_monitor_file_name, "%s/innodb_status.%lu",
+ fil_path_to_mysql_datadir,
+ os_proc_get_number());
+
+ srv_monitor_file = fopen(srv_monitor_file_name, "w+");
+
+ if (!srv_monitor_file) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Unable to create %s: %s",
+ srv_monitor_file_name,
+ strerror(errno));
+
+ return(DB_ERROR);
+ }
+ } else {
+ srv_monitor_file_name = NULL;
+ srv_monitor_file = os_file_create_tmpfile();
+
+ if (!srv_monitor_file) {
+ return(DB_ERROR);
+ }
+ }
+
+ mutex_create(srv_dict_tmpfile_mutex_key,
+ &srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION);
+
+ srv_dict_tmpfile = os_file_create_tmpfile();
+
+ if (!srv_dict_tmpfile) {
+ return(DB_ERROR);
+ }
+
+ mutex_create(srv_misc_tmpfile_mutex_key,
+ &srv_misc_tmpfile_mutex, SYNC_ANY_LATCH);
+
+ srv_misc_tmpfile = os_file_create_tmpfile();
+
+ if (!srv_misc_tmpfile) {
+ return(DB_ERROR);
+ }
+ }
+
+ /* If user has set the value of innodb_file_io_threads then
+ we'll emit a message telling the user that this parameter
+ is now deprecated. */
+ if (srv_n_file_io_threads != 4) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "innodb_file_io_threads is deprecated. Please use "
+ "innodb_read_io_threads and innodb_write_io_threads "
+ "instead");
+ }
+
+ /* Now overwrite the value on srv_n_file_io_threads */
+ srv_n_file_io_threads = srv_n_read_io_threads;
+
+ if (!srv_read_only_mode) {
+ /* Add the log and ibuf IO threads. */
+ srv_n_file_io_threads += 2;
+ srv_n_file_io_threads += srv_n_write_io_threads;
+ } else {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Disabling background IO write threads.");
+
+ srv_n_write_io_threads = 0;
+ }
+
+ ut_a(srv_n_file_io_threads <= SRV_MAX_N_IO_THREADS);
+
+ io_limit = 8 * SRV_N_PENDING_IOS_PER_THREAD;
+
+ /* On Windows when using native aio the number of aio requests
+ that a thread can handle at a given time is limited to 32
+ i.e.: SRV_N_PENDING_IOS_PER_THREAD */
+# ifdef __WIN__
+ if (srv_use_native_aio) {
+ io_limit = SRV_N_PENDING_IOS_PER_THREAD;
+ }
+# endif /* __WIN__ */
+
+ if (!os_aio_init(io_limit,
+ srv_n_read_io_threads,
+ srv_n_write_io_threads,
+ SRV_MAX_N_PENDING_SYNC_IOS)) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Fatal : Cannot initialize AIO sub-system");
+#if defined(LINUX_NATIVE_AIO)
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "You can try increasing system fs.aio-max-nr to 1048576 "
+ "or larger or setting innodb_use_native_aio = 0 in my.cnf");
+#endif
+
+ return(DB_ERROR);
+ }
+
+ fil_init(srv_file_per_table ? 50000 : 5000, srv_max_n_open_files);
+
+ double size;
+ char unit;
+
+ if (srv_buf_pool_size >= 1024 * 1024 * 1024) {
+ size = ((double) srv_buf_pool_size) / (1024 * 1024 * 1024);
+ unit = 'G';
+ } else {
+ size = ((double) srv_buf_pool_size) / (1024 * 1024);
+ unit = 'M';
+ }
+
+ /* Print time to initialize the buffer pool */
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Initializing buffer pool, size = %.1f%c", size, unit);
+
+ err = buf_pool_init(srv_buf_pool_size, (ibool) srv_buf_pool_populate,
+ srv_buf_pool_instances);
+
+ if (err != DB_SUCCESS) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot allocate memory for the buffer pool");
+
+ return(DB_ERROR);
+ }
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Completed initialization of buffer pool");
+
+#ifdef UNIV_DEBUG
+ /* We have observed deadlocks with a 5MB buffer pool but
+ the actual lower limit could very well be a little higher. */
+
+ if (srv_buf_pool_size <= 5 * 1024 * 1024) {
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Small buffer pool size (%luM), the flst_validate() "
+ "debug function can cause a deadlock if the "
+ "buffer pool fills up.",
+ srv_buf_pool_size / 1024 / 1024);
+ }
+#endif /* UNIV_DEBUG */
+
+ fsp_init();
+ log_init();
+
+ lock_sys_create(srv_lock_table_size);
+
+ /* Create i/o-handler threads: */
+
+ for (i = 0; i < srv_n_file_io_threads; ++i) {
+
+ n[i] = i;
+
+ os_thread_create(io_handler_thread, n + i, thread_ids + i);
+ }
+
+ if (srv_n_log_files * srv_log_file_size * UNIV_PAGE_SIZE
+ >= 512ULL * 1024ULL * 1024ULL * 1024ULL) {
+ /* log_block_convert_lsn_to_no() limits the returned block
+ number to 1G and given that OS_FILE_LOG_BLOCK_SIZE is 512
+ bytes, then we have a limit of 512 GB. If that limit is to
+ be raised, then log_block_convert_lsn_to_no() must be
+ modified. */
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Combined size of log files must be < 512 GB");
+
+ return(DB_ERROR);
+ }
+
+ if (srv_n_log_files * srv_log_file_size >= ULINT_MAX) {
+ /* fil_io() takes ulint as an argument and we are passing
+ (next_offset / UNIV_PAGE_SIZE) to it in log_group_write_buf().
+ So (next_offset / UNIV_PAGE_SIZE) must be less than ULINT_MAX.
+ So next_offset must be < ULINT_MAX * UNIV_PAGE_SIZE. This
+ means that we are limited to ULINT_MAX * UNIV_PAGE_SIZE which
+ is 64 TB on 32 bit systems. */
+ fprintf(stderr,
+ " InnoDB: Error: combined size of log files"
+ " must be < %lu GB\n",
+ ULINT_MAX / 1073741824 * UNIV_PAGE_SIZE);
+
+ return(DB_ERROR);
+ }
+
+ sum_of_new_sizes = 0;
+
+ for (i = 0; i < srv_n_data_files; i++) {
+#ifndef __WIN__
+ if (sizeof(off_t) < 5
+ && srv_data_file_sizes[i]
+ >= (ulint) (1 << (32 - UNIV_PAGE_SIZE_SHIFT))) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error: file size must be < 4 GB"
+ " with this MySQL binary\n");
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: and operating system combination,"
+ " in some OS's < 2 GB\n");
+
+ return(DB_ERROR);
+ }
+#endif
+ sum_of_new_sizes += srv_data_file_sizes[i];
+ }
+
+ if (sum_of_new_sizes < 10485760 / UNIV_PAGE_SIZE) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Tablespace size must be at least 10 MB");
+
+ return(DB_ERROR);
+ }
+
+ err = open_or_create_data_files(&create_new_db,
+#ifdef UNIV_LOG_ARCHIVE
+ &min_arch_log_no, &max_arch_log_no,
+#endif /* UNIV_LOG_ARCHIVE */
+ &min_flushed_lsn, &max_flushed_lsn,
+ &sum_of_new_sizes);
+ if (err == DB_FAIL) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "The system tablespace must be writable!");
+
+ return(DB_ERROR);
+
+ } else if (err != DB_SUCCESS) {
+
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Could not open or create the system tablespace. If "
+ "you tried to add new data files to the system "
+ "tablespace, and it failed here, you should now "
+ "edit innodb_data_file_path in my.cnf back to what "
+ "it was, and remove the new ibdata files InnoDB "
+ "created in this failed attempt. InnoDB only wrote "
+ "those files full of zeros, but did not yet use "
+ "them in any way. But be careful: do not remove "
+ "old data files which contain your precious data!");
+
+ return(err);
+ }
+
+#ifdef UNIV_LOG_ARCHIVE
+ srv_normalize_path_for_win(srv_arch_dir);
+#endif /* UNIV_LOG_ARCHIVE */
+
+ dirnamelen = strlen(srv_log_group_home_dir);
+ ut_a(dirnamelen < (sizeof logfilename) - 10 - sizeof "ib_logfile");
+ memcpy(logfilename, srv_log_group_home_dir, dirnamelen);
+
+ /* Add a path separator if needed. */
+ if (dirnamelen && logfilename[dirnamelen - 1] != SRV_PATH_SEPARATOR) {
+ logfilename[dirnamelen++] = SRV_PATH_SEPARATOR;
+ }
+
+ srv_log_file_size_requested = srv_log_file_size;
+
+ if (create_new_db) {
+ bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
+ ut_a(success);
+
+ min_flushed_lsn = max_flushed_lsn = log_get_lsn();
+
+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+
+ err = create_log_files(create_new_db, logfilename, dirnamelen,
+ max_flushed_lsn, logfile0);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+ } else {
+ for (i = 0; i < SRV_N_LOG_FILES_MAX; i++) {
+ os_offset_t size;
+ os_file_stat_t stat_info;
+
+ sprintf(logfilename + dirnamelen,
+ "ib_logfile%u", i);
+
+ err = os_file_get_status(
+ logfilename, &stat_info, false);
+
+ if (err == DB_NOT_FOUND) {
+ if (i == 0) {
+ if (max_flushed_lsn
+ != min_flushed_lsn) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create"
+ " log files because"
+ " data files are"
+ " corrupt or"
+ " not in sync"
+ " with each other");
+ return(DB_ERROR);
+ }
+
+ if (max_flushed_lsn < (lsn_t) 1000) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot create"
+ " log files because"
+ " data files are"
+ " corrupt or the"
+ " database was not"
+ " shut down cleanly"
+ " after creating"
+ " the data files.");
+ return(DB_ERROR);
+ }
+
+ err = create_log_files(
+ create_new_db, logfilename,
+ dirnamelen, max_flushed_lsn,
+ logfile0);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ create_log_files_rename(
+ logfilename, dirnamelen,
+ max_flushed_lsn, logfile0);
+
+ /* Suppress the message about
+ crash recovery. */
+ max_flushed_lsn = min_flushed_lsn
+ = log_get_lsn();
+ goto files_checked;
+ } else if (i < 2) {
+ /* must have at least 2 log files */
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Only one log file found.");
+ return(err);
+ }
+
+ /* opened all files */
+ break;
+ }
+
+ if (!srv_file_check_mode(logfilename)) {
+ return(DB_ERROR);
+ }
+
+ err = open_log_file(&files[i], logfilename, &size);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ ut_a(size != (os_offset_t) -1);
+
+ if (size & ((1 << UNIV_PAGE_SIZE_SHIFT) - 1)) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Log file %s size "
+ UINT64PF " is not a multiple of"
+ " innodb_page_size",
+ logfilename, size);
+ return(DB_ERROR);
+ }
+
+ size >>= UNIV_PAGE_SIZE_SHIFT;
+
+ if (i == 0) {
+ srv_log_file_size = size;
+ } else if (size != srv_log_file_size) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Log file %s is"
+ " of different size "UINT64PF" bytes"
+ " than other log"
+ " files "UINT64PF" bytes!",
+ logfilename,
+ size << UNIV_PAGE_SIZE_SHIFT,
+ (os_offset_t) srv_log_file_size
+ << UNIV_PAGE_SIZE_SHIFT);
+ return(DB_ERROR);
+ }
+ }
+
+ srv_n_log_files_found = i;
+
+ /* Create the in-memory file space objects. */
+
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", 0);
+
+ fil_space_create(logfilename,
+ SRV_LOG_SPACE_FIRST_ID,
+ fsp_flags_set_page_size(0, UNIV_PAGE_SIZE),
+ FIL_LOG);
+
+ ut_a(fil_validate());
+
+ /* srv_log_file_size is measured in pages; if page size is 16KB,
+ then we have a limit of 64TB on 32 bit systems */
+ ut_a(srv_log_file_size <= ULINT_MAX);
+
+ for (unsigned j = 0; j < i; j++) {
+ sprintf(logfilename + dirnamelen, "ib_logfile%u", j);
+
+ if (!fil_node_create(logfilename,
+ (ulint) srv_log_file_size,
+ SRV_LOG_SPACE_FIRST_ID, FALSE)) {
+ return(DB_ERROR);
+ }
+ }
+
+#ifdef UNIV_LOG_ARCHIVE
+ /* Create the file space object for archived logs. Under
+ MySQL, no archiving ever done. */
+ fil_space_create("arch_log_space", SRV_LOG_SPACE_FIRST_ID + 1,
+ 0, FIL_LOG);
+#endif /* UNIV_LOG_ARCHIVE */
+ log_group_init(0, i, srv_log_file_size * UNIV_PAGE_SIZE,
+ SRV_LOG_SPACE_FIRST_ID,
+ SRV_LOG_SPACE_FIRST_ID + 1);
+ }
+
+files_checked:
+ /* Open all log files and data files in the system
+ tablespace: we keep them open until database
+ shutdown */
+
+ fil_open_log_and_system_tablespace_files();
+
+ err = srv_undo_tablespaces_init(
+ create_new_db,
+ srv_undo_tablespaces,
+ &srv_undo_tablespaces_open);
+
+ /* If the force recovery is set very high then we carry on regardless
+ of all errors. Basically this is fingers crossed mode. */
+
+ if (err != DB_SUCCESS
+ && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) {
+
+ return(err);
+ }
+
+ /* Initialize objects used by dict stats gathering thread, which
+ can also be used by recovery if it tries to drop some table */
+ if (!srv_read_only_mode) {
+ dict_stats_thread_init();
+ }
+
+ trx_sys_file_format_init();
+
+ trx_sys_create();
+
+ if (create_new_db) {
+
+ ut_a(!srv_read_only_mode);
+ init_log_online();
+
+ mtr_start(&mtr);
+
+ fsp_header_init(0, sum_of_new_sizes, &mtr);
+
+ mtr_commit(&mtr);
+
+ /* To maintain backward compatibility we create only
+ the first rollback segment before the double write buffer.
+ All the remaining rollback segments will be created later,
+ after the double write buffer has been created. */
+ trx_sys_create_sys_pages();
+
+ ib_bh = trx_sys_init_at_db_start();
+ n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
+
+ /* The purge system needs to create the purge view and
+ therefore requires that the trx_sys is inited. */
+
+ trx_purge_sys_create(srv_n_purge_threads, ib_bh);
+
+ err = dict_create();
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ srv_startup_is_before_trx_rollback_phase = FALSE;
+
+ bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
+ ut_a(success);
+
+ min_flushed_lsn = max_flushed_lsn = log_get_lsn();
+
+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+
+ /* Stamp the LSN to the data files. */
+ fil_write_flushed_lsn_to_data_files(max_flushed_lsn, 0);
+
+ fil_flush_file_spaces(FIL_TABLESPACE);
+
+ create_log_files_rename(logfilename, dirnamelen,
+ max_flushed_lsn, logfile0);
+#ifdef UNIV_LOG_ARCHIVE
+ } else if (srv_archive_recovery) {
+
+ ib_logf(IB_LOG_LEVEL_INFO,
+ " Starting archive recovery from a backup...");
+
+ err = recv_recovery_from_archive_start(
+ min_flushed_lsn, srv_archive_recovery_limit_lsn,
+ min_arch_log_no);
+ if (err != DB_SUCCESS) {
+
+ return(DB_ERROR);
+ }
+ /* Since ibuf init is in dict_boot, and ibuf is needed
+ in any disk i/o, first call dict_boot */
+
+ err = dict_boot();
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ ib_bh = trx_sys_init_at_db_start();
+ n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
+
+ /* The purge system needs to create the purge view and
+ therefore requires that the trx_sys is inited. */
+
+ trx_purge_sys_create(srv_n_purge_threads, ib_bh);
+
+ srv_startup_is_before_trx_rollback_phase = FALSE;
+
+ recv_recovery_from_archive_finish();
+#endif /* UNIV_LOG_ARCHIVE */
+ } else {
+
+ /* Check if we support the max format that is stamped
+ on the system tablespace.
+ Note: We are NOT allowed to make any modifications to
+ the TRX_SYS_PAGE_NO page before recovery because this
+ page also contains the max_trx_id etc. important system
+ variables that are required for recovery. We need to
+ ensure that we return the system to a state where normal
+ recovery is guaranteed to work. We do this by
+ invalidating the buffer cache, this will force the
+ reread of the page and restoration to its last known
+ consistent state, this is REQUIRED for the recovery
+ process to work. */
+ err = trx_sys_file_format_max_check(
+ srv_max_file_format_at_startup);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ /* Invalidate the buffer pool to ensure that we reread
+ the page that we read above, during recovery.
+ Note that this is not as heavy weight as it seems. At
+ this point there will be only ONE page in the buf_LRU
+ and there must be no page in the buf_flush list. */
+ buf_pool_invalidate();
+
+ /* We always try to do a recovery, even if the database had
+ been shut down normally: this is the normal startup path */
+
+ err = recv_recovery_from_checkpoint_start(
+ LOG_CHECKPOINT, LSN_MAX,
+ min_flushed_lsn, max_flushed_lsn);
+
+ if (err != DB_SUCCESS) {
+
+ return(DB_ERROR);
+ }
+
+ init_log_online();
+
+ /* Since the insert buffer init is in dict_boot, and the
+ insert buffer is needed in any disk i/o, first we call
+ dict_boot(). Note that trx_sys_init_at_db_start() only needs
+ to access space 0, and the insert buffer at this stage already
+ works for space 0. */
+
+ err = dict_boot();
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ ib_bh = trx_sys_init_at_db_start();
+ n_recovered_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
+
+ /* The purge system needs to create the purge view and
+ therefore requires that the trx_sys is inited. */
+
+ trx_purge_sys_create(srv_n_purge_threads, ib_bh);
+
+ /* recv_recovery_from_checkpoint_finish needs trx lists which
+ are initialized in trx_sys_init_at_db_start(). */
+
+ recv_recovery_from_checkpoint_finish();
+
+ if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
+ /* The following call is necessary for the insert
+ buffer to work with multiple tablespaces. We must
+ know the mapping between space id's and .ibd file
+ names.
+
+ In a crash recovery, we check that the info in data
+ dictionary is consistent with what we already know
+ about space id's from the call of
+ fil_load_single_table_tablespaces().
+
+ In a normal startup, we create the space objects for
+ every table in the InnoDB data dictionary that has
+ an .ibd file.
+
+ We also determine the maximum tablespace id used. */
+ dict_check_t dict_check;
+
+ if (recv_needed_recovery) {
+ dict_check = DICT_CHECK_ALL_LOADED;
+ } else if (n_recovered_trx) {
+ dict_check = DICT_CHECK_SOME_LOADED;
+ } else {
+ dict_check = DICT_CHECK_NONE_LOADED;
+ }
+
+ dict_check_tablespaces_and_store_max_id(dict_check);
+ }
+
+ if (!srv_force_recovery
+ && !recv_sys->found_corrupt_log
+ && (srv_log_file_size_requested != srv_log_file_size
+ || srv_n_log_files_found != srv_n_log_files)) {
+ /* Prepare to replace the redo log files. */
+
+ if (srv_read_only_mode) {
+ ib_logf(IB_LOG_LEVEL_ERROR,
+ "Cannot resize log files "
+ "in read-only mode.");
+ return(DB_READ_ONLY);
+ }
+
+ /* Clean the buffer pool. */
+ bool success = buf_flush_list(
+ ULINT_MAX, LSN_MAX, NULL);
+ ut_a(success);
+
+ RECOVERY_CRASH(1);
+
+ min_flushed_lsn = max_flushed_lsn = log_get_lsn();
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Resizing redo log from %u*%u to %u*%u pages"
+ ", LSN=" LSN_PF,
+ (unsigned) i,
+ (unsigned) srv_log_file_size,
+ (unsigned) srv_n_log_files,
+ (unsigned) srv_log_file_size_requested,
+ max_flushed_lsn);
+
+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+
+ RECOVERY_CRASH(2);
+
+ /* Flush the old log files. */
+ log_buffer_flush_to_disk();
+ /* If innodb_flush_method=O_DSYNC,
+ we need to explicitly flush the log buffers. */
+ fil_flush(SRV_LOG_SPACE_FIRST_ID);
+
+ ut_ad(max_flushed_lsn == log_get_lsn());
+
+ /* Prohibit redo log writes from any other
+ threads until creating a log checkpoint at the
+ end of create_log_files(). */
+ ut_d(recv_no_log_write = TRUE);
+ ut_ad(!buf_pool_check_no_pending_io());
+
+ RECOVERY_CRASH(3);
+
+ /* Stamp the LSN to the data files. */
+ fil_write_flushed_lsn_to_data_files(
+ max_flushed_lsn, 0);
+
+ fil_flush_file_spaces(FIL_TABLESPACE);
+
+ RECOVERY_CRASH(4);
+
+ /* Close and free the redo log files, so that
+ we can replace them. */
+ fil_close_log_files(true);
+
+ RECOVERY_CRASH(5);
+
+ /* Free the old log file space. */
+ log_group_close_all();
+
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Starting to delete and rewrite log files.");
+
+ srv_log_file_size = srv_log_file_size_requested;
+
+ err = create_log_files(create_new_db, logfilename,
+ dirnamelen, max_flushed_lsn,
+ logfile0);
+
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ /* create_log_files() can increase the system LSN, which is
+ why FIL_PAGE_FILE_FLUSH_LSN has to be updated. */
+ min_flushed_lsn = max_flushed_lsn = log_get_lsn();
+ fil_write_flushed_lsn_to_data_files(min_flushed_lsn, 0);
+ fil_flush_file_spaces(FIL_TABLESPACE);
+
+ create_log_files_rename(logfilename, dirnamelen,
+ log_get_lsn(), logfile0);
+ }
+
+ srv_startup_is_before_trx_rollback_phase = FALSE;
+ recv_recovery_rollback_active();
+
+ /* It is possible that file_format tag has never
+ been set. In this case we initialize it to minimum
+ value. Important to note that we can do it ONLY after
+ we have finished the recovery process so that the
+ image of TRX_SYS_PAGE_NO is not stale. */
+ trx_sys_file_format_tag_init();
+ }
+
+ if (!create_new_db && sum_of_new_sizes > 0) {
+ /* New data file(s) were added */
+ mtr_start(&mtr);
+
+ fsp_header_inc_size(0, sum_of_new_sizes, &mtr);
+
+ mtr_commit(&mtr);
+
+ /* Immediately write the log record about increased tablespace
+ size to disk, so that it is durable even if mysqld would crash
+ quickly */
+
+ log_buffer_flush_to_disk();
+ }
+
+#ifdef UNIV_LOG_ARCHIVE
+ /* Archiving is always off under MySQL */
+ if (!srv_log_archive_on) {
+ ut_a(DB_SUCCESS == log_archive_noarchivelog());
+ } else {
+ bool start_archive;
+
+ mutex_enter(&(log_sys->mutex));
+
+ start_archive = FALSE;
+
+ if (log_sys->archiving_state == LOG_ARCH_OFF) {
+ start_archive = TRUE;
+ }
+
+ mutex_exit(&(log_sys->mutex));
+
+ if (start_archive) {
+ ut_a(DB_SUCCESS == log_archive_archivelog());
+ }
+ }
+#endif /* UNIV_LOG_ARCHIVE */
+
+ /* fprintf(stderr, "Max allowed record size %lu\n",
+ page_get_free_space_of_empty() / 2); */
+
+ if (buf_dblwr == NULL) {
+ /* Create the doublewrite buffer to a new tablespace */
+
+ buf_dblwr_create();
+ }
+
+ /* Here the double write buffer has already been created and so
+ any new rollback segments will be allocated after the double
+ write buffer. The default segment should already exist.
+ We create the new segments only if it's a new database or
+ the database was shutdown cleanly. */
+
+ /* Note: When creating the extra rollback segments during an upgrade
+ we violate the latching order, even if the change buffer is empty.
+ We make an exception in sync0sync.cc and check srv_is_being_started
+ for that violation. It cannot create a deadlock because we are still
+ running in single threaded mode essentially. Only the IO threads
+ should be running at this stage. */
+
+ ut_a(srv_undo_logs > 0);
+ ut_a(srv_undo_logs <= TRX_SYS_N_RSEGS);
+
+ /* The number of rsegs that exist in InnoDB is given by status
+ variable srv_available_undo_logs. The number of rsegs to use can
+ be set using the dynamic global variable srv_undo_logs. */
+
+ srv_available_undo_logs = trx_sys_create_rsegs(
+ srv_undo_tablespaces, srv_undo_logs);
+
+ if (srv_available_undo_logs == ULINT_UNDEFINED) {
+ /* Can only happen if force recovery is set. */
+ ut_a(srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO
+ || srv_read_only_mode);
+ srv_undo_logs = ULONG_UNDEFINED;
+ }
+
+ /* Flush the changes made to TRX_SYS_PAGE by trx_sys_create_rsegs()*/
+ if (!srv_force_recovery && !srv_read_only_mode) {
+ bool success = buf_flush_list(ULINT_MAX, LSN_MAX, NULL);
+ ut_a(success);
+ buf_flush_wait_batch_end(NULL, BUF_FLUSH_LIST);
+ }
+
+ if (!srv_read_only_mode) {
+ /* Create the thread which watches the timeouts
+ for lock waits */
+ os_thread_create(
+ lock_wait_timeout_thread,
+ NULL, thread_ids + 2 + SRV_MAX_N_IO_THREADS);
+
+ /* Create the thread which warns of long semaphore waits */
+ os_thread_create(
+ srv_error_monitor_thread,
+ NULL, thread_ids + 3 + SRV_MAX_N_IO_THREADS);
+
+ /* Create the thread which prints InnoDB monitor info */
+ os_thread_create(
+ srv_monitor_thread,
+ NULL, thread_ids + 4 + SRV_MAX_N_IO_THREADS);
+ }
+
+ /* Create the SYS_FOREIGN and SYS_FOREIGN_COLS system tables */
+ err = dict_create_or_check_foreign_constraint_tables();
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ /* Create the SYS_TABLESPACES system table */
+ err = dict_create_or_check_sys_tablespace();
+ if (err != DB_SUCCESS) {
+ return(err);
+ }
+
+ srv_is_being_started = FALSE;
+
+ ut_a(trx_purge_state() == PURGE_STATE_INIT);
+
+ /* Create the master thread which does purge and other utility
+ operations */
+
+ if (!srv_read_only_mode) {
+
+ os_thread_create(
+ srv_master_thread,
+ NULL, thread_ids + (1 + SRV_MAX_N_IO_THREADS));
+ }
+
+ if (!srv_read_only_mode
+ && srv_force_recovery < SRV_FORCE_NO_BACKGROUND) {
+
+ os_thread_create(
+ srv_purge_coordinator_thread,
+ NULL, thread_ids + 5 + SRV_MAX_N_IO_THREADS);
+
+ ut_a(UT_ARR_SIZE(thread_ids)
+ > 5 + srv_n_purge_threads + SRV_MAX_N_IO_THREADS);
+
+ /* We've already created the purge coordinator thread above. */
+ for (i = 1; i < srv_n_purge_threads; ++i) {
+ os_thread_create(
+ srv_worker_thread, NULL,
+ thread_ids + 5 + i + SRV_MAX_N_IO_THREADS);
+ }
+
+ srv_start_wait_for_purge_to_start();
+
+ } else {
+ purge_sys->state = PURGE_STATE_DISABLED;
+ }
+
+ if (!srv_read_only_mode) {
+ os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL);
+ }
+
+#ifdef UNIV_DEBUG
+ /* buf_debug_prints = TRUE; */
+#endif /* UNIV_DEBUG */
+ sum_of_data_file_sizes = 0;
+
+ for (i = 0; i < srv_n_data_files; i++) {
+ sum_of_data_file_sizes += srv_data_file_sizes[i];
+ }
+
+ tablespace_size_in_header = fsp_header_get_tablespace_size();
+
+ if (!srv_read_only_mode
+ && !srv_auto_extend_last_data_file
+ && sum_of_data_file_sizes != tablespace_size_in_header) {
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error: tablespace size"
+ " stored in header is %lu pages, but\n",
+ (ulong) tablespace_size_in_header);
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ "InnoDB: the sum of data file sizes is %lu pages\n",
+ (ulong) sum_of_data_file_sizes);
+
+ if (srv_force_recovery == 0
+ && sum_of_data_file_sizes < tablespace_size_in_header) {
+ /* This is a fatal error, the tail of a tablespace is
+ missing */
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Cannot start InnoDB."
+ " The tail of the system tablespace is\n");
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: missing. Have you edited"
+ " innodb_data_file_path in my.cnf in an\n");
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: inappropriate way, removing"
+ " ibdata files from there?\n");
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: You can set innodb_force_recovery=1"
+ " in my.cnf to force\n");
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: a startup if you are trying"
+ " to recover a badly corrupt database.\n");
+
+ return(DB_ERROR);
+ }
+ }
+
+ if (!srv_read_only_mode
+ && srv_auto_extend_last_data_file
+ && sum_of_data_file_sizes < tablespace_size_in_header) {
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error: tablespace size stored in header"
+ " is %lu pages, but\n",
+ (ulong) tablespace_size_in_header);
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: the sum of data file sizes"
+ " is only %lu pages\n",
+ (ulong) sum_of_data_file_sizes);
+
+ if (srv_force_recovery == 0) {
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Cannot start InnoDB. The tail of"
+ " the system tablespace is\n");
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: missing. Have you edited"
+ " innodb_data_file_path in my.cnf in an\n");
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: inappropriate way, removing"
+ " ibdata files from there?\n");
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: You can set innodb_force_recovery=1"
+ " in my.cnf to force\n");
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: a startup if you are trying to"
+ " recover a badly corrupt database.\n");
+
+ return(DB_ERROR);
+ }
+ }
+
+ /* Check that os_fast_mutexes work as expected */
+ os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &srv_os_test_mutex);
+
+ if (0 != os_fast_mutex_trylock(&srv_os_test_mutex)) {
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: Error: pthread_mutex_trylock returns"
+ " an unexpected value on\n");
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: success! Cannot continue.\n");
+ exit(1);
+ }
+
+ os_fast_mutex_unlock(&srv_os_test_mutex);
+
+ os_fast_mutex_lock(&srv_os_test_mutex);
+
+ os_fast_mutex_unlock(&srv_os_test_mutex);
+
+ os_fast_mutex_free(&srv_os_test_mutex);
+
+ if (!srv_file_per_table && srv_pass_corrupt_table) {
+ fprintf(stderr, "InnoDB: Warning:"
+ " The option innodb_file_per_table is disabled,"
+ " so using the option innodb_pass_corrupt_table doesn't make sense.\n");
+ }
+
+ if (srv_print_verbose_log) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ " Percona XtraDB (http://www.percona.com) %s started; "
+ "log sequence number " LSN_PF "",
+ INNODB_VERSION_STR, srv_start_lsn);
+ }
+
+ if (srv_force_recovery > 0) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "!!! innodb_force_recovery is set to %lu !!!",
+ (ulong) srv_force_recovery);
+ }
+
+ if (srv_force_recovery == 0) {
+ /* In the insert buffer we may have even bigger tablespace
+ id's, because we may have dropped those tablespaces, but
+ insert buffer merge has not had time to clean the records from
+ the ibuf tree. */
+
+ ibuf_update_max_tablespace_id();
+ }
+
+ if (!srv_read_only_mode) {
+ /* Create the buffer pool dump/load thread */
+ os_thread_create(buf_dump_thread, NULL, NULL);
+
+ /* Create the dict stats gathering thread */
+ os_thread_create(dict_stats_thread, NULL, NULL);
+
+ /* Create the thread that will optimize the FTS sub-system. */
+ fts_optimize_init();
+ }
+
+ srv_was_started = TRUE;
+
+ return(DB_SUCCESS);
+}
+
+#if 0
+/********************************************************************
+Syncs the FTS data of every table that has an fts object, walking first
+dict_sys->table_LRU and then dict_sys->table_non_LRU, before shutdown. */
+static
+void
+srv_fts_close(void)
+/*===============*/
+{
+	dict_table_t*	table;
+
+	/* First list: table_LRU. */
+	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
+
+	while (table != NULL) {
+		if (table->fts != NULL) {
+			fts_sync_table(table);
+		}
+
+		table = UT_LIST_GET_NEXT(table_LRU, table);
+	}
+
+	/* Second list: table_non_LRU. It is traversed through the same
+	table_LRU link node as the first list. */
+	table = UT_LIST_GET_FIRST(dict_sys->table_non_LRU);
+
+	while (table != NULL) {
+		if (table->fts != NULL) {
+			fts_sync_table(table);
+		}
+
+		table = UT_LIST_GET_NEXT(table_LRU, table);
+	}
+}
+#endif
+
+/****************************************************************//**
+Shuts down the InnoDB database. The work is done in the numbered steps
+marked in the body: 1. flush and mark the files (the real shutdown),
+2. make the threads created by InnoDB exit, 3. - 5. free mutexes, events
+and memory. Both return statements in this function return DB_SUCCESS.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+innobase_shutdown_for_mysql(void)
+/*=============================*/
+{
+ ulint i;
+
+ /* If startup never completed there is nothing to tear down; only
+ warn when a start is still flagged as being in progress. */
+ if (!srv_was_started) {
+ if (srv_is_being_started) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Shutting down an improperly started, "
+ "or created database!");
+ }
+
+ return(DB_SUCCESS);
+ }
+
+ if (!srv_read_only_mode) {
+ /* Shutdown the FTS optimize sub system. */
+ fts_optimize_start_shutdown();
+
+ fts_optimize_end();
+ }
+
+ /* 1. Flush the buffer pool to disk, write the current lsn to
+ the tablespace header(s), and copy all log data to archive.
+ The step 1 is the real InnoDB shutdown. The remaining steps 2 - ...
+ just free data structures after the shutdown. */
+
+ logs_empty_and_mark_files_at_shutdown();
+
+ /* Diagnostic only: a non-zero counter is reported but the shutdown
+ continues regardless. */
+ if (srv_conc_get_active_threads() != 0) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Query counter shows %ld queries still "
+ "inside InnoDB at shutdown",
+ srv_conc_get_active_threads());
+ }
+
+ /* 2. Make all threads created by InnoDB exit */
+
+ srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS;
+
+ /* All threads end up waiting for certain events. Put those events
+ to the signaled state. Then the threads will exit themselves after
+ os_event_wait(). */
+
+ /* Try at most 1000 times: every pass signals the threads again,
+ checks os_thread_count under os_sync_mutex and then sleeps before
+ the next pass. */
+ for (i = 0; i < 1000; i++) {
+ /* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM
+ HERE OR EARLIER */
+
+ if (!srv_read_only_mode) {
+ /* a. Let the lock timeout thread exit */
+ os_event_set(lock_sys->timeout_event);
+
+ /* b. srv error monitor thread exits automatically,
+ no need to do anything here */
+
+ /* c. We wake the master thread so that it exits */
+ srv_wake_master_thread();
+
+ /* d. Wakeup purge threads. */
+ srv_purge_wakeup();
+ }
+
+ /* e. Exit the i/o threads */
+
+ os_aio_wake_all_threads_at_shutdown();
+
+ /* f. dict_stats_thread is signaled from
+ logs_empty_and_mark_files_at_shutdown() and should have
+ already quit or is quitting right now. */
+
+ os_mutex_enter(os_sync_mutex);
+
+ if (os_thread_count == 0) {
+ /* All the threads have exited or are just exiting;
+ NOTE that the threads may not have completed their
+ exit yet. Should we use pthread_join() to make sure
+ they have exited? If we did, we would have to
+ remove the pthread_detach() from
+ os_thread_exit(). Now we just sleep 0.1
+ seconds and hope that is enough! */
+
+ os_mutex_exit(os_sync_mutex);
+
+ os_thread_sleep(100000);
+
+ break;
+ }
+
+ os_mutex_exit(os_sync_mutex);
+
+ os_thread_sleep(100000);
+ }
+
+ /* i reaches 1000 only when the loop above never saw
+ os_thread_count == 0 and so never took the break.
+ NOTE(review): os_thread_count is read here without holding
+ os_sync_mutex; this only feeds a warning message, but confirm. */
+ if (i == 1000) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "%lu threads created by InnoDB"
+ " had not exited at shutdown!",
+ (ulong) os_thread_count);
+ }
+
+ /* Close the monitor file and, if a file name was stored for it,
+ unlink that file and free the name.
+ NOTE(review): srv_monitor_file_name is freed but not reset to NULL
+ here - confirm that nothing reads it afterwards. */
+ if (srv_monitor_file) {
+ fclose(srv_monitor_file);
+ srv_monitor_file = 0;
+ if (srv_monitor_file_name) {
+ unlink(srv_monitor_file_name);
+ mem_free(srv_monitor_file_name);
+ }
+ }
+
+ /* Close the remaining temporary files and clear their handles. */
+ if (srv_dict_tmpfile) {
+ fclose(srv_dict_tmpfile);
+ srv_dict_tmpfile = 0;
+ }
+
+ if (srv_misc_tmpfile) {
+ fclose(srv_misc_tmpfile);
+ srv_misc_tmpfile = 0;
+ }
+
+ if (!srv_read_only_mode) {
+ dict_stats_thread_deinit();
+ }
+
+ /* This must be disabled before closing the buffer pool
+ and closing the data dictionary. */
+ btr_search_disable();
+
+ ibuf_close();
+ log_shutdown();
+ lock_sys_close();
+ trx_sys_file_format_close();
+ trx_sys_close();
+
+ /* We don't create these mutexes in RO mode because we don't create
+ the temp files that they cover. */
+ if (!srv_read_only_mode) {
+ mutex_free(&srv_monitor_file_mutex);
+ mutex_free(&srv_dict_tmpfile_mutex);
+ mutex_free(&srv_misc_tmpfile_mutex);
+ }
+
+ dict_close();
+ btr_search_sys_free();
+
+ /* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside
+ them */
+ os_aio_free();
+ que_close();
+ row_mysql_close();
+ srv_mon_free();
+ sync_close();
+ srv_free();
+ fil_close();
+
+ /* 4. Free the os_conc_mutex and all os_events and os_mutexes */
+
+ os_sync_free();
+
+ /* 5. Free all allocated memory */
+
+ pars_lexer_close();
+ log_mem_free();
+ buf_pool_free(srv_buf_pool_instances);
+ mem_close();
+
+ /* ut_free_all_mem() frees all allocated memory not freed yet
+ in shutdown, and it will also free the ut_list_mutex, so it
+ should be the last one for all operation */
+ ut_free_all_mem();
+
+ /* Leak check: any counter that is still non-zero is reported as a
+ warning; the shutdown itself is not failed because of it. */
+ if (os_thread_count != 0
+ || os_event_count != 0
+ || os_mutex_count != 0
+ || os_fast_mutex_count != 0) {
+ ib_logf(IB_LOG_LEVEL_WARN,
+ "Some resources were not cleaned up in shutdown: "
+ "threads %lu, events %lu, os_mutexes %lu, "
+ "os_fast_mutexes %lu",
+ (ulong) os_thread_count, (ulong) os_event_count,
+ (ulong) os_mutex_count, (ulong) os_fast_mutex_count);
+ }
+
+ /* NOTE(review): unlike the other FILE handles closed above,
+ dict_foreign_err_file is not reset after fclose() - confirm that it
+ is reassigned before any later use. */
+ if (dict_foreign_err_file) {
+ fclose(dict_foreign_err_file);
+ }
+
+ if (srv_print_verbose_log) {
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Shutdown completed; log sequence number " LSN_PF "",
+ srv_shutdown_lsn);
+ }
+
+ /* Clear the startup flags. NOTE(review): presumably so that InnoDB
+ can be started again within the same process - confirm. */
+ srv_was_started = FALSE;
+ srv_start_has_been_called = FALSE;
+
+ return(DB_SUCCESS);
+}
+#endif /* !UNIV_HOTBACKUP */
+
+
+/********************************************************************
+Tells the per-table background threads of every table on
+dict_sys->table_LRU to stop, and then waits for them to do so. Signalling
+and waiting are two separate passes over the list so that the shutdown
+delays of the individual threads overlap. */
+UNIV_INTERN
+void
+srv_shutdown_table_bg_threads(void)
+/*===============================*/
+{
+	dict_table_t*	first;
+	dict_table_t*	last = NULL;
+	dict_table_t*	table;
+	dict_table_t*	next;
+
+	mutex_enter(&dict_sys->mutex);
+
+	/* Pass 1: signal every thread. Both ends of the list are
+	remembered for the sanity checks in pass 2. */
+	first = UT_LIST_GET_FIRST(dict_sys->table_LRU);
+
+	for (table = first; table != NULL; table = next) {
+		fts_t*	fts = table->fts;
+
+		if (fts != NULL) {
+			fts_start_shutdown(table, fts);
+		}
+
+		next = UT_LIST_GET_NEXT(table_LRU, table);
+
+		if (next == NULL) {
+			last = table;
+		}
+	}
+
+	/* dict_sys->mutex has to be released before the waiting pass:
+	holding it there would deadlock as soon as one of the background
+	threads tried to acquire it (for example, the FTS thread by
+	calling que_eval_sql).
+
+	Walking dict_sys->table_LRU without the mutex is safe because:
+
+	a) MySQL only starts the shutdown procedure after all client
+	threads have been disconnected and no new ones are accepted, so
+	no tables are added or dropped any more.
+
+	b) Despite its name, the list is not LRU, and its order stays
+	fixed.
+
+	In case either assumption ever stops holding, the first and last
+	items recorded above are compared against the list again below. */
+
+	mutex_exit(&dict_sys->mutex);
+
+	/* Pass 2: wait for the threads of each table to stop. */
+	table = UT_LIST_GET_FIRST(dict_sys->table_LRU);
+	ut_a(first == table);
+
+	for (; table != NULL; table = next) {
+		fts_t*	fts = table->fts;
+
+		if (fts != NULL) {
+			fts_shutdown(table, fts);
+		}
+
+		/* Fetch the successor only after the wait above. */
+		next = UT_LIST_GET_NEXT(table_LRU, table);
+
+		if (table == last) {
+			ut_a(!next);
+		}
+	}
+}
+
+/*****************************************************************//**
+Get the meta-data filename from the table name. */
+UNIV_INTERN
+void
+srv_get_meta_data_filename(
+/*=======================*/
+ dict_table_t* table, /*!< in: table */
+ char* filename, /*!< out: filename */
+ ulint max_len) /*!< in: filename max length */
+{
+ ulint len;
+ char* path;
+ char* suffix;
+ static const ulint suffix_len = strlen(".cfg");
+
+ if (DICT_TF_HAS_DATA_DIR(table->flags)) {
+ dict_get_and_save_data_dir_path(table, false);
+ ut_a(table->data_dir_path);
+
+ path = os_file_make_remote_pathname(
+ table->data_dir_path, table->name, "cfg");
+ } else {
+ path = fil_make_ibd_name(table->name, false);
+ }
+
+ ut_a(path);
+ len = ut_strlen(path);
+ ut_a(max_len >= len);
+
+ suffix = path + (len - suffix_len);
+ if (strncmp(suffix, ".cfg", suffix_len) == 0) {
+ strcpy(filename, path);
+ } else {
+ ut_ad(strncmp(suffix, ".ibd", suffix_len) == 0);
+
+ strncpy(filename, path, len - suffix_len);
+ suffix = filename + (len - suffix_len);
+ strcpy(suffix, ".cfg");
+ }
+
+ mem_free(path);
+
+ srv_normalize_path_for_win(filename);
+}