author    unknown <knielsen@knielsen-hq.org>  2009-06-09 13:16:11 +0200
committer unknown <knielsen@knielsen-hq.org>  2009-06-09 13:16:11 +0200
commit    a6b7f71329ceb7d0188572f494b5d1a1f0461fc5 (patch)
tree      d7e62c1af5118cd3ec9346de436569e907fcc51d /storage/xtradb/sync
parent    b125770aaadd09e839ad9211047e88095984308b (diff)
parent    107072563d771422c9bbb9aeeedce8ae19c5b838 (diff)
download  mariadb-git-a6b7f71329ceb7d0188572f494b5d1a1f0461fc5.tar.gz
Import Percona XtraDB into the MariaDB source tree.
Diffstat (limited to 'storage/xtradb/sync')
-rw-r--r--  storage/xtradb/sync/sync0arr.c   1045
-rw-r--r--  storage/xtradb/sync/sync0rw.c    1285
-rw-r--r--  storage/xtradb/sync/sync0sync.c  1411
3 files changed, 3741 insertions, 0 deletions
diff --git a/storage/xtradb/sync/sync0arr.c b/storage/xtradb/sync/sync0arr.c
new file mode 100644
index 00000000000..7edbbda5b55
--- /dev/null
+++ b/storage/xtradb/sync/sync0arr.c
@@ -0,0 +1,1045 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************
+The wait array used in synchronization primitives
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "sync0arr.h"
+#ifdef UNIV_NONINL
+#include "sync0arr.ic"
+#endif
+
+#include "sync0sync.h"
+#include "sync0rw.h"
+#include "os0sync.h"
+#include "os0file.h"
+#include "srv0srv.h"
+
+/*
+ WAIT ARRAY
+ ==========
+
+The wait array consists of cells, each of which has an
+operating system event object created for it. The threads
+waiting for a mutex, for example, can reserve a cell
+in the array and suspend themselves to wait for the event
+to become signaled. When using the wait array, remember to make
+sure that some thread holding the synchronization object
+will eventually know that there is a waiter in the array and
+signal the object, to prevent infinite wait.
+Why did we choose to implement a wait array? First, to make
+mutexes fast, we had to code our own implementation of them,
+which only in uncommon cases resorts to using
+slow operating system primitives. Then we had the choice of
+assigning a unique OS event for each mutex, which would
+be simpler, or using a global wait array. In some operating systems,
+the global wait array solution is more efficient and flexible,
+because we can do with a very small number of OS events,
+say 200. In NT 3.51, allocating events seems to be a quadratic
+algorithm, because 10 000 events are created quickly, but creating
+100 000 events takes a couple of minutes.
+
+As of 5.0.30 the above-mentioned design was changed. Since modern
+operating systems can handle millions of wait events efficiently, we no
+longer have the concept of each cell of the wait array having one event.
+Instead, the event that a thread wants to wait on is embedded in the
+wait object (mutex or rw_lock). We still keep the global wait array for
+the sake of diagnostics and also to avoid infinite waits: the
+error_monitor thread scans the global wait array and signals any
+waiting threads that have missed a signal. */
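+
+/* A minimal usage sketch of the reserve / re-check / wait protocol
+described above (illustrative only, not part of the imported file;
+mutex_set_waiters() and mutex_test_and_set() are assumptions taken
+from sync0sync). A waiter reserves a cell, announces itself, re-checks
+the lock word so that a concurrent release cannot be missed, and only
+then suspends itself: */
+#if 0
+static void
+wait_for_mutex_sketch(mutex_t* mutex, const char* file, ulint line)
+{
+ ulint index;
+
+ /* Reserve a cell; this resets the mutex's embedded event and
+ records the signal_count at the time of the reset. */
+ sync_array_reserve_cell(sync_primary_wait_array, mutex,
+ SYNC_MUTEX, file, line, &index);
+
+ /* Let the holder know that a waiter exists... */
+ mutex_set_waiters(mutex, 1);
+
+ /* ...then try once more, in case the mutex was just released. */
+ if (mutex_test_and_set(mutex) == 0) {
+ sync_array_free_cell(sync_primary_wait_array, index);
+ return; /* acquired after all */
+ }
+
+ /* Suspend; sync_array_wait_event() frees the cell itself. */
+ sync_array_wait_event(sync_primary_wait_array, index);
+}
+#endif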
+
+/* A cell where an individual thread may wait suspended
+until a resource is released. The suspending is implemented
+using an operating system event semaphore. */
+struct sync_cell_struct {
+ void* wait_object; /* pointer to the object the
+ thread is waiting for; if NULL
+ the cell is free for use */
+ mutex_t* old_wait_mutex; /* the latest wait mutex in cell */
+ rw_lock_t* old_wait_rw_lock;/* the latest wait rw-lock in cell */
+ ulint request_type; /* lock type requested on the
+ object */
+ const char* file; /* in debug version file where
+ requested */
+ ulint line; /* in debug version line where
+ requested */
+ os_thread_id_t thread; /* thread id of this waiting
+ thread */
+ ibool waiting; /* TRUE if the thread has already
+ called sync_array_event_wait
+ on this cell */
+ ib_int64_t signal_count; /* We capture the signal_count
+ of the wait_object when we
+ reset the event. This value is
+ then passed on to os_event_wait
+ and we wait only if the event
+ has not been signalled in the
+ period between the reset and
+ wait call. */
+ time_t reservation_time;/* time when the thread reserved
+ the wait cell */
+};
+
+/* NOTE: It is allowed for a thread to wait
+for an event allocated for the array without owning the
+protecting mutex (depending on the case: OS or database mutex), but
+all changes (set or reset) to the state of the event must be made
+while owning the mutex. */
+struct sync_array_struct {
+ ulint n_reserved; /* number of currently reserved
+ cells in the wait array */
+ ulint n_cells; /* number of cells in the
+ wait array */
+ sync_cell_t* array; /* pointer to wait array */
+ ulint protection; /* this flag tells which
+ mutex protects the data */
+ mutex_t mutex; /* possible database mutex
+ protecting this data structure */
+ os_mutex_t os_mutex; /* Possible operating system mutex
+ protecting the data structure.
+ As this data structure is used in
+ constructing the database mutex,
+ to prevent infinite recursion
+ in implementation, we fall back to
+ an OS mutex. */
+ ulint sg_count; /* count of how many times an
+ object has been signalled */
+ ulint res_count; /* count of cell reservations
+ since creation of the array */
+};
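+
+/* Illustrative sketch of choosing between the two protection types:
+the primary wait array is created with OS-mutex protection, because
+the array is itself needed to implement the database mutex. The names
+sync_primary_wait_array and OS_THREAD_MAX_N are assumptions based on
+sync0sync.c / srv0srv: */
+#if 0
+ sync_primary_wait_array = sync_array_create(OS_THREAD_MAX_N,
+ SYNC_ARRAY_OS_MUTEX);
+#endif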
+
+#ifdef UNIV_SYNC_DEBUG
+/**********************************************************************
+This function is called only in the debug version. Detects a deadlock
+of one or more threads because of waits of semaphores. */
+static
+ibool
+sync_array_detect_deadlock(
+/*=======================*/
+ /* out: TRUE if deadlock detected */
+ sync_array_t* arr, /* in: wait array; NOTE! the caller must
+ own the mutex to array */
+ sync_cell_t* start, /* in: cell where recursive search started */
+ sync_cell_t* cell, /* in: cell to search */
+ ulint depth); /* in: recursion depth */
+#endif /* UNIV_SYNC_DEBUG */
+
+/*********************************************************************
+Gets the nth cell in array. */
+static
+sync_cell_t*
+sync_array_get_nth_cell(
+/*====================*/
+ /* out: cell */
+ sync_array_t* arr, /* in: sync array */
+ ulint n) /* in: index */
+{
+ ut_a(arr);
+ ut_a(n < arr->n_cells);
+
+ return(arr->array + n);
+}
+
+/**********************************************************************
+Reserves the mutex semaphore protecting a sync array. */
+static
+void
+sync_array_enter(
+/*=============*/
+ sync_array_t* arr) /* in: sync wait array */
+{
+ ulint protection;
+
+ protection = arr->protection;
+
+ if (protection == SYNC_ARRAY_OS_MUTEX) {
+ os_mutex_enter(arr->os_mutex);
+ } else if (protection == SYNC_ARRAY_MUTEX) {
+ mutex_enter(&(arr->mutex));
+ } else {
+ ut_error;
+ }
+}
+
+/**********************************************************************
+Releases the mutex semaphore protecting a sync array. */
+static
+void
+sync_array_exit(
+/*============*/
+ sync_array_t* arr) /* in: sync wait array */
+{
+ ulint protection;
+
+ protection = arr->protection;
+
+ if (protection == SYNC_ARRAY_OS_MUTEX) {
+ os_mutex_exit(arr->os_mutex);
+ } else if (protection == SYNC_ARRAY_MUTEX) {
+ mutex_exit(&(arr->mutex));
+ } else {
+ ut_error;
+ }
+}
+
+/***********************************************************************
+Creates a synchronization wait array. It is protected by a mutex
+which is automatically reserved when the functions operating on it
+are called. */
+UNIV_INTERN
+sync_array_t*
+sync_array_create(
+/*==============*/
+ /* out, own: created wait array */
+ ulint n_cells, /* in: number of cells in the array
+ to create */
+ ulint protection) /* in: either SYNC_ARRAY_OS_MUTEX or
+ SYNC_ARRAY_MUTEX: determines the type
+ of mutex protecting the data structure */
+{
+ sync_array_t* arr;
+ sync_cell_t* cell_array;
+ sync_cell_t* cell;
+ ulint i;
+
+ ut_a(n_cells > 0);
+
+ /* Allocate memory for the data structures */
+ arr = ut_malloc(sizeof(sync_array_t));
+
+ cell_array = ut_malloc(sizeof(sync_cell_t) * n_cells);
+
+ arr->n_cells = n_cells;
+ arr->n_reserved = 0;
+ arr->array = cell_array;
+ arr->protection = protection;
+ arr->sg_count = 0;
+ arr->res_count = 0;
+
+ /* Then create the mutex to protect the wait array complex */
+ if (protection == SYNC_ARRAY_OS_MUTEX) {
+ arr->os_mutex = os_mutex_create(NULL);
+ } else if (protection == SYNC_ARRAY_MUTEX) {
+ mutex_create(&arr->mutex, SYNC_NO_ORDER_CHECK);
+ } else {
+ ut_error;
+ }
+
+ for (i = 0; i < n_cells; i++) {
+ cell = sync_array_get_nth_cell(arr, i);
+ cell->wait_object = NULL;
+ cell->waiting = FALSE;
+ cell->signal_count = 0;
+ }
+
+ return(arr);
+}
+
+/**********************************************************************
+Frees the resources in a wait array. */
+UNIV_INTERN
+void
+sync_array_free(
+/*============*/
+ sync_array_t* arr) /* in, own: sync wait array */
+{
+ ulint protection;
+
+ ut_a(arr->n_reserved == 0);
+
+ sync_array_validate(arr);
+
+ protection = arr->protection;
+
+ /* Release the mutex protecting the wait array complex */
+
+ if (protection == SYNC_ARRAY_OS_MUTEX) {
+ os_mutex_free(arr->os_mutex);
+ } else if (protection == SYNC_ARRAY_MUTEX) {
+ mutex_free(&(arr->mutex));
+ } else {
+ ut_error;
+ }
+
+ ut_free(arr->array);
+ ut_free(arr);
+}
+
+/************************************************************************
+Validates the integrity of the wait array. Checks
+that the number of reserved cells equals the count variable. */
+UNIV_INTERN
+void
+sync_array_validate(
+/*================*/
+ sync_array_t* arr) /* in: sync wait array */
+{
+ ulint i;
+ sync_cell_t* cell;
+ ulint count = 0;
+
+ sync_array_enter(arr);
+
+ for (i = 0; i < arr->n_cells; i++) {
+ cell = sync_array_get_nth_cell(arr, i);
+ if (cell->wait_object != NULL) {
+ count++;
+ }
+ }
+
+ ut_a(count == arr->n_reserved);
+
+ sync_array_exit(arr);
+}
+
+/***********************************************************************
+Returns the event that the thread owning the cell waits for. */
+static
+os_event_t
+sync_cell_get_event(
+/*================*/
+ sync_cell_t* cell) /* in: non-empty sync array cell */
+{
+ ulint type = cell->request_type;
+
+ if (type == SYNC_MUTEX) {
+ return(((mutex_t *) cell->wait_object)->event);
+ } else if (type == RW_LOCK_WAIT_EX) {
+ return(((rw_lock_t *) cell->wait_object)->wait_ex_event);
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ } else if (type == RW_LOCK_SHARED) {
+ return(((rw_lock_t *) cell->wait_object)->s_event);
+ } else { /* RW_LOCK_EX */
+ return(((rw_lock_t *) cell->wait_object)->x_event);
+#else
+ } else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */
+ return(((rw_lock_t *) cell->wait_object)->event);
+#endif
+ }
+}
+
+/**********************************************************************
+Reserves a wait array cell for waiting for an object.
+The event of the cell is reset to nonsignalled state. */
+UNIV_INTERN
+void
+sync_array_reserve_cell(
+/*====================*/
+ sync_array_t* arr, /* in: wait array */
+ void* object, /* in: pointer to the object to wait for */
+ ulint type, /* in: lock request type */
+ const char* file, /* in: file where requested */
+ ulint line, /* in: line where requested */
+ ulint* index) /* out: index of the reserved cell */
+{
+ sync_cell_t* cell;
+ os_event_t event;
+ ulint i;
+
+ ut_a(object);
+ ut_a(index);
+
+ sync_array_enter(arr);
+
+ arr->res_count++;
+
+ /* Reserve a new cell. */
+ for (i = 0; i < arr->n_cells; i++) {
+ cell = sync_array_get_nth_cell(arr, i);
+
+ if (cell->wait_object == NULL) {
+
+ cell->waiting = FALSE;
+ cell->wait_object = object;
+
+ if (type == SYNC_MUTEX) {
+ cell->old_wait_mutex = object;
+ } else {
+ cell->old_wait_rw_lock = object;
+ }
+
+ cell->request_type = type;
+
+ cell->file = file;
+ cell->line = line;
+
+ arr->n_reserved++;
+
+ *index = i;
+
+ sync_array_exit(arr);
+
+ /* Make sure the event is reset and also store
+ the value of signal_count at which the event
+ was reset. */
+ event = sync_cell_get_event(cell);
+ cell->signal_count = os_event_reset(event);
+
+ cell->reservation_time = time(NULL);
+
+ cell->thread = os_thread_get_curr_id();
+
+ return;
+ }
+ }
+
+ ut_error; /* No free cell found */
+
+ return;
+}
+
+/**********************************************************************
+This function should be called when a thread starts to wait on
+a wait array cell. In the debug version this function checks
+if the wait for a semaphore will result in a deadlock, in which
+case prints info and asserts. */
+UNIV_INTERN
+void
+sync_array_wait_event(
+/*==================*/
+ sync_array_t* arr, /* in: wait array */
+ ulint index) /* in: index of the reserved cell */
+{
+ sync_cell_t* cell;
+ os_event_t event;
+
+ ut_a(arr);
+
+ sync_array_enter(arr);
+
+ cell = sync_array_get_nth_cell(arr, index);
+
+ ut_a(cell->wait_object);
+ ut_a(!cell->waiting);
+ ut_ad(os_thread_get_curr_id() == cell->thread);
+
+ event = sync_cell_get_event(cell);
+ cell->waiting = TRUE;
+
+#ifdef UNIV_SYNC_DEBUG
+
+ /* We use simple enter to the mutex below, because if
+ we cannot acquire it at once, mutex_enter would call
+ recursively sync_array routines, leading to trouble.
+ rw_lock_debug_mutex freezes the debug lists. */
+
+ rw_lock_debug_mutex_enter();
+
+ if (TRUE == sync_array_detect_deadlock(arr, cell, cell, 0)) {
+
+ fputs("########################################\n", stderr);
+ ut_error;
+ }
+
+ rw_lock_debug_mutex_exit();
+#endif
+ sync_array_exit(arr);
+
+ os_event_wait_low(event, cell->signal_count);
+
+ sync_array_free_cell(arr, index);
+}
+
+/**********************************************************************
+Reports info of a wait array cell. */
+static
+void
+sync_array_cell_print(
+/*==================*/
+ FILE* file, /* in: file where to print */
+ sync_cell_t* cell) /* in: sync cell */
+{
+ mutex_t* mutex;
+ rw_lock_t* rwlock;
+ ulint type;
+ ulint writer;
+
+ type = cell->request_type;
+
+ fprintf(file,
+ "--Thread %lu has waited at %s line %lu"
+ " for %.2f seconds the semaphore:\n",
+ (ulong) os_thread_pf(cell->thread), cell->file,
+ (ulong) cell->line,
+ difftime(time(NULL), cell->reservation_time));
+
+ if (type == SYNC_MUTEX) {
+ /* We use old_wait_mutex in case the cell has already
+ been freed meanwhile */
+ mutex = cell->old_wait_mutex;
+
+ fprintf(file,
+ "Mutex at %p created file %s line %lu, lock var %lu\n"
+#ifdef UNIV_SYNC_DEBUG
+ "Last time reserved in file %s line %lu, "
+#endif /* UNIV_SYNC_DEBUG */
+ "waiters flag %lu\n",
+ (void*) mutex, mutex->cfile_name, (ulong) mutex->cline,
+ (ulong) mutex->lock_word,
+#ifdef UNIV_SYNC_DEBUG
+ mutex->file_name, (ulong) mutex->line,
+#endif /* UNIV_SYNC_DEBUG */
+ (ulong) mutex->waiters);
+
+ } else if (type == RW_LOCK_EX
+ || type == RW_LOCK_WAIT_EX
+ || type == RW_LOCK_SHARED) {
+
+ fputs(type == RW_LOCK_SHARED ? "S-lock on" : "X-lock on", file);
+
+ rwlock = cell->old_wait_rw_lock;
+
+ fprintf(file,
+ " RW-latch at %p created in file %s line %lu\n",
+ (void*) rwlock, rwlock->cfile_name,
+ (ulong) rwlock->cline);
+ writer = rw_lock_get_writer(rwlock);
+ if (writer != RW_LOCK_NOT_LOCKED) {
+ fprintf(file,
+ "a writer (thread id %lu) has"
+ " reserved it in mode %s",
+ (ulong) os_thread_pf(rwlock->writer_thread),
+ writer == RW_LOCK_EX
+ ? " exclusive\n"
+ : " wait exclusive\n");
+ }
+
+ fprintf(file,
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ "number of readers %lu, s_waiters flag %lu, x_waiters flag %lu, "
+#else
+ "number of readers %lu, waiters flag %lu, "
+#endif
+ "lock_word: %lx\n"
+ "Last time read locked in file %s line %lu\n"
+ "Last time write locked in file %s line %lu\n",
+ (ulong) rw_lock_get_reader_count(rwlock),
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ (ulong) rwlock->s_waiters,
+ (ulong) (rwlock->x_waiters || rwlock->wait_ex_waiters),
+#else
+ (ulong) rwlock->waiters,
+#endif
+ rwlock->lock_word,
+ rwlock->last_s_file_name,
+ (ulong) rwlock->last_s_line,
+ rwlock->last_x_file_name,
+ (ulong) rwlock->last_x_line);
+ } else {
+ ut_error;
+ }
+
+ if (!cell->waiting) {
+ fputs("wait has ended\n", file);
+ }
+}
+
+#ifdef UNIV_SYNC_DEBUG
+/**********************************************************************
+Looks for a cell with the given thread id. */
+static
+sync_cell_t*
+sync_array_find_thread(
+/*===================*/
+ /* out: pointer to cell or NULL
+ if not found */
+ sync_array_t* arr, /* in: wait array */
+ os_thread_id_t thread) /* in: thread id */
+{
+ ulint i;
+ sync_cell_t* cell;
+
+ for (i = 0; i < arr->n_cells; i++) {
+
+ cell = sync_array_get_nth_cell(arr, i);
+
+ if (cell->wait_object != NULL
+ && os_thread_eq(cell->thread, thread)) {
+
+ return(cell); /* Found */
+ }
+ }
+
+ return(NULL); /* Not found */
+}
+
+/**********************************************************************
+Recursion step for deadlock detection. */
+static
+ibool
+sync_array_deadlock_step(
+/*=====================*/
+ /* out: TRUE if deadlock detected */
+ sync_array_t* arr, /* in: wait array; NOTE! the caller must
+ own the mutex to array */
+ sync_cell_t* start, /* in: cell where recursive search
+ started */
+ os_thread_id_t thread, /* in: thread to look at */
+ ulint pass, /* in: pass value */
+ ulint depth) /* in: recursion depth */
+{
+ sync_cell_t* new;
+ ibool ret;
+
+ depth++;
+
+ if (pass != 0) {
+ /* If pass != 0, then we do not know which threads are
+ responsible for releasing the lock, and no deadlock can
+ be detected. */
+
+ return(FALSE);
+ }
+
+ new = sync_array_find_thread(arr, thread);
+
+ if (new == start) {
+ /* Stop running of other threads */
+
+ ut_dbg_stop_threads = TRUE;
+
+ /* Deadlock */
+ fputs("########################################\n"
+ "DEADLOCK of threads detected!\n", stderr);
+
+ return(TRUE);
+
+ } else if (new) {
+ ret = sync_array_detect_deadlock(arr, start, new, depth);
+
+ if (ret) {
+ return(TRUE);
+ }
+ }
+ return(FALSE);
+}
+
+/**********************************************************************
+This function is called only in the debug version. Detects a deadlock
+of one or more threads because of waits of semaphores. */
+static
+ibool
+sync_array_detect_deadlock(
+/*=======================*/
+ /* out: TRUE if deadlock detected */
+ sync_array_t* arr, /* in: wait array; NOTE! the caller must
+ own the mutex to array */
+ sync_cell_t* start, /* in: cell where recursive search started */
+ sync_cell_t* cell, /* in: cell to search */
+ ulint depth) /* in: recursion depth */
+{
+ mutex_t* mutex;
+ rw_lock_t* lock;
+ os_thread_id_t thread;
+ ibool ret;
+ rw_lock_debug_t*debug;
+
+ ut_a(arr);
+ ut_a(start);
+ ut_a(cell);
+ ut_ad(cell->wait_object);
+ ut_ad(os_thread_get_curr_id() == start->thread);
+ ut_ad(depth < 100);
+
+ depth++;
+
+ if (!cell->waiting) {
+
+ return(FALSE); /* No deadlock here */
+ }
+
+ if (cell->request_type == SYNC_MUTEX) {
+
+ mutex = cell->wait_object;
+
+ if (mutex_get_lock_word(mutex) != 0) {
+
+ thread = mutex->thread_id;
+
+ /* Note that mutex->thread_id above may also
+ be OS_THREAD_ID_UNDEFINED, because the thread
+ which held the mutex may not yet have updated
+ the value, or may already have released the
+ mutex: in this case no deadlock can occur, as
+ the wait array cannot contain a thread with an
+ ID_UNDEFINED value. */
+
+ ret = sync_array_deadlock_step(arr, start, thread, 0,
+ depth);
+ if (ret) {
+ fprintf(stderr,
+ "Mutex %p owned by thread %lu file %s line %lu\n",
+ mutex, (ulong) os_thread_pf(mutex->thread_id),
+ mutex->file_name, (ulong) mutex->line);
+ sync_array_cell_print(stderr, cell);
+
+ return(TRUE);
+ }
+ }
+
+ return(FALSE); /* No deadlock */
+
+ } else if (cell->request_type == RW_LOCK_EX
+ || cell->request_type == RW_LOCK_WAIT_EX) {
+
+ lock = cell->wait_object;
+
+ debug = UT_LIST_GET_FIRST(lock->debug_list);
+
+ while (debug != NULL) {
+
+ thread = debug->thread_id;
+
+ if (((debug->lock_type == RW_LOCK_EX)
+ && !os_thread_eq(thread, cell->thread))
+ || ((debug->lock_type == RW_LOCK_WAIT_EX)
+ && !os_thread_eq(thread, cell->thread))
+ || (debug->lock_type == RW_LOCK_SHARED)) {
+
+ /* The (wait) x-lock request can block
+ infinitely only if someone (possibly the cell
+ thread itself) is holding an s-lock, or someone
+ (who cannot be the cell thread) is holding a
+ (wait) x-lock, and that thread is blocked by
+ the start thread */
+
+ ret = sync_array_deadlock_step(
+ arr, start, thread, debug->pass,
+ depth);
+ if (ret) {
+print:
+ fprintf(stderr, "rw-lock %p ",
+ (void*) lock);
+ sync_array_cell_print(stderr, cell);
+ rw_lock_debug_print(debug);
+ return(TRUE);
+ }
+ }
+
+ debug = UT_LIST_GET_NEXT(list, debug);
+ }
+
+ return(FALSE);
+
+ } else if (cell->request_type == RW_LOCK_SHARED) {
+
+ lock = cell->wait_object;
+ debug = UT_LIST_GET_FIRST(lock->debug_list);
+
+ while (debug != NULL) {
+
+ thread = debug->thread_id;
+
+ if ((debug->lock_type == RW_LOCK_EX)
+ || (debug->lock_type == RW_LOCK_WAIT_EX)) {
+
+ /* The s-lock request can block infinitely
+ only if someone (possibly the cell thread
+ itself) is holding a (wait) x-lock, and that
+ thread is blocked by the start thread */
+
+ ret = sync_array_deadlock_step(
+ arr, start, thread, debug->pass,
+ depth);
+ if (ret) {
+ goto print;
+ }
+ }
+
+ debug = UT_LIST_GET_NEXT(list, debug);
+ }
+
+ return(FALSE);
+
+ } else {
+ ut_error;
+ }
+
+ return(TRUE); /* Execution never reaches this line: for compiler
+ fooling only */
+}
+#endif /* UNIV_SYNC_DEBUG */
+
+/**********************************************************************
+Determines if we can wake up the thread waiting for a semaphore. */
+static
+ibool
+sync_arr_cell_can_wake_up(
+/*======================*/
+ sync_cell_t* cell) /* in: cell to search */
+{
+ mutex_t* mutex;
+ rw_lock_t* lock;
+
+ if (cell->request_type == SYNC_MUTEX) {
+
+ mutex = cell->wait_object;
+
+ if (mutex_get_lock_word(mutex) == 0) {
+
+ return(TRUE);
+ }
+
+ } else if (cell->request_type == RW_LOCK_EX) {
+
+ lock = cell->wait_object;
+
+ if (lock->lock_word > 0) {
+ /* Either unlocked or only read locked. */
+
+ return(TRUE);
+ }
+
+ } else if (cell->request_type == RW_LOCK_WAIT_EX) {
+
+ lock = cell->wait_object;
+
+ /* lock_word == 0 means all readers have left */
+ if (lock->lock_word == 0) {
+
+ return(TRUE);
+ }
+ } else if (cell->request_type == RW_LOCK_SHARED) {
+ lock = cell->wait_object;
+
+ /* lock_word > 0 means no writer or reserved writer */
+ if (lock->lock_word > 0) {
+
+ return(TRUE);
+ }
+ }
+
+ return(FALSE);
+}
+
+/**********************************************************************
+Frees the cell. NOTE! sync_array_wait_event frees the cell
+automatically! */
+UNIV_INTERN
+void
+sync_array_free_cell(
+/*=================*/
+ sync_array_t* arr, /* in: wait array */
+ ulint index) /* in: index of the cell in array */
+{
+ sync_cell_t* cell;
+
+ sync_array_enter(arr);
+
+ cell = sync_array_get_nth_cell(arr, index);
+
+ ut_a(cell->wait_object != NULL);
+
+ cell->waiting = FALSE;
+ cell->wait_object = NULL;
+ cell->signal_count = 0;
+
+ ut_a(arr->n_reserved > 0);
+ arr->n_reserved--;
+
+ sync_array_exit(arr);
+}
+
+/**************************************************************************
+Increments the signalled count. */
+UNIV_INTERN
+void
+sync_array_object_signalled(
+/*========================*/
+ sync_array_t* arr) /* in: wait array */
+{
+#ifdef HAVE_GCC_ATOMIC_BUILTINS
+ (void) os_atomic_increment(&arr->sg_count, 1);
+#else
+ sync_array_enter(arr);
+
+ arr->sg_count++;
+
+ sync_array_exit(arr);
+#endif
+}
+
+/**************************************************************************
+If the wakeup algorithm does not work perfectly at semaphore releases,
+this function will do the waking (see the comment in mutex_exit). This
+function should be called about once every second in the server.
+
+Note that there's a race condition between this thread and mutex_exit
+changing the lock_word and calling signal_object, so sometimes this finds
+threads to wake up even when nothing has gone wrong. */
+UNIV_INTERN
+void
+sync_arr_wake_threads_if_sema_free(void)
+/*====================================*/
+{
+ sync_array_t* arr = sync_primary_wait_array;
+ sync_cell_t* cell;
+ ulint count;
+ ulint i;
+ os_event_t event;
+
+ sync_array_enter(arr);
+
+ i = 0;
+ count = 0;
+
+ while (count < arr->n_reserved) {
+
+ cell = sync_array_get_nth_cell(arr, i);
+ i++;
+
+ if (cell->wait_object == NULL) {
+ continue;
+ }
+ count++;
+
+ if (sync_arr_cell_can_wake_up(cell)) {
+
+ event = sync_cell_get_event(cell);
+
+ os_event_set(event);
+ }
+
+ }
+
+ sync_array_exit(arr);
+}
+
+/**************************************************************************
+Prints warnings of long semaphore waits to stderr. */
+UNIV_INTERN
+ibool
+sync_array_print_long_waits(void)
+/*=============================*/
+ /* out: TRUE if fatal semaphore wait threshold
+ was exceeded */
+{
+ sync_cell_t* cell;
+ ibool old_val;
+ ibool noticed = FALSE;
+ ulint i;
+ ulint fatal_timeout = srv_fatal_semaphore_wait_threshold;
+ ibool fatal = FALSE;
+
+ for (i = 0; i < sync_primary_wait_array->n_cells; i++) {
+
+ cell = sync_array_get_nth_cell(sync_primary_wait_array, i);
+
+ if (cell->wait_object != NULL && cell->waiting
+ && difftime(time(NULL), cell->reservation_time) > 240) {
+ fputs("InnoDB: Warning: a long semaphore wait:\n",
+ stderr);
+ sync_array_cell_print(stderr, cell);
+ noticed = TRUE;
+ }
+
+ if (cell->wait_object != NULL && cell->waiting
+ && difftime(time(NULL), cell->reservation_time)
+ > fatal_timeout) {
+ fatal = TRUE;
+ }
+ }
+
+ if (noticed) {
+ fprintf(stderr,
+ "InnoDB: ###### Starts InnoDB Monitor"
+ " for 30 secs to print diagnostic info:\n");
+ old_val = srv_print_innodb_monitor;
+
+ /* If some crucial semaphore is reserved, then also the InnoDB
+ Monitor can hang, and we do not get diagnostics. Since in
+ many cases an InnoDB hang is caused by a pwrite() or a pread()
+ call hanging inside the operating system, let us print right
+ now the values of pending calls of these. */
+
+ fprintf(stderr,
+ "InnoDB: Pending preads %lu, pwrites %lu\n",
+ (ulong)os_file_n_pending_preads,
+ (ulong)os_file_n_pending_pwrites);
+
+ srv_print_innodb_monitor = TRUE;
+ os_event_set(srv_lock_timeout_thread_event);
+
+ os_thread_sleep(30000000);
+
+ srv_print_innodb_monitor = old_val;
+ fprintf(stderr,
+ "InnoDB: ###### Diagnostic info printed"
+ " to the standard error stream\n");
+ }
+
+ return(fatal);
+}
+
+/**************************************************************************
+Prints info of the wait array. */
+static
+void
+sync_array_output_info(
+/*===================*/
+ FILE* file, /* in: file where to print */
+ sync_array_t* arr) /* in: wait array; NOTE! caller must own the
+ mutex */
+{
+ sync_cell_t* cell;
+ ulint count;
+ ulint i;
+
+ fprintf(file,
+ "OS WAIT ARRAY INFO: reservation count %ld, signal count %ld\n",
+ (long) arr->res_count, (long) arr->sg_count);
+ i = 0;
+ count = 0;
+
+ while (count < arr->n_reserved) {
+
+ cell = sync_array_get_nth_cell(arr, i);
+
+ if (cell->wait_object != NULL) {
+ count++;
+ sync_array_cell_print(file, cell);
+ }
+
+ i++;
+ }
+}
+
+/**************************************************************************
+Prints info of the wait array. */
+UNIV_INTERN
+void
+sync_array_print_info(
+/*==================*/
+ FILE* file, /* in: file where to print */
+ sync_array_t* arr) /* in: wait array */
+{
+ sync_array_enter(arr);
+
+ sync_array_output_info(file, arr);
+
+ sync_array_exit(arr);
+}
diff --git a/storage/xtradb/sync/sync0rw.c b/storage/xtradb/sync/sync0rw.c
new file mode 100644
index 00000000000..556e46a2ca1
--- /dev/null
+++ b/storage/xtradb/sync/sync0rw.c
@@ -0,0 +1,1285 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************
+The read-write lock (for thread synchronization)
+
+Created 9/11/1995 Heikki Tuuri
+*******************************************************/
+
+#include "sync0rw.h"
+#ifdef UNIV_NONINL
+#include "sync0rw.ic"
+#endif
+
+#include "os0thread.h"
+#include "mem0mem.h"
+#include "srv0srv.h"
+
+/*
+ IMPLEMENTATION OF THE RW_LOCK
+ =============================
+The status of a rw_lock is held in lock_word. The initial value of lock_word is
+X_LOCK_DECR. lock_word is decremented by 1 for each s-lock and by X_LOCK_DECR
+for each x-lock. This describes the lock state for each value of lock_word:
+
+lock_word == X_LOCK_DECR: Unlocked.
+0 < lock_word < X_LOCK_DECR: Read locked, no waiting writers.
+ (X_LOCK_DECR - lock_word) is the
+ number of readers that hold the lock.
+lock_word == 0: Write locked
+-X_LOCK_DECR < lock_word < 0: Read locked, with a waiting writer.
+ (-lock_word) is the number of readers
+ that hold the lock.
+lock_word <= -X_LOCK_DECR: Recursively write locked. lock_word has been
+ decremented by X_LOCK_DECR once for each lock,
+ so the number of locks is:
+ ((-lock_word) / X_LOCK_DECR) + 1
+When lock_word <= -X_LOCK_DECR, we also know that lock_word % X_LOCK_DECR == 0:
+other values of lock_word are invalid.
+
+The lock_word is always read and updated atomically and consistently, so that
+it always represents the state of the lock, and the state of the lock changes
+with a single atomic operation. This lock_word holds all of the information
+that a thread needs in order to determine if it is eligible to gain the lock
+or if it must spin or sleep. The one exception to this is that writer_thread
+must be verified before taking a recursive write lock: to solve this scenario,
+we make writer_thread readable by all threads, but writable only by the x-lock holder.
+
+The other members of the lock obey the following rules to remain consistent:
+
+recursive: This and the writer_thread field together control the
+ behaviour of recursive x-locking.
+ lock->recursive must be FALSE in following states:
+ 1) The writer_thread contains garbage i.e.: the
+ lock has just been initialized.
+ 2) The lock is not x-held and there is no
+ x-waiter waiting on WAIT_EX event.
+ 3) The lock is x-held or there is an x-waiter
+ waiting on WAIT_EX event but the 'pass' value
+ is non-zero.
+ lock->recursive is TRUE iff:
+ 1) The lock is x-held or there is an x-waiter
+ waiting on WAIT_EX event and the 'pass' value
+ is zero.
+ This flag must be set after the writer_thread field
+ has been updated with a memory ordering barrier.
+ It is unset before the lock_word has been incremented.
+writer_thread: Is used only in recursive x-locking. Can be safely
+ read only if the lock->recursive flag is TRUE.
+ This field is uninitialized at lock creation time and
+ is updated atomically when an x-lock is acquired or when
+ move_ownership is called. A thread is only allowed to
+ set the value of this field to its own thread_id; i.e., a
+ thread cannot set writer_thread to some other thread's
+ id.
+waiters: May be set to 1 anytime, but to avoid unnecessary wake-up
+ signals, it should only be set to 1 when there are threads
+ waiting on event. Must be 1 when a writer starts waiting to
+ ensure the current x-locking thread sends a wake-up signal
+ during unlock. May only be reset to 0 immediately before
+ a wake-up signal is sent to event. On most platforms, a
+ memory barrier is required after waiters is set, and before
+ verifying lock_word is still held, to ensure some unlocker
+ really does see the flag's new value.
+event: Threads wait on event for read or writer lock when another
+ thread has an x-lock or an x-lock reservation (wait_ex). A
+ thread may only wait on event after performing the following
+ actions in order:
+ (1) Record the counter value of event (with os_event_reset).
+ (2) Set waiters to 1.
+ (3) Verify lock_word <= 0.
+ (1) must come before (2) to ensure signal is not missed.
+ (2) must come before (3) to ensure a signal is sent.
+ These restrictions force the above ordering.
+ Immediately before sending the wake-up signal, we should:
+ (1) Verify lock_word == X_LOCK_DECR (unlocked)
+ (2) Reset waiters to 0.
+wait_ex_event: A thread may only wait on the wait_ex_event after it has
+ performed the following actions in order:
+ (1) Decrement lock_word by X_LOCK_DECR.
+ (2) Record counter value of wait_ex_event (os_event_reset,
+ called from sync_array_reserve_cell).
+ (3) Verify that lock_word < 0.
+ (1) must come first to ensure that no other thread becomes a
+ reader or the next writer, and to notify the unlocker that a
+ signal must be sent.
+ (2) must come before (3) to ensure the signal is not missed.
+ These restrictions force the above ordering.
+ Immediately before sending the wake-up signal, we should:
+ Verify lock_word == 0 (waiting thread holds x_lock)
+*/
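+
+/* Worked example of the lock_word encoding above (illustrative only;
+X_LOCK_DECR is defined in sync0rw.h). Two s-locks give
+lock_word == X_LOCK_DECR - 2; one x-lock gives 0; a writer waiting in
+WAIT_EX behind three readers gives -3; two recursive x-locks give
+-X_LOCK_DECR. A decoder following the table above: */
+#if 0
+static const char*
+rw_lock_state_sketch(lint lock_word)
+{
+ if (lock_word == X_LOCK_DECR) {
+ return("unlocked");
+ } else if (lock_word > 0) {
+ /* X_LOCK_DECR - lock_word readers hold the lock */
+ return("read locked, no waiting writers");
+ } else if (lock_word == 0) {
+ return("write locked");
+ } else if (lock_word > -X_LOCK_DECR) {
+ /* (-lock_word) readers still hold the lock */
+ return("read locked, with a waiting writer");
+ } else {
+ /* ((-lock_word) / X_LOCK_DECR) + 1 x-locks held */
+ return("recursively write locked");
+ }
+}
+#endif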
+
+
+/* number of spin waits on rw-latches,
+incurred during shared (read) locks */
+UNIV_INTERN ib_int64_t rw_s_spin_wait_count = 0;
+UNIV_INTERN ib_int64_t rw_s_spin_round_count = 0;
+
+/* number of OS waits on rw-latches,
+incurred during shared (read) locks */
+UNIV_INTERN ib_int64_t rw_s_os_wait_count = 0;
+
+/* number of unlocks (that unlock shared locks),
+set only when UNIV_SYNC_PERF_STAT is defined */
+UNIV_INTERN ib_int64_t rw_s_exit_count = 0;
+
+/* number of spin waits on rw-latches,
+incurred during exclusive (write) locks */
+UNIV_INTERN ib_int64_t rw_x_spin_wait_count = 0;
+UNIV_INTERN ib_int64_t rw_x_spin_round_count = 0;
+
+/* number of OS waits on rw-latches,
+incurred during exclusive (write) locks */
+UNIV_INTERN ib_int64_t rw_x_os_wait_count = 0;
+
+/* number of unlocks (that unlock exclusive locks),
+set only when UNIV_SYNC_PERF_STAT is defined */
+UNIV_INTERN ib_int64_t rw_x_exit_count = 0;
+
+/* The global list of rw-locks */
+UNIV_INTERN rw_lock_list_t rw_lock_list;
+UNIV_INTERN mutex_t rw_lock_list_mutex;
+
+#ifdef UNIV_SYNC_DEBUG
+/* The global mutex which protects debug info lists of all rw-locks.
+To modify the debug info list of an rw-lock, this mutex has to be
+acquired in addition to the mutex protecting the lock. */
+
+UNIV_INTERN mutex_t rw_lock_debug_mutex;
+/* If deadlock detection does not get immediately the mutex,
+it may wait for this event */
+UNIV_INTERN os_event_t rw_lock_debug_event;
+/* This is set to TRUE, if there may be waiters for the event */
+UNIV_INTERN ibool rw_lock_debug_waiters;
+
+/**********************************************************************
+Creates a debug info struct. */
+static
+rw_lock_debug_t*
+rw_lock_debug_create(void);
+/*======================*/
+/**********************************************************************
+Frees a debug info struct. */
+static
+void
+rw_lock_debug_free(
+/*===============*/
+ rw_lock_debug_t* info);
+
+/**********************************************************************
+Creates a debug info struct. */
+static
+rw_lock_debug_t*
+rw_lock_debug_create(void)
+/*======================*/
+{
+ return((rw_lock_debug_t*) mem_alloc(sizeof(rw_lock_debug_t)));
+}
+
+/**********************************************************************
+Frees a debug info struct. */
+static
+void
+rw_lock_debug_free(
+/*===============*/
+ rw_lock_debug_t* info)
+{
+ mem_free(info);
+}
+#endif /* UNIV_SYNC_DEBUG */
+
+/**********************************************************************
+Creates, or rather, initializes an rw-lock object in a specified memory
+location (which must be appropriately aligned). The rw-lock is initialized
+to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
+is necessary only if the memory block containing it is freed. */
+UNIV_INTERN
+void
+rw_lock_create_func(
+/*================*/
+ rw_lock_t* lock, /* in: pointer to memory */
+#ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+ ulint level, /* in: level */
+# endif /* UNIV_SYNC_DEBUG */
+ const char* cmutex_name, /* in: mutex name */
+#endif /* UNIV_DEBUG */
+ const char* cfile_name, /* in: file name where created */
+ ulint cline) /* in: file line where created */
+{
+ /* If this is the very first time a synchronization object is
+ created, then the following call initializes the sync system. */
+
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
+ mutex_create(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK);
+
+ lock->mutex.cfile_name = cfile_name;
+ lock->mutex.cline = cline;
+
+# if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
+ lock->mutex.cmutex_name = cmutex_name;
+ lock->mutex.mutex_type = 1;
+# endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+
+#else /* INNODB_RW_LOCKS_USE_ATOMICS */
+# ifdef UNIV_DEBUG
+ UT_NOT_USED(cmutex_name);
+# endif
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+
+ lock->lock_word = X_LOCK_DECR;
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ lock->s_waiters = 0;
+ lock->x_waiters = 0;
+ lock->wait_ex_waiters = 0;
+ lock->writer = RW_LOCK_NOT_LOCKED;
+ lock->writer_count = 0;
+ lock->reader_count = 0;
+ lock->writer_is_wait_ex = FALSE;
+#else
+ lock->waiters = 0;
+#endif
+
+ /* We set this value to signify that lock->writer_thread
+ contains garbage at initialization and cannot be used for
+ recursive x-locking. */
+ lock->recursive = FALSE;
+
+#ifdef UNIV_SYNC_DEBUG
+ UT_LIST_INIT(lock->debug_list);
+
+ lock->level = level;
+#endif /* UNIV_SYNC_DEBUG */
+
+ lock->magic_n = RW_LOCK_MAGIC_N;
+
+ lock->cfile_name = cfile_name;
+ lock->cline = (unsigned int) cline;
+
+ lock->count_os_wait = 0;
+ lock->last_s_file_name = "not yet reserved";
+ lock->last_x_file_name = "not yet reserved";
+ lock->last_s_line = 0;
+ lock->last_x_line = 0;
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ lock->s_event = os_event_create(NULL);
+ lock->x_event = os_event_create(NULL);
+#else
+ lock->event = os_event_create(NULL);
+#endif
+ lock->wait_ex_event = os_event_create(NULL);
+
+ mutex_enter(&rw_lock_list_mutex);
+
+ if (UT_LIST_GET_LEN(rw_lock_list) > 0) {
+ ut_a(UT_LIST_GET_FIRST(rw_lock_list)->magic_n
+ == RW_LOCK_MAGIC_N);
+ }
+
+ UT_LIST_ADD_FIRST(list, rw_lock_list, lock);
+
+ mutex_exit(&rw_lock_list_mutex);
+}
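+
+/* Usage sketch (illustrative; the rw_lock_create() macro in sync0rw.h
+wraps this function and supplies __FILE__ and __LINE__ automatically): */
+#if 0
+ rw_lock_t latch;
+
+ rw_lock_create(&latch, SYNC_NO_ORDER_CHECK);
+ rw_lock_s_lock(&latch);
+ rw_lock_s_unlock(&latch);
+ rw_lock_free(&latch); /* only if the memory holding it is freed */
+#endif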
+
+/**********************************************************************
+Calling this function is obligatory only if the memory buffer containing
+the rw-lock is freed. Removes an rw-lock object from the global list. The
+rw-lock is checked to be in the non-locked state. */
+UNIV_INTERN
+void
+rw_lock_free(
+/*=========*/
+ rw_lock_t* lock) /* in: rw-lock */
+{
+ ut_ad(rw_lock_validate(lock));
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
+ ut_a(rw_lock_get_s_waiters(lock) == 0);
+ ut_a(rw_lock_get_x_waiters(lock) == 0);
+ ut_a(rw_lock_get_wx_waiters(lock) == 0);
+ ut_a(rw_lock_get_reader_count(lock) == 0);
+#else
+ ut_a(lock->lock_word == X_LOCK_DECR);
+#endif
+
+ lock->magic_n = 0;
+
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
+ mutex_free(rw_lock_get_mutex(lock));
+#endif /* INNODB_RW_LOCKS_USE_ATOMICS */
+
+ mutex_enter(&rw_lock_list_mutex);
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ os_event_free(lock->s_event);
+ os_event_free(lock->x_event);
+#else
+ os_event_free(lock->event);
+#endif
+
+ os_event_free(lock->wait_ex_event);
+
+ if (UT_LIST_GET_PREV(list, lock)) {
+ ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N);
+ }
+ if (UT_LIST_GET_NEXT(list, lock)) {
+ ut_a(UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N);
+ }
+
+ UT_LIST_REMOVE(list, rw_lock_list, lock);
+
+ mutex_exit(&rw_lock_list_mutex);
+}
+
+#ifdef UNIV_DEBUG
+/**********************************************************************
+Checks that the rw-lock has been initialized and that there are no
+simultaneous shared and exclusive locks. */
+UNIV_INTERN
+ibool
+rw_lock_validate(
+/*=============*/
+ rw_lock_t* lock)
+{
+ ulint waiters;
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
+ lint lock_word;
+#endif
+
+ ut_a(lock);
+ ut_a(lock->magic_n == RW_LOCK_MAGIC_N);
+
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ waiters = rw_lock_get_s_waiters(lock);
+ ut_a(waiters == 0 || waiters == 1);
+ waiters = rw_lock_get_x_waiters(lock);
+ ut_a(waiters == 0 || waiters == 1);
+ waiters = rw_lock_get_wx_waiters(lock);
+ ut_a(waiters == 0 || waiters == 1);
+#else
+ waiters = rw_lock_get_waiters(lock);
+ lock_word = lock->lock_word;
+
+ ut_a(waiters == 0 || waiters == 1);
+ ut_a(lock_word > -X_LOCK_DECR || (-lock_word) % X_LOCK_DECR == 0);
+#endif
+
+ return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+
+/**********************************************************************
+Lock an rw-lock in shared mode for the current thread. If the rw-lock is
+locked in exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
+for the lock, before suspending the thread. */
+UNIV_INTERN
+void
+rw_lock_s_lock_spin(
+/*================*/
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ ulint pass, /* in: pass value; != 0, if the lock
+ will be passed to another thread to unlock */
+ const char* file_name, /* in: file name where lock requested */
+ ulint line) /* in: line where requested */
+{
+ ulint index; /* index of the reserved wait cell */
+ ulint i = 0; /* spin round count */
+
+ ut_ad(rw_lock_validate(lock));
+
+ rw_s_spin_wait_count++; /* Count calls to this function */
+lock_loop:
+
+ /* Spin waiting for the writer field to become free */
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ while (i < SYNC_SPIN_ROUNDS
+ && rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) {
+#else
+ while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) {
+#endif
+ if (srv_spin_wait_delay) {
+ ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
+ }
+
+ i++;
+ }
+
+ if (i == SYNC_SPIN_ROUNDS) {
+ os_thread_yield();
+ }
+
+ if (srv_print_latch_waits) {
+ fprintf(stderr,
+ "Thread %lu spin wait rw-s-lock at %p"
+ " cfile %s cline %lu rnds %lu\n",
+ (ulong) os_thread_pf(os_thread_get_curr_id()),
+ (void*) lock,
+ lock->cfile_name, (ulong) lock->cline, (ulong) i);
+ }
+
+ /* We try once again to obtain the lock */
+ if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
+ rw_s_spin_round_count += i;
+
+ return; /* Success */
+ } else {
+
+ if (i < SYNC_SPIN_ROUNDS) {
+ goto lock_loop;
+ }
+
+ rw_s_spin_round_count += i;
+
+ sync_array_reserve_cell(sync_primary_wait_array,
+ lock, RW_LOCK_SHARED,
+ file_name, line,
+ &index);
+
+ /* Set waiters before checking lock_word to ensure wake-up
+ signal is sent. This may lead to some unnecessary signals. */
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ rw_lock_set_s_waiter_flag(lock);
+#else
+ rw_lock_set_waiter_flag(lock);
+#endif
+
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ for (i = 0; i < 4; i++) {
+#endif
+ if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
+ sync_array_free_cell(sync_primary_wait_array, index);
+ return; /* Success */
+ }
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ }
+
+ /* If a wait_ex waiter is stalled, wake it up. */
+ if (lock->reader_count == 0
+ && __sync_lock_test_and_set(&lock->wait_ex_waiters, 0)) {
+ os_event_set(lock->wait_ex_event);
+ sync_array_object_signalled(sync_primary_wait_array);
+ }
+#endif
+
+ if (srv_print_latch_waits) {
+ fprintf(stderr,
+ "Thread %lu OS wait rw-s-lock at %p"
+ " cfile %s cline %lu\n",
+ os_thread_pf(os_thread_get_curr_id()),
+ (void*) lock, lock->cfile_name,
+ (ulong) lock->cline);
+ }
+
+ /* these stats may not be accurate */
+ lock->count_os_wait++;
+ rw_s_os_wait_count++;
+
+ sync_array_wait_event(sync_primary_wait_array, index);
+
+ i = 0;
+ goto lock_loop;
+ }
+}
+
+/**********************************************************************
+This function is used in the insert buffer to move the ownership of an
+x-latch on a buffer frame to the current thread. The x-latch was set by
+the buffer read operation and it protected the buffer frame while the
+read was done. The ownership is moved because we want that the current
+thread is able to acquire a second x-latch which is stored in an mtr.
+This, in turn, is needed to pass the debug checks of index page
+operations. */
+UNIV_INTERN
+void
+rw_lock_x_lock_move_ownership(
+/*==========================*/
+ rw_lock_t* lock) /* in: lock which was x-locked in the
+ buffer read */
+{
+ ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX));
+
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ lock->writer_thread = os_thread_get_curr_id();
+ lock->recursive = TRUE;
+#else
+ rw_lock_set_writer_id_and_recursion_flag(lock, TRUE);
+#endif
+}
+
+/**********************************************************************
+Function for the next writer to call. Waits for readers to exit.
+The caller must have already decremented lock_word by X_LOCK_DECR.*/
+UNIV_INLINE
+void
+rw_lock_x_lock_wait(
+/*================*/
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+#ifdef UNIV_SYNC_DEBUG
+ ulint pass, /* in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+#endif
+ const char* file_name,/* in: file name where lock requested */
+ ulint line) /* in: line where requested */
+{
+ ulint index;
+ ulint i = 0;
+
+ ut_ad(lock->lock_word <= 0);
+
+ while (lock->lock_word < 0) {
+ if (srv_spin_wait_delay) {
+ ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
+ }
+ if (i < SYNC_SPIN_ROUNDS) {
+ i++;
+ continue;
+ }
+
+ /* If there is still a reader, then go to sleep.*/
+ rw_x_spin_round_count += i;
+ i = 0;
+ sync_array_reserve_cell(sync_primary_wait_array,
+ lock,
+ RW_LOCK_WAIT_EX,
+ file_name, line,
+ &index);
+ /* Check lock_word to ensure wake-up isn't missed.*/
+ if (lock->lock_word < 0) {
+
+ /* these stats may not be accurate */
+ lock->count_os_wait++;
+ rw_x_os_wait_count++;
+
+ /* Add debug info as it is needed to detect possible
+ deadlock. We must add info for WAIT_EX thread for
+ deadlock detection to work properly. */
+#ifdef UNIV_SYNC_DEBUG
+ rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
+ file_name, line);
+#endif
+
+ sync_array_wait_event(sync_primary_wait_array,
+ index);
+#ifdef UNIV_SYNC_DEBUG
+ rw_lock_remove_debug_info(lock, pass,
+ RW_LOCK_WAIT_EX);
+#endif
+ /* It is possible to wake when lock_word < 0.
+ We must pass the while-loop check to proceed.*/
+ } else {
+ sync_array_free_cell(sync_primary_wait_array,
+ index);
+ }
+ }
+ rw_x_spin_round_count += i;
+}
+
+/**********************************************************************
+Low-level function for acquiring an exclusive lock. */
+UNIV_INLINE
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ulint
+#else
+ibool
+#endif
+rw_lock_x_lock_low(
+/*===============*/
+ /* out: RW_LOCK_NOT_LOCKED if did
+ not succeed, RW_LOCK_EX if success. */
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ ulint pass, /* in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/* in: file name where lock requested */
+ ulint line) /* in: line where requested */
+{
+ os_thread_id_t curr_thread = os_thread_get_curr_id();
+
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+retry_writer:
+ /* try to lock writer */
+ if (__sync_lock_test_and_set(&(lock->writer), RW_LOCK_EX)
+ == RW_LOCK_NOT_LOCKED) {
+ /* success */
+ /* obtain RW_LOCK_WAIT_EX right */
+ lock->writer_thread = curr_thread;
+ lock->recursive = pass ? FALSE : TRUE;
+ lock->writer_is_wait_ex = TRUE;
+ /* atomic operation may be safer about memory order. */
+ __sync_synchronize();
+#ifdef UNIV_SYNC_DEBUG
+ rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
+ file_name, line);
+#endif
+ }
+
+ if (!os_thread_eq(lock->writer_thread, curr_thread)) {
+ return(RW_LOCK_NOT_LOCKED);
+ }
+
+ switch (rw_lock_get_writer(lock)) {
+ case RW_LOCK_WAIT_EX:
+ /* have right to try x-lock */
+retry_x_lock:
+ /* try x-lock */
+ if (__sync_sub_and_fetch(&(lock->lock_word),
+ X_LOCK_DECR) == 0) {
+ /* success */
+ lock->recursive = pass ? FALSE : TRUE;
+ lock->writer_is_wait_ex = FALSE;
+ __sync_fetch_and_add(&(lock->writer_count),1);
+
+#ifdef UNIV_SYNC_DEBUG
+ rw_lock_remove_debug_info(lock, pass, RW_LOCK_WAIT_EX);
+ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
+ file_name, line);
+#endif
+
+ lock->last_x_file_name = file_name;
+ lock->last_x_line = line;
+
+ /* Locking succeeded, we may return */
+ return(RW_LOCK_EX);
+ } else if (__sync_fetch_and_add(&(lock->lock_word),
+ X_LOCK_DECR) == 0) {
+ /* retry x-lock */
+ goto retry_x_lock;
+ }
+
+ /* There are readers, we have to wait */
+ return(RW_LOCK_WAIT_EX);
+
+ break;
+
+ case RW_LOCK_EX:
+ /* already have x-lock */
+ if (lock->recursive && (pass == 0)) {
+ __sync_fetch_and_add(&(lock->writer_count),1);
+
+#ifdef UNIV_SYNC_DEBUG
+ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name,
+ line);
+#endif
+
+ lock->last_x_file_name = file_name;
+ lock->last_x_line = line;
+
+ /* Locking succeeded, we may return */
+ return(RW_LOCK_EX);
+ }
+
+ return(RW_LOCK_NOT_LOCKED);
+
+ break;
+
+ default: /* RW_LOCK_NOT_LOCKED? maybe impossible */
+ goto retry_writer;
+ }
+
+ /* Locking did not succeed */
+ return(RW_LOCK_NOT_LOCKED);
+#else
+ if (rw_lock_lock_word_decr(lock, X_LOCK_DECR)) {
+
+ /* lock->recursive also tells us if the writer_thread
+ field is stale or active. As we are going to write
+ our own thread id in that field it must be that the
+ current writer_thread value is not active. */
+ ut_a(!lock->recursive);
+
+ /* Decrement occurred: we are writer or next-writer. */
+ rw_lock_set_writer_id_and_recursion_flag(lock,
+ pass ? FALSE : TRUE);
+
+ rw_lock_x_lock_wait(lock,
+#ifdef UNIV_SYNC_DEBUG
+ pass,
+#endif
+ file_name, line);
+
+ } else {
+ /* Decrement failed: relock or failed lock */
+ if (!pass && lock->recursive
+ && os_thread_eq(lock->writer_thread, curr_thread)) {
+ /* Relock */
+ lock->lock_word -= X_LOCK_DECR;
+ } else {
+ /* Another thread locked before us */
+ return(FALSE);
+ }
+ }
+#ifdef UNIV_SYNC_DEBUG
+ rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
+ file_name, line);
+#endif
+ lock->last_x_file_name = file_name;
+ lock->last_x_line = (unsigned int) line;
+
+ return(TRUE);
+#endif
+}
+
+/**********************************************************************
+NOTE! Use the corresponding macro, not directly this function! Lock an
+rw-lock in exclusive mode for the current thread. If the rw-lock is locked
+in shared or exclusive mode, or there is an exclusive lock request waiting,
+the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
+for the lock before suspending the thread. If the same thread has an x-lock
+on the rw-lock, locking succeeds, with the following exception: if pass != 0,
+only a single x-lock may be taken on the lock. NOTE: If the same thread has
+an s-lock, locking does not succeed! */
+UNIV_INTERN
+void
+rw_lock_x_lock_func(
+/*================*/
+ rw_lock_t* lock, /* in: pointer to rw-lock */
+ ulint pass, /* in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/* in: file name where lock requested */
+ ulint line) /* in: line where requested */
+{
+ ulint index; /* index of the reserved wait cell */
+ ulint i; /* spin round count */
+ ibool spinning = FALSE;
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ ulint state = RW_LOCK_NOT_LOCKED; /* lock state acquired */
+ ulint prev_state = RW_LOCK_NOT_LOCKED;
+#endif
+
+ ut_ad(rw_lock_validate(lock));
+
+ i = 0;
+
+lock_loop:
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ prev_state = state;
+ state = rw_lock_x_lock_low(lock, pass, file_name, line);
+
+lock_loop_2:
+ if (state != prev_state) i = 0; /* if progress, reset counter */
+
+ if (state == RW_LOCK_EX) {
+#else
+ if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
+#endif
+ rw_x_spin_round_count += i;
+
+ return; /* Locking succeeded */
+
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ } else if (state == RW_LOCK_WAIT_EX) {
+
+ if (!spinning) {
+ spinning = TRUE;
+ rw_x_spin_wait_count++;
+ }
+
+ /* Spin waiting for the reader count field to become zero */
+ while (i < SYNC_SPIN_ROUNDS
+ && lock->lock_word != X_LOCK_DECR) {
+ if (srv_spin_wait_delay) {
+ ut_delay(ut_rnd_interval(0,
+ srv_spin_wait_delay));
+ }
+
+ i++;
+ }
+ if (i == SYNC_SPIN_ROUNDS) {
+ os_thread_yield();
+ } else {
+ goto lock_loop;
+ }
+#endif
+ } else {
+
+ if (!spinning) {
+ spinning = TRUE;
+ rw_x_spin_wait_count++;
+ }
+
+ /* Spin waiting for the lock_word to become free */
+ while (i < SYNC_SPIN_ROUNDS
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ && rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED) {
+#else
+ && lock->lock_word <= 0) {
+#endif
+ if (srv_spin_wait_delay) {
+ ut_delay(ut_rnd_interval(0,
+ srv_spin_wait_delay));
+ }
+
+ i++;
+ }
+ if (i == SYNC_SPIN_ROUNDS) {
+ os_thread_yield();
+ } else {
+ goto lock_loop;
+ }
+ }
+
+ rw_x_spin_round_count += i;
+
+ if (srv_print_latch_waits) {
+ fprintf(stderr,
+ "Thread %lu spin wait rw-x-lock at %p"
+ " cfile %s cline %lu rnds %lu\n",
+ os_thread_pf(os_thread_get_curr_id()), (void*) lock,
+ lock->cfile_name, (ulong) lock->cline, (ulong) i);
+ }
+
+ sync_array_reserve_cell(sync_primary_wait_array,
+ lock,
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ (state == RW_LOCK_WAIT_EX)
+ ? RW_LOCK_WAIT_EX : RW_LOCK_EX,
+#else
+ RW_LOCK_EX,
+#endif
+ file_name, line,
+ &index);
+
+ /* Waiters must be set before checking lock_word, to ensure signal
+ is sent. This could lead to a few unnecessary wake-up signals. */
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ if (state == RW_LOCK_WAIT_EX) {
+ rw_lock_set_wx_waiter_flag(lock);
+ } else {
+ rw_lock_set_x_waiter_flag(lock);
+ }
+#else
+ rw_lock_set_waiter_flag(lock);
+#endif
+
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ for (i = 0; i < 4; i++) {
+ prev_state = state;
+ state = rw_lock_x_lock_low(lock, pass, file_name, line);
+ if (state == RW_LOCK_EX) {
+ sync_array_free_cell(sync_primary_wait_array, index);
+ return; /* Locking succeeded */
+ } else if (state != prev_state) {
+ /* retry! */
+ sync_array_free_cell(sync_primary_wait_array, index);
+ goto lock_loop_2;
+ }
+ }
+#else
+ if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
+ sync_array_free_cell(sync_primary_wait_array, index);
+ return; /* Locking succeeded */
+ }
+#endif
+
+ if (srv_print_latch_waits) {
+ fprintf(stderr,
+ "Thread %lu OS wait for rw-x-lock at %p"
+ " cfile %s cline %lu\n",
+ os_thread_pf(os_thread_get_curr_id()), (void*) lock,
+ lock->cfile_name, (ulong) lock->cline);
+ }
+
+ /* these stats may not be accurate */
+ lock->count_os_wait++;
+ rw_x_os_wait_count++;
+
+ sync_array_wait_event(sync_primary_wait_array, index);
+
+ i = 0;
+ goto lock_loop;
+}
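+
+/* Recursion sketch for the pass semantics described above
+(illustrative; the rw_lock_x_lock() / rw_lock_x_unlock() macros from
+sync0rw.h call this function with pass == 0): */
+#if 0
+ rw_lock_x_lock(&latch); /* first x-lock: lock_word goes to 0 */
+ rw_lock_x_lock(&latch); /* same thread, pass == 0: recursion ok,
+ lock_word drops by another X_LOCK_DECR */
+ rw_lock_x_unlock(&latch);
+ rw_lock_x_unlock(&latch); /* lock_word back to X_LOCK_DECR */
+#endif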
+
+#ifdef UNIV_SYNC_DEBUG
+/**********************************************************************
+Acquires the debug mutex. We cannot use the mutex defined in sync0sync,
+because the debug mutex is also acquired in sync0arr while holding the OS
+mutex protecting the sync array, and the ordinary mutex_enter might
+recursively call routines in sync0arr, leading to a deadlock on the OS
+mutex. */
+UNIV_INTERN
+void
+rw_lock_debug_mutex_enter(void)
+/*==========================*/
+{
+loop:
+ if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) {
+ return;
+ }
+
+ os_event_reset(rw_lock_debug_event);
+
+ rw_lock_debug_waiters = TRUE;
+
+ if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) {
+ return;
+ }
+
+ os_event_wait(rw_lock_debug_event);
+
+ goto loop;
+}
+
+/**********************************************************************
+Releases the debug mutex. */
+UNIV_INTERN
+void
+rw_lock_debug_mutex_exit(void)
+/*==========================*/
+{
+ mutex_exit(&rw_lock_debug_mutex);
+
+ if (rw_lock_debug_waiters) {
+ rw_lock_debug_waiters = FALSE;
+ os_event_set(rw_lock_debug_event);
+ }
+}
+
+/**********************************************************************
+Inserts the debug information for an rw-lock. */
+UNIV_INTERN
+void
+rw_lock_add_debug_info(
+/*===================*/
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint pass, /* in: pass value */
+ ulint lock_type, /* in: lock type */
+ const char* file_name, /* in: file where requested */
+ ulint line) /* in: line where requested */
+{
+ rw_lock_debug_t* info;
+
+ ut_ad(lock);
+ ut_ad(file_name);
+
+ info = rw_lock_debug_create();
+
+ rw_lock_debug_mutex_enter();
+
+ info->file_name = file_name;
+ info->line = line;
+ info->lock_type = lock_type;
+ info->thread_id = os_thread_get_curr_id();
+ info->pass = pass;
+
+ UT_LIST_ADD_FIRST(list, lock->debug_list, info);
+
+ rw_lock_debug_mutex_exit();
+
+ if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) {
+ sync_thread_add_level(lock, lock->level);
+ }
+}
+
+/**********************************************************************
+Removes a debug information struct for an rw-lock. */
+UNIV_INTERN
+void
+rw_lock_remove_debug_info(
+/*======================*/
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint pass, /* in: pass value */
+ ulint lock_type) /* in: lock type */
+{
+ rw_lock_debug_t* info;
+
+ ut_ad(lock);
+
+ if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) {
+ sync_thread_reset_level(lock);
+ }
+
+ rw_lock_debug_mutex_enter();
+
+ info = UT_LIST_GET_FIRST(lock->debug_list);
+
+ while (info != NULL) {
+ if ((pass == info->pass)
+ && ((pass != 0)
+ || os_thread_eq(info->thread_id,
+ os_thread_get_curr_id()))
+ && (info->lock_type == lock_type)) {
+
+ /* Found! */
+ UT_LIST_REMOVE(list, lock->debug_list, info);
+ rw_lock_debug_mutex_exit();
+
+ rw_lock_debug_free(info);
+
+ return;
+ }
+
+ info = UT_LIST_GET_NEXT(list, info);
+ }
+
+ ut_error;
+}
+#endif /* UNIV_SYNC_DEBUG */
+
+#ifdef UNIV_SYNC_DEBUG
+/**********************************************************************
+Checks if the thread has locked the rw-lock in the specified mode, with
+the pass value == 0. */
+UNIV_INTERN
+ibool
+rw_lock_own(
+/*========*/
+ /* out: TRUE if locked */
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint lock_type) /* in: lock type: RW_LOCK_SHARED,
+ RW_LOCK_EX */
+{
+ rw_lock_debug_t* info;
+
+ ut_ad(lock);
+ ut_ad(rw_lock_validate(lock));
+
+ rw_lock_debug_mutex_enter();
+
+ info = UT_LIST_GET_FIRST(lock->debug_list);
+
+ while (info != NULL) {
+
+ if (os_thread_eq(info->thread_id, os_thread_get_curr_id())
+ && (info->pass == 0)
+ && (info->lock_type == lock_type)) {
+
+ rw_lock_debug_mutex_exit();
+ /* Found! */
+
+ return(TRUE);
+ }
+
+ info = UT_LIST_GET_NEXT(list, info);
+ }
+ rw_lock_debug_mutex_exit();
+
+ return(FALSE);
+}
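+
+/* rw_lock_own() is meant for debug assertions, e.g. (illustrative only):
+
+	ut_ad(rw_lock_own(lock, RW_LOCK_EX));
+
+documenting that the calling thread must hold the lock in x-mode. */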
+#endif /* UNIV_SYNC_DEBUG */
+
+/**********************************************************************
+Checks if somebody has locked the rw-lock in the specified mode. */
+UNIV_INTERN
+ibool
+rw_lock_is_locked(
+/*==============*/
+ /* out: TRUE if locked */
+ rw_lock_t* lock, /* in: rw-lock */
+ ulint lock_type) /* in: lock type: RW_LOCK_SHARED,
+ RW_LOCK_EX */
+{
+ ibool ret = FALSE;
+
+ ut_ad(lock);
+ ut_ad(rw_lock_validate(lock));
+
+ if (lock_type == RW_LOCK_SHARED) {
+ if (rw_lock_get_reader_count(lock) > 0) {
+ ret = TRUE;
+ }
+ } else if (lock_type == RW_LOCK_EX) {
+ if (rw_lock_get_writer(lock) == RW_LOCK_EX) {
+ ret = TRUE;
+ }
+ } else {
+ ut_error;
+ }
+
+ return(ret);
+}
+
+#ifdef UNIV_SYNC_DEBUG
+/*******************************************************************
+Prints debug info of currently locked rw-locks. */
+UNIV_INTERN
+void
+rw_lock_list_print_info(
+/*====================*/
+ FILE* file) /* in: file where to print */
+{
+ rw_lock_t* lock;
+ ulint count = 0;
+ rw_lock_debug_t* info;
+
+ mutex_enter(&rw_lock_list_mutex);
+
+ fputs("-------------\n"
+ "RW-LATCH INFO\n"
+ "-------------\n", file);
+
+ lock = UT_LIST_GET_FIRST(rw_lock_list);
+
+ while (lock != NULL) {
+
+ count++;
+
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
+ mutex_enter(&(lock->mutex));
+#endif
+ if (lock->lock_word != X_LOCK_DECR) {
+
+ fprintf(file, "RW-LOCK: %p ", (void*) lock);
+
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ if (rw_lock_get_s_waiters(lock)) {
+ fputs(" s_waiters for the lock exist", file);
+ }
+ if (rw_lock_get_x_waiters(lock)) {
+ fputs(" x_waiters for the lock exist", file);
+ }
+ if (rw_lock_get_wx_waiters(lock)) {
+ fputs(" wait_ex_waiters for the lock exist", file);
+ }
+ putc('\n', file);
+#else
+ if (rw_lock_get_waiters(lock)) {
+ fputs(" Waiters for the lock exist\n", file);
+ } else {
+ putc('\n', file);
+ }
+#endif
+
+ info = UT_LIST_GET_FIRST(lock->debug_list);
+ while (info != NULL) {
+ rw_lock_debug_print(info);
+ info = UT_LIST_GET_NEXT(list, info);
+ }
+ }
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
+ mutex_exit(&(lock->mutex));
+#endif
+
+ lock = UT_LIST_GET_NEXT(list, lock);
+ }
+
+ fprintf(file, "Total number of rw-locks %ld\n", count);
+ mutex_exit(&rw_lock_list_mutex);
+}
+
+/*******************************************************************
+Prints debug info of an rw-lock. */
+UNIV_INTERN
+void
+rw_lock_print(
+/*==========*/
+ rw_lock_t* lock) /* in: rw-lock */
+{
+ rw_lock_debug_t* info;
+
+ fprintf(stderr,
+ "-------------\n"
+ "RW-LATCH INFO\n"
+ "RW-LATCH: %p ", (void*) lock);
+
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
+ mutex_enter(&(lock->mutex));
+#endif
+ if (lock->lock_word != X_LOCK_DECR) {
+
+#ifdef INNODB_RW_LOCKS_USE_ATOMICS
+ if (rw_lock_get_s_waiters(lock)) {
+ fputs(" s_waiters for the lock exist", stderr);
+ }
+ if (rw_lock_get_x_waiters(lock)) {
+ fputs(" x_waiters for the lock exist", stderr);
+ }
+ if (rw_lock_get_wx_waiters(lock)) {
+ fputs(" wait_ex_waiters for the lock exist", stderr);
+ }
+ putc('\n', stderr);
+#else
+ if (rw_lock_get_waiters(lock)) {
+ fputs(" Waiters for the lock exist\n", stderr);
+ } else {
+ putc('\n', stderr);
+ }
+#endif
+
+ info = UT_LIST_GET_FIRST(lock->debug_list);
+ while (info != NULL) {
+ rw_lock_debug_print(info);
+ info = UT_LIST_GET_NEXT(list, info);
+ }
+ }
+#ifndef INNODB_RW_LOCKS_USE_ATOMICS
+ mutex_exit(&(lock->mutex));
+#endif
+}
+
+/*************************************************************************
+Prints info of a debug struct. */
+UNIV_INTERN
+void
+rw_lock_debug_print(
+/*================*/
+ rw_lock_debug_t* info) /* in: debug struct */
+{
+ ulint rwt;
+
+ rwt = info->lock_type;
+
+ fprintf(stderr, "Locked: thread %ld file %s line %ld ",
+ (ulong) os_thread_pf(info->thread_id), info->file_name,
+ (ulong) info->line);
+ if (rwt == RW_LOCK_SHARED) {
+ fputs("S-LOCK", stderr);
+ } else if (rwt == RW_LOCK_EX) {
+ fputs("X-LOCK", stderr);
+ } else if (rwt == RW_LOCK_WAIT_EX) {
+ fputs("WAIT X-LOCK", stderr);
+ } else {
+ ut_error;
+ }
+ if (info->pass != 0) {
+ fprintf(stderr, " pass value %lu", (ulong) info->pass);
+ }
+ putc('\n', stderr);
+}
+
+/*******************************************************************
+Returns the number of currently locked rw-locks. Works only in the debug
+version. */
+UNIV_INTERN
+ulint
+rw_lock_n_locked(void)
+/*==================*/
+{
+ rw_lock_t* lock;
+ ulint count = 0;
+
+ mutex_enter(&rw_lock_list_mutex);
+
+ lock = UT_LIST_GET_FIRST(rw_lock_list);
+
+ while (lock != NULL) {
+
+ if (lock->lock_word != X_LOCK_DECR) {
+ count++;
+ }
+
+ lock = UT_LIST_GET_NEXT(list, lock);
+ }
+
+ mutex_exit(&rw_lock_list_mutex);
+
+ return(count);
+}
+#endif /* UNIV_SYNC_DEBUG */
diff --git a/storage/xtradb/sync/sync0sync.c b/storage/xtradb/sync/sync0sync.c
new file mode 100644
index 00000000000..3b2d033aae5
--- /dev/null
+++ b/storage/xtradb/sync/sync0sync.c
@@ -0,0 +1,1411 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2008, Google Inc.
+
+Portions of this file contain modifications contributed and copyrighted by
+Google, Inc. Those modifications are gratefully acknowledged and are described
+briefly in the InnoDB documentation. The contributions by Google are
+incorporated with their permission, and subject to the conditions contained in
+the file COPYING.Google.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************
+Mutex, the basic synchronization primitive
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "sync0sync.h"
+#ifdef UNIV_NONINL
+#include "sync0sync.ic"
+#endif
+
+#include "sync0rw.h"
+#include "buf0buf.h"
+#include "srv0srv.h"
+#include "buf0types.h"
+
+/*
+ REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX
+ ============================================
+
+Semaphore operations in operating systems are slow: Solaris on a 1993 Sparc
+takes 3 microseconds (us) for a lock-unlock pair and Windows NT on a 1995
+Pentium takes 20 microseconds for a lock-unlock pair. Therefore, we have to
+implement our own efficient spin lock mutex. Future operating systems may
+provide efficient spin locks, but we cannot count on that.
+
+Another reason for implementing a spin lock is that on multiprocessor systems
+it can be more efficient for a processor to run a loop waiting for the
+semaphore to be released than to switch to a different thread. A thread switch
+takes 25 us on both platforms mentioned above. See Gray and Reuter's book
+Transaction processing for background.
+
+How long should the spin loop last before suspending the thread? On a
+uniprocessor, spinning does not help at all: while a waiter spins, the thread
+owning the mutex is not executing, so the mutex cannot be released, and the
+spinning merely wastes resources.
+
+On a multiprocessor, we do not know if the thread owning the mutex is
+executing or not. Thus it makes sense to spin for as long as the operation
+guarded by the mutex would typically last, assuming that the owning thread is
+executing. If the mutex is not released by that time, we may assume that the
+thread owning the mutex is not executing and suspend the waiting thread.
+
+A typical operation (where no i/o is involved) guarded by a mutex or a read-write
+lock may last 1 - 20 us on the current Pentium platform. The longest
+operations are the binary searches on an index node.
+
+We conclude that the best choice is to set the spin time at 20 us. Then the
+system should work well on a multiprocessor. On a uniprocessor we have to
+make sure that thread switches due to mutex collisions are not frequent,
+i.e., that they do not happen every 100 us or so, because that wastes too
+many resources. If the thread switches are not frequent, the 20 us wasted in
+the spin loop is not too much.
+
+Empirical studies on the effect of spin time should be done for different
+platforms.
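+
+As a rough illustration of the arithmetic above: suspending and resuming a
+waiting thread costs two thread switches, i.e. about 2 x 25 us = 50 us on
+the platforms mentioned, so spinning for up to 20 us is cheaper whenever the
+mutex is typically released within that window.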
+
+
+ IMPLEMENTATION OF THE MUTEX
+ ===========================
+
+For background, see Curt Schimmel's book on Unix implementation on modern
+architectures. The key points in the implementation are atomicity and
+serialization of memory accesses. The test-and-set instruction (XCHG in
+Pentium) must be atomic. As new processors may have weak memory models, also
+serialization of memory references may be necessary. The successor of Pentium,
+P6, has at least one mode where the memory model is weak. As far as we know,
+in Pentium all memory accesses are serialized in the program order and we do
+not have to worry about the memory model. On other processors there are
+special machine instructions called a fence, memory barrier, or storage
+barrier (STBAR in Sparc), which can be used to serialize the memory accesses
+to happen in program order relative to the fence instruction.
+
+Leslie Lamport has devised a "bakery algorithm" to implement a mutex without
+the atomic test-and-set, but his algorithm should be modified for weak memory
+models. We do not use Lamport's algorithm, because we guess it is slower than
+the atomic test-and-set.
+
+Our mutex implementation works as follows: First we perform an atomic
+test-and-set instruction on the memory word. If the test returns zero, we
+know we got the lock first. If the test returns nonzero, some other thread
+was quicker and got the lock: then we spin in a loop reading the memory word,
+waiting for it to become zero. It is wise to just read the word in the loop,
+not perform numerous test-and-set instructions, because they generate memory
+traffic between the cache and the main memory. The read loop can just access
+the cache, saving bus bandwidth.
+
+If we cannot acquire the mutex lock in the specified time, we reserve a cell
+in the wait array and set the waiters byte in the mutex to 1. To avoid a race
+condition, after setting the waiters byte and before suspending the waiting
+thread, we still have to check that the mutex is reserved, because it may
+have happened that the thread which was holding the mutex has just released
+it and did not see the waiters byte set to 1, a case which would lead the
+other thread to an infinite wait.
+
+LEMMA 1: After a thread resets the event of a mutex (or rw_lock), some
+=======
+thread will eventually call os_event_set() on that particular event.
+Thus no infinite wait is possible in this case.
+
+Proof: After making the reservation the thread sets the waiters field in the
+mutex to 1. Then it checks that the mutex is still reserved by some thread,
+or it reserves the mutex for itself. In any case, some thread (which may be
+also some earlier thread, not necessarily the one currently holding the mutex)
+will set the waiters field to 0 in mutex_exit, and then call
+os_event_set() with the mutex as an argument.
+Q.E.D.
+
+LEMMA 2: If an os_event_set() call is made after some thread has called
+=======
+the os_event_reset() and before it starts wait on that event, the call
+will not be lost to the second thread. This is true even if there is an
+intervening call to os_event_reset() by another thread.
+Thus no infinite wait is possible in this case.
+
+Proof (non-windows platforms): os_event_reset() returns a monotonically
+increasing value of signal_count. This value is increased at every
+call of os_event_set(). If thread A has called os_event_reset() followed
+by thread B calling os_event_set() and then some other thread C calling
+os_event_reset(), the is_set flag of the event will be set to FALSE;
+but now if thread A calls os_event_wait_low() with the signal_count
+value returned from the earlier call of os_event_reset(), it will
+return immediately without waiting.
+Q.E.D.
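+
+An informal timeline of the above (the values are hypothetical):
+
+ thread A: count = os_event_reset(e);    returns, say, n
+ thread B: os_event_set(e);              signal_count becomes n + 1
+ thread C: os_event_reset(e);            is_set becomes FALSE again
+ thread A: os_event_wait_low(e, count);  the event was set after n, so
+                                         this returns immediately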
+
+Proof (windows): If there is a writer thread which is forced to wait for
+the lock, it may be able to set the state of rw_lock to RW_LOCK_WAIT_EX.
+The design of rw_lock ensures that there is one and only one thread
+that is able to change the state to RW_LOCK_WAIT_EX, and this thread is
+guaranteed to acquire the lock after it is released by the current
+holders and before any other waiter gets the lock.
+On Windows this thread waits on a separate event, i.e. wait_ex_event.
+Since only one thread can wait on this event, there is no chance
+of this event getting reset before the writer starts waiting on it.
+Therefore, this thread is guaranteed to catch the os_event_set() that is
+signalled unconditionally at the release of the lock.
+Q.E.D. */
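+
+/* A condensed sketch of the acquisition protocol described above; this is
+illustrative only, with error handling and details omitted (the real code
+is mutex_spin_wait() below):
+
+	for (;;) {
+		spin until lock word == 0 or SYNC_SPIN_ROUNDS is reached;
+
+		if (mutex_test_and_set(mutex) == 0) {
+			return;		got the lock
+		}
+
+		sync_array_reserve_cell(wait array, mutex, ..., &index);
+		mutex_set_waiters(mutex, 1);
+
+		if (mutex_test_and_set(mutex) == 0) {
+			sync_array_free_cell(wait array, index);
+			return;		the re-check avoided the race
+		}
+
+		sync_array_wait_event(wait array, index);
+	}
+*/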
+
+/* Number of spin waits on mutexes: for performance monitoring */
+
+/* round=one iteration of a spin loop */
+UNIV_INTERN ib_int64_t mutex_spin_round_count = 0;
+UNIV_INTERN ib_int64_t mutex_spin_wait_count = 0;
+UNIV_INTERN ib_int64_t mutex_os_wait_count = 0;
+UNIV_INTERN ib_int64_t mutex_exit_count = 0;
+
+/* The global array of wait cells for implementation of the database's own
+mutexes and read-write locks */
+UNIV_INTERN sync_array_t* sync_primary_wait_array;
+
+/* This variable is set to TRUE when sync_init is called */
+UNIV_INTERN ibool sync_initialized = FALSE;
+
+
+typedef struct sync_level_struct sync_level_t;
+typedef struct sync_thread_struct sync_thread_t;
+
+#ifdef UNIV_SYNC_DEBUG
+/* The latch levels currently owned by threads are stored in this data
+structure; the size of this array is OS_THREAD_MAX_N */
+
+UNIV_INTERN sync_thread_t* sync_thread_level_arrays;
+
+/* Mutex protecting sync_thread_level_arrays */
+UNIV_INTERN mutex_t sync_thread_mutex;
+#endif /* UNIV_SYNC_DEBUG */
+
+/* Global list of database mutexes (not OS mutexes) created. */
+UNIV_INTERN ut_list_base_node_t mutex_list;
+
+/* Mutex protecting the mutex_list variable */
+UNIV_INTERN mutex_t mutex_list_mutex;
+
+#ifdef UNIV_SYNC_DEBUG
+/* Latching order checks start when this is set TRUE */
+UNIV_INTERN ibool sync_order_checks_on = FALSE;
+#endif /* UNIV_SYNC_DEBUG */
+
+struct sync_thread_struct{
+ os_thread_id_t id; /* OS thread id */
+ sync_level_t* levels; /* level array for this thread; if this is NULL
+ this slot is unused */
+};
+
+/* Number of slots reserved for each OS thread in the sync level array */
+#define SYNC_THREAD_N_LEVELS 10000
+
+struct sync_level_struct{
+ void* latch; /* pointer to a mutex or an rw-lock; NULL means that
+ the slot is empty */
+ ulint level; /* level of the latch in the latching order */
+};
+
+/**********************************************************************
+Creates, or rather, initializes a mutex object in a specified memory
+location (which must be appropriately aligned). The mutex is initialized
+in the reset state. Explicit freeing of the mutex with mutex_free is
+necessary only if the memory block containing it is freed. */
+UNIV_INTERN
+void
+mutex_create_func(
+/*==============*/
+ mutex_t* mutex, /* in: pointer to memory */
+#ifdef UNIV_DEBUG
+ const char* cmutex_name, /* in: mutex name */
+# ifdef UNIV_SYNC_DEBUG
+ ulint level, /* in: level */
+# endif /* UNIV_SYNC_DEBUG */
+#endif /* UNIV_DEBUG */
+ const char* cfile_name, /* in: file name where created */
+ ulint cline) /* in: file line where created */
+{
+#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
+ mutex_reset_lock_word(mutex);
+#elif defined(HAVE_GCC_ATOMIC_BUILTINS)
+ mutex_reset_lock_word(mutex);
+#else
+ os_fast_mutex_init(&(mutex->os_fast_mutex));
+ mutex->lock_word = 0;
+#endif
+ mutex->event = os_event_create(NULL);
+ mutex_set_waiters(mutex, 0);
+#ifdef UNIV_DEBUG
+ mutex->magic_n = MUTEX_MAGIC_N;
+#endif /* UNIV_DEBUG */
+#ifdef UNIV_SYNC_DEBUG
+ mutex->line = 0;
+ mutex->file_name = "not yet reserved";
+ mutex->level = level;
+#endif /* UNIV_SYNC_DEBUG */
+ mutex->cfile_name = cfile_name;
+ mutex->cline = cline;
+#ifndef UNIV_HOTBACKUP
+ mutex->count_os_wait = 0;
+# ifdef UNIV_DEBUG
+ mutex->cmutex_name= cmutex_name;
+ mutex->count_using= 0;
+ mutex->mutex_type= 0;
+ mutex->lspent_time= 0;
+ mutex->lmax_spent_time= 0;
+ mutex->count_spin_loop= 0;
+ mutex->count_spin_rounds= 0;
+ mutex->count_os_yield= 0;
+# endif /* UNIV_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
+
+ /* Check that lock_word is aligned; this is important on Intel */
+ ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0);
+
+ /* NOTE! The very first mutexes are not put to the mutex list */
+
+ if ((mutex == &mutex_list_mutex)
+#ifdef UNIV_SYNC_DEBUG
+ || (mutex == &sync_thread_mutex)
+#endif /* UNIV_SYNC_DEBUG */
+ ) {
+
+ return;
+ }
+
+ mutex_enter(&mutex_list_mutex);
+
+ ut_ad(UT_LIST_GET_LEN(mutex_list) == 0
+ || UT_LIST_GET_FIRST(mutex_list)->magic_n == MUTEX_MAGIC_N);
+
+ UT_LIST_ADD_FIRST(list, mutex_list, mutex);
+
+ mutex_exit(&mutex_list_mutex);
+}
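+
+/* An illustrative lifecycle, assuming a hypothetical mutex_t my_mutex
+(the mutex_create macro in the header expands to mutex_create_func with
+the file name and line filled in; see sync_init() below for real callers):
+
+	mutex_create(&my_mutex, SYNC_NO_ORDER_CHECK);
+
+	mutex_enter(&my_mutex);
+	... critical section ...
+	mutex_exit(&my_mutex);
+
+	mutex_free(&my_mutex);	only if the memory holding it is freed
+*/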
+
+/**********************************************************************
+Removes a mutex object from the mutex list and checks that the mutex is in
+the reset state. Calling this function is obligatory only if the memory
+buffer containing the mutex is freed. */
+UNIV_INTERN
+void
+mutex_free(
+/*=======*/
+ mutex_t* mutex) /* in: mutex */
+{
+ ut_ad(mutex_validate(mutex));
+ ut_a(mutex_get_lock_word(mutex) == 0);
+ ut_a(mutex_get_waiters(mutex) == 0);
+
+ if (mutex != &mutex_list_mutex
+#ifdef UNIV_SYNC_DEBUG
+ && mutex != &sync_thread_mutex
+#endif /* UNIV_SYNC_DEBUG */
+ ) {
+
+ mutex_enter(&mutex_list_mutex);
+
+ ut_ad(!UT_LIST_GET_PREV(list, mutex)
+ || UT_LIST_GET_PREV(list, mutex)->magic_n
+ == MUTEX_MAGIC_N);
+ ut_ad(!UT_LIST_GET_NEXT(list, mutex)
+ || UT_LIST_GET_NEXT(list, mutex)->magic_n
+ == MUTEX_MAGIC_N);
+
+ UT_LIST_REMOVE(list, mutex_list, mutex);
+
+ mutex_exit(&mutex_list_mutex);
+ }
+
+ os_event_free(mutex->event);
+
+#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
+#elif defined(HAVE_GCC_ATOMIC_BUILTINS)
+#else
+ os_fast_mutex_free(&(mutex->os_fast_mutex));
+#endif
+ /* If we free the mutex protecting the mutex list (freeing is
+ not necessary), we have to reset the magic number AFTER removing
+ it from the list. */
+#ifdef UNIV_DEBUG
+ mutex->magic_n = 0;
+#endif /* UNIV_DEBUG */
+}
+
+/************************************************************************
+NOTE! Use the corresponding macro in the header file, not this function
+directly. Tries to lock the mutex for the current thread. If the lock is not
+acquired immediately, returns 1 without waiting. */
+UNIV_INTERN
+ulint
+mutex_enter_nowait_func(
+/*====================*/
+ /* out: 0 if succeed, 1 if not */
+ mutex_t* mutex, /* in: pointer to mutex */
+ const char* file_name __attribute__((unused)),
+ /* in: file name where mutex
+ requested */
+ ulint line __attribute__((unused)))
+ /* in: line where requested */
+{
+ ut_ad(mutex_validate(mutex));
+
+ if (!mutex_test_and_set(mutex)) {
+
+ ut_d(mutex->thread_id = os_thread_get_curr_id());
+#ifdef UNIV_SYNC_DEBUG
+ mutex_set_debug_info(mutex, file_name, line);
+#endif
+
+ return(0); /* Succeeded! */
+ }
+
+ return(1);
+}
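+
+/* An illustrative use of the corresponding macro, with a hypothetical
+some_mutex (see rw_lock_debug_mutex_enter() in sync0rw.c for a real
+caller):
+
+	if (0 == mutex_enter_nowait(&some_mutex)) {
+		... got the mutex without waiting ...
+		mutex_exit(&some_mutex);
+	}
+*/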
+
+#ifdef UNIV_DEBUG
+/**********************************************************************
+Checks that the mutex has been initialized. */
+UNIV_INTERN
+ibool
+mutex_validate(
+/*===========*/
+ const mutex_t* mutex)
+{
+ ut_a(mutex);
+ ut_a(mutex->magic_n == MUTEX_MAGIC_N);
+
+ return(TRUE);
+}
+
+/**********************************************************************
+Checks that the current thread owns the mutex. Works only in the debug
+version. */
+UNIV_INTERN
+ibool
+mutex_own(
+/*======*/
+ /* out: TRUE if owns */
+ const mutex_t* mutex) /* in: mutex */
+{
+ ut_ad(mutex_validate(mutex));
+
+ return(mutex_get_lock_word(mutex) == 1
+ && os_thread_eq(mutex->thread_id, os_thread_get_curr_id()));
+}
+#endif /* UNIV_DEBUG */
+
+/**********************************************************************
+Sets the waiters field in a mutex. */
+UNIV_INTERN
+void
+mutex_set_waiters(
+/*==============*/
+ mutex_t* mutex, /* in: mutex */
+ ulint n) /* in: value to set */
+{
+ volatile ulint* ptr; /* declared volatile to ensure that
+ the value is stored to memory */
+ ut_ad(mutex);
+
+ ptr = &(mutex->waiters);
+
+ *ptr = n; /* Here we assume that the write of a single
+ word in memory is atomic */
+}
+
+/**********************************************************************
+Reserves a mutex for the current thread. If the mutex is reserved, the
+function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
+for the mutex before suspending the thread. */
+UNIV_INTERN
+void
+mutex_spin_wait(
+/*============*/
+ mutex_t* mutex, /* in: pointer to mutex */
+ const char* file_name, /* in: file name where mutex
+ requested */
+ ulint line) /* in: line where requested */
+{
+ ulint index; /* index of the reserved wait cell */
+ ulint i; /* spin round count */
+#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
+ ib_int64_t lstart_time = 0, lfinish_time; /* for timing os_wait */
+ ulint ltime_diff;
+ ulint sec;
+ ulint ms;
+ uint timer_started = 0;
+#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+ ut_ad(mutex);
+
+	/* Count the number of calls to mutex_spin_wait. This update is
+	not thread safe, but we do not mind if the count is not exact.
+	This was moved out of the ifdef that follows because we are
+	willing to pay the cost of counting, as the data is valuable. */
+ mutex_spin_wait_count++;
+
+mutex_loop:
+
+ i = 0;
+
+ /* Spin waiting for the lock word to become zero. Note that we do
+ not have to assume that the read access to the lock word is atomic,
+ as the actual locking is always committed with atomic test-and-set.
+ In reality, however, all processors probably have an atomic read of
+ a memory word. */
+
+spin_loop:
+#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
+ mutex->count_spin_loop++;
+#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+
+ while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) {
+ if (srv_spin_wait_delay) {
+ ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
+ }
+
+ i++;
+ }
+
+ if (i == SYNC_SPIN_ROUNDS) {
+#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
+ mutex->count_os_yield++;
+ if (timed_mutexes == 1 && timer_started==0) {
+ ut_usectime(&sec, &ms);
+ lstart_time= (ib_int64_t)sec * 1000000 + ms;
+ timer_started = 1;
+ }
+#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+ os_thread_yield();
+ }
+
+#ifdef UNIV_SRV_PRINT_LATCH_WAITS
+ fprintf(stderr,
+ "Thread %lu spin wait mutex at %p"
+ " cfile %s cline %lu rnds %lu\n",
+ (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
+ mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
+#endif
+
+ mutex_spin_round_count += i;
+
+#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
+ mutex->count_spin_rounds += i;
+#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+
+ if (mutex_test_and_set(mutex) == 0) {
+ /* Succeeded! */
+
+ ut_d(mutex->thread_id = os_thread_get_curr_id());
+#ifdef UNIV_SYNC_DEBUG
+ mutex_set_debug_info(mutex, file_name, line);
+#endif
+
+ goto finish_timing;
+ }
+
+ /* We may end up with a situation where lock_word is 0 but the OS
+ fast mutex is still reserved. On FreeBSD the OS does not seem to
+ schedule a thread which is constantly calling pthread_mutex_trylock
+ (in mutex_test_and_set implementation). Then we could end up
+ spinning here indefinitely. The following 'i++' stops this infinite
+ spin. */
+
+ i++;
+
+ if (i < SYNC_SPIN_ROUNDS) {
+ goto spin_loop;
+ }
+
+ sync_array_reserve_cell(sync_primary_wait_array, mutex,
+ SYNC_MUTEX, file_name, line, &index);
+
+ /* The memory order of the array reservation and the change in the
+ waiters field is important: when we suspend a thread, we first
+ reserve the cell and then set waiters field to 1. When threads are
+ released in mutex_exit, the waiters field is first set to zero and
+ then the event is set to the signaled state. */
+
+ mutex_set_waiters(mutex, 1);
+
+	/* Try a few more times to reserve the mutex */
+ for (i = 0; i < 4; i++) {
+ if (mutex_test_and_set(mutex) == 0) {
+ /* Succeeded! Free the reserved wait cell */
+
+ sync_array_free_cell(sync_primary_wait_array, index);
+
+ ut_d(mutex->thread_id = os_thread_get_curr_id());
+#ifdef UNIV_SYNC_DEBUG
+ mutex_set_debug_info(mutex, file_name, line);
+#endif
+
+#ifdef UNIV_SRV_PRINT_LATCH_WAITS
+ fprintf(stderr, "Thread %lu spin wait succeeds at 2:"
+ " mutex at %p\n",
+ (ulong) os_thread_pf(os_thread_get_curr_id()),
+ (void*) mutex);
+#endif
+
+ goto finish_timing;
+
+ /* Note that in this case we leave the waiters field
+ set to 1. We cannot reset it to zero, as we do not
+ know if there are other waiters. */
+ }
+ }
+
+	/* Now we know that some thread has held the mutex after we made
+	the changes to the wait array and the waiters field, so there is
+	no risk of an infinite wait on the event. */
+
+#ifdef UNIV_SRV_PRINT_LATCH_WAITS
+ fprintf(stderr,
+ "Thread %lu OS wait mutex at %p cfile %s cline %lu rnds %lu\n",
+ (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex,
+ mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
+#endif
+
+ mutex_os_wait_count++;
+
+#ifndef UNIV_HOTBACKUP
+ mutex->count_os_wait++;
+# ifdef UNIV_DEBUG
+ /* !!!!! Sometimes os_wait can be called without os_thread_yield */
+
+ if (timed_mutexes == 1 && timer_started==0) {
+ ut_usectime(&sec, &ms);
+ lstart_time= (ib_int64_t)sec * 1000000 + ms;
+ timer_started = 1;
+ }
+# endif /* UNIV_DEBUG */
+#endif /* !UNIV_HOTBACKUP */
+
+ sync_array_wait_event(sync_primary_wait_array, index);
+ goto mutex_loop;
+
+finish_timing:
+#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
+ if (timed_mutexes == 1 && timer_started==1) {
+ ut_usectime(&sec, &ms);
+ lfinish_time= (ib_int64_t)sec * 1000000 + ms;
+
+ ltime_diff= (ulint) (lfinish_time - lstart_time);
+ mutex->lspent_time += ltime_diff;
+
+ if (mutex->lmax_spent_time < ltime_diff) {
+ mutex->lmax_spent_time= ltime_diff;
+ }
+ }
+#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
+ return;
+}
+
+/**********************************************************************
+Releases the threads waiting in the primary wait array for this mutex. */
+UNIV_INTERN
+void
+mutex_signal_object(
+/*================*/
+ mutex_t* mutex) /* in: mutex */
+{
+ mutex_set_waiters(mutex, 0);
+
+ /* The memory order of resetting the waiters field and
+ signaling the object is important. See LEMMA 1 above. */
+ os_event_set(mutex->event);
+ sync_array_object_signalled(sync_primary_wait_array);
+}
+
+#ifdef UNIV_SYNC_DEBUG
+/**********************************************************************
+Sets the debug information for a reserved mutex. */
+UNIV_INTERN
+void
+mutex_set_debug_info(
+/*=================*/
+ mutex_t* mutex, /* in: mutex */
+ const char* file_name, /* in: file where requested */
+ ulint line) /* in: line where requested */
+{
+ ut_ad(mutex);
+ ut_ad(file_name);
+
+ sync_thread_add_level(mutex, mutex->level);
+
+ mutex->file_name = file_name;
+ mutex->line = line;
+}
+
+/**********************************************************************
+Gets the debug information for a reserved mutex. */
+UNIV_INTERN
+void
+mutex_get_debug_info(
+/*=================*/
+ mutex_t* mutex, /* in: mutex */
+ const char** file_name, /* out: file where requested */
+ ulint* line, /* out: line where requested */
+ os_thread_id_t* thread_id) /* out: id of the thread which owns
+ the mutex */
+{
+ ut_ad(mutex);
+
+ *file_name = mutex->file_name;
+ *line = mutex->line;
+ *thread_id = mutex->thread_id;
+}
+
+/**********************************************************************
+Prints debug info of currently reserved mutexes. */
+static
+void
+mutex_list_print_info(
+/*==================*/
+ FILE* file) /* in: file where to print */
+{
+ mutex_t* mutex;
+ const char* file_name;
+ ulint line;
+ os_thread_id_t thread_id;
+ ulint count = 0;
+
+ fputs("----------\n"
+ "MUTEX INFO\n"
+ "----------\n", file);
+
+ mutex_enter(&mutex_list_mutex);
+
+ mutex = UT_LIST_GET_FIRST(mutex_list);
+
+ while (mutex != NULL) {
+ count++;
+
+ if (mutex_get_lock_word(mutex) != 0) {
+ mutex_get_debug_info(mutex, &file_name, &line,
+ &thread_id);
+ fprintf(file,
+ "Locked mutex: addr %p thread %ld"
+ " file %s line %ld\n",
+ (void*) mutex, os_thread_pf(thread_id),
+ file_name, line);
+ }
+
+ mutex = UT_LIST_GET_NEXT(list, mutex);
+ }
+
+ fprintf(file, "Total number of mutexes %ld\n", count);
+
+ mutex_exit(&mutex_list_mutex);
+}
+
+/**********************************************************************
+Counts currently reserved mutexes. Works only in the debug version. */
+UNIV_INTERN
+ulint
+mutex_n_reserved(void)
+/*==================*/
+{
+ mutex_t* mutex;
+ ulint count = 0;
+
+ mutex_enter(&mutex_list_mutex);
+
+ mutex = UT_LIST_GET_FIRST(mutex_list);
+
+ while (mutex != NULL) {
+ if (mutex_get_lock_word(mutex) != 0) {
+
+ count++;
+ }
+
+ mutex = UT_LIST_GET_NEXT(list, mutex);
+ }
+
+ mutex_exit(&mutex_list_mutex);
+
+ ut_a(count >= 1);
+
+ return(count - 1); /* Subtract one, because this function itself
+ was holding one mutex (mutex_list_mutex) */
+}
+
+/**********************************************************************
+Returns TRUE if no mutex or rw-lock is currently locked. Works only in
+the debug version. */
+UNIV_INTERN
+ibool
+sync_all_freed(void)
+/*================*/
+{
+ return(mutex_n_reserved() + rw_lock_n_locked() == 0);
+}
+
+/**********************************************************************
+Gets the value in the nth slot in the thread level arrays. */
+static
+sync_thread_t*
+sync_thread_level_arrays_get_nth(
+/*=============================*/
+ /* out: pointer to thread slot */
+ ulint n) /* in: slot number */
+{
+ ut_ad(n < OS_THREAD_MAX_N);
+
+ return(sync_thread_level_arrays + n);
+}
+
+/**********************************************************************
+Looks for the thread slot for the calling thread. */
+static
+sync_thread_t*
+sync_thread_level_arrays_find_slot(void)
+/*====================================*/
+ /* out: pointer to thread slot, NULL if not found */
+
+{
+ sync_thread_t* slot;
+ os_thread_id_t id;
+ ulint i;
+
+ id = os_thread_get_curr_id();
+
+ for (i = 0; i < OS_THREAD_MAX_N; i++) {
+
+ slot = sync_thread_level_arrays_get_nth(i);
+
+ if (slot->levels && os_thread_eq(slot->id, id)) {
+
+ return(slot);
+ }
+ }
+
+ return(NULL);
+}
+
+/**********************************************************************
+Looks for an unused thread slot. */
+static
+sync_thread_t*
+sync_thread_level_arrays_find_free(void)
+/*====================================*/
+ /* out: pointer to thread slot */
+
+{
+ sync_thread_t* slot;
+ ulint i;
+
+ for (i = 0; i < OS_THREAD_MAX_N; i++) {
+
+ slot = sync_thread_level_arrays_get_nth(i);
+
+ if (slot->levels == NULL) {
+
+ return(slot);
+ }
+ }
+
+ return(NULL);
+}
+
+/**********************************************************************
+Gets the value in the nth slot in the thread level array. */
+static
+sync_level_t*
+sync_thread_levels_get_nth(
+/*=======================*/
+ /* out: pointer to level slot */
+ sync_level_t* arr, /* in: pointer to level array for an OS
+ thread */
+ ulint n) /* in: slot number */
+{
+ ut_ad(n < SYNC_THREAD_N_LEVELS);
+
+ return(arr + n);
+}
+
+/**********************************************************************
+Checks if all the level values stored in the level array are greater than
+the given limit. */
+static
+ibool
+sync_thread_levels_g(
+/*=================*/
+ /* out: TRUE if all greater */
+ sync_level_t* arr, /* in: pointer to level array for an OS
+ thread */
+ ulint limit) /* in: level limit */
+{
+ sync_level_t* slot;
+ rw_lock_t* lock;
+ mutex_t* mutex;
+ ulint i;
+
+ for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
+
+ slot = sync_thread_levels_get_nth(arr, i);
+
+ if (slot->latch != NULL) {
+ if (slot->level <= limit) {
+
+ lock = slot->latch;
+ mutex = slot->latch;
+
+ fprintf(stderr,
+ "InnoDB: sync levels should be"
+ " > %lu but a level is %lu\n",
+ (ulong) limit, (ulong) slot->level);
+
+ if (mutex->magic_n == MUTEX_MAGIC_N) {
+ fprintf(stderr,
+ "Mutex created at %s %lu\n",
+ mutex->cfile_name,
+ (ulong) mutex->cline);
+
+ if (mutex_get_lock_word(mutex) != 0) {
+ const char* file_name;
+ ulint line;
+ os_thread_id_t thread_id;
+
+ mutex_get_debug_info(
+ mutex, &file_name,
+ &line, &thread_id);
+
+ fprintf(stderr,
+ "InnoDB: Locked mutex:"
+ " addr %p thread %ld"
+ " file %s line %ld\n",
+ (void*) mutex,
+ os_thread_pf(
+ thread_id),
+ file_name,
+ (ulong) line);
+ } else {
+ fputs("Not locked\n", stderr);
+ }
+ } else {
+ rw_lock_print(lock);
+ }
+
+ return(FALSE);
+ }
+ }
+ }
+
+ return(TRUE);
+}
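+
+/* An illustrative reading of sync_thread_levels_g(): if a thread is about
+to acquire a latch of level 100 (a hypothetical value), the check succeeds
+only when every latch it already holds has a level strictly greater than
+100. In other words, latches must be acquired in descending level order. */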
+
+/**********************************************************************
+Checks if the level value is stored in the level array. */
+static
+ibool
+sync_thread_levels_contain(
+/*=======================*/
+ /* out: TRUE if stored */
+ sync_level_t* arr, /* in: pointer to level array for an OS
+ thread */
+ ulint level) /* in: level */
+{
+ sync_level_t* slot;
+ ulint i;
+
+ for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
+
+ slot = sync_thread_levels_get_nth(arr, i);
+
+ if (slot->latch != NULL) {
+ if (slot->level == level) {
+
+ return(TRUE);
+ }
+ }
+ }
+
+ return(FALSE);
+}
+
+/**********************************************************************
+Checks that the level array for the current thread is empty. */
+UNIV_INTERN
+ibool
+sync_thread_levels_empty_gen(
+/*=========================*/
+ /* out: TRUE if empty except the
+ exceptions specified below */
+ ibool dict_mutex_allowed) /* in: TRUE if dictionary mutex is
+ allowed to be owned by the thread,
+ also purge_is_running mutex is
+ allowed */
+{
+ sync_level_t* arr;
+ sync_thread_t* thread_slot;
+ sync_level_t* slot;
+ ulint i;
+
+ if (!sync_order_checks_on) {
+
+ return(TRUE);
+ }
+
+ mutex_enter(&sync_thread_mutex);
+
+ thread_slot = sync_thread_level_arrays_find_slot();
+
+ if (thread_slot == NULL) {
+
+ mutex_exit(&sync_thread_mutex);
+
+ return(TRUE);
+ }
+
+ arr = thread_slot->levels;
+
+ for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
+
+ slot = sync_thread_levels_get_nth(arr, i);
+
+ if (slot->latch != NULL
+ && (!dict_mutex_allowed
+ || (slot->level != SYNC_DICT
+ && slot->level != SYNC_DICT_OPERATION))) {
+
+ mutex_exit(&sync_thread_mutex);
+ ut_error;
+
+ return(FALSE);
+ }
+ }
+
+ mutex_exit(&sync_thread_mutex);
+
+ return(TRUE);
+}
+
+/**********************************************************************
+Checks that the level array for the current thread is empty. */
+UNIV_INTERN
+ibool
+sync_thread_levels_empty(void)
+/*==========================*/
+ /* out: TRUE if empty */
+{
+ return(sync_thread_levels_empty_gen(FALSE));
+}
+
+/**********************************************************************
+Adds a latch and its level to the thread level array. Allocates the memory
+for the array if called for the first time for this OS thread. Checks the
+level against the other latch levels stored in the array for this thread. */
+UNIV_INTERN
+void
+sync_thread_add_level(
+/*==================*/
+ void* latch, /* in: pointer to a mutex or an rw-lock */
+ ulint level) /* in: level in the latching order; if
+ SYNC_LEVEL_VARYING, nothing is done */
+{
+ sync_level_t* array;
+ sync_level_t* slot;
+ sync_thread_t* thread_slot;
+ ulint i;
+
+ if (!sync_order_checks_on) {
+
+ return;
+ }
+
+ if ((latch == (void*)&sync_thread_mutex)
+ || (latch == (void*)&mutex_list_mutex)
+ || (latch == (void*)&rw_lock_debug_mutex)
+ || (latch == (void*)&rw_lock_list_mutex)) {
+
+ return;
+ }
+
+ if (level == SYNC_LEVEL_VARYING) {
+
+ return;
+ }
+
+ mutex_enter(&sync_thread_mutex);
+
+ thread_slot = sync_thread_level_arrays_find_slot();
+
+ if (thread_slot == NULL) {
+ /* We have to allocate the level array for a new thread */
+ array = ut_malloc(sizeof(sync_level_t) * SYNC_THREAD_N_LEVELS);
+
+ thread_slot = sync_thread_level_arrays_find_free();
+
+ thread_slot->id = os_thread_get_curr_id();
+ thread_slot->levels = array;
+
+ for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
+
+ slot = sync_thread_levels_get_nth(array, i);
+
+ slot->latch = NULL;
+ }
+ }
+
+ array = thread_slot->levels;
+
+ /* NOTE that there is a problem with _NODE and _LEAF levels: if the
+ B-tree height changes, then a leaf can change to an internal node
+ or the other way around. We do not know at present if this can cause
+ unnecessary assertion failures below. */
+
+ switch (level) {
+ case SYNC_NO_ORDER_CHECK:
+ case SYNC_EXTERN_STORAGE:
+ case SYNC_TREE_NODE_FROM_HASH:
+ /* Do no order checking */
+ break;
+ case SYNC_MEM_POOL:
+ case SYNC_MEM_HASH:
+ case SYNC_RECV:
+ case SYNC_WORK_QUEUE:
+ case SYNC_LOG:
+ case SYNC_THR_LOCAL:
+ case SYNC_ANY_LATCH:
+ case SYNC_TRX_SYS_HEADER:
+ case SYNC_FILE_FORMAT_TAG:
+ case SYNC_DOUBLEWRITE:
+ case SYNC_BUF_POOL:
+ case SYNC_SEARCH_SYS:
+ case SYNC_SEARCH_SYS_CONF:
+ case SYNC_TRX_LOCK_HEAP:
+ case SYNC_KERNEL:
+ case SYNC_IBUF_BITMAP_MUTEX:
+ case SYNC_RSEG:
+ case SYNC_TRX_UNDO:
+ case SYNC_PURGE_LATCH:
+ case SYNC_PURGE_SYS:
+ case SYNC_DICT_AUTOINC_MUTEX:
+ case SYNC_DICT_OPERATION:
+ case SYNC_DICT_HEADER:
+ case SYNC_TRX_I_S_RWLOCK:
+ case SYNC_TRX_I_S_LAST_READ:
+ if (!sync_thread_levels_g(array, level)) {
+ fprintf(stderr,
+ "InnoDB: sync_thread_levels_g(array, %lu)"
+ " does not hold!\n", level);
+ ut_error;
+ }
+ break;
+ case SYNC_BUF_BLOCK:
+ /* Either the thread must own the buffer pool mutex
+ (buf_pool_mutex), or it is allowed to latch only ONE
+ buffer block (block->mutex or buf_pool_zip_mutex). */
+ ut_a((sync_thread_levels_contain(array, SYNC_BUF_POOL)
+ && sync_thread_levels_g(array, SYNC_BUF_BLOCK - 1))
+ || sync_thread_levels_g(array, SYNC_BUF_BLOCK));
+ break;
+ case SYNC_REC_LOCK:
+ ut_a((sync_thread_levels_contain(array, SYNC_KERNEL)
+ && sync_thread_levels_g(array, SYNC_REC_LOCK - 1))
+ || sync_thread_levels_g(array, SYNC_REC_LOCK));
+ break;
+ case SYNC_IBUF_BITMAP:
+ /* Either the thread must own the master mutex to all
+ the bitmap pages, or it is allowed to latch only ONE
+ bitmap page. */
+ ut_a((sync_thread_levels_contain(array, SYNC_IBUF_BITMAP_MUTEX)
+ && sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1))
+ || sync_thread_levels_g(array, SYNC_IBUF_BITMAP));
+ break;
+ case SYNC_FSP_PAGE:
+ ut_a(sync_thread_levels_contain(array, SYNC_FSP));
+ break;
+ case SYNC_FSP:
+ ut_a(sync_thread_levels_contain(array, SYNC_FSP)
+ || sync_thread_levels_g(array, SYNC_FSP));
+ break;
+ case SYNC_TRX_UNDO_PAGE:
+ ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
+ || sync_thread_levels_contain(array, SYNC_RSEG)
+ || sync_thread_levels_contain(array, SYNC_PURGE_SYS)
+ || sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE));
+ break;
+ case SYNC_RSEG_HEADER:
+ ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
+ break;
+ case SYNC_RSEG_HEADER_NEW:
+ ut_a(sync_thread_levels_contain(array, SYNC_KERNEL)
+ && sync_thread_levels_contain(array, SYNC_FSP_PAGE));
+ break;
+ case SYNC_TREE_NODE:
+ ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
+ || sync_thread_levels_contain(array, SYNC_DICT_OPERATION)
+ || sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
+ break;
+ case SYNC_TREE_NODE_NEW:
+ ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE)
+ || sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
+ break;
+ case SYNC_INDEX_TREE:
+ ut_a((sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
+ && sync_thread_levels_contain(array, SYNC_FSP)
+ && sync_thread_levels_g(array, SYNC_FSP_PAGE - 1))
+ || sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
+ break;
+ case SYNC_IBUF_MUTEX:
+ ut_a(sync_thread_levels_g(array, SYNC_FSP_PAGE - 1));
+ break;
+ case SYNC_IBUF_PESS_INSERT_MUTEX:
+ ut_a(sync_thread_levels_g(array, SYNC_FSP - 1)
+ && !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
+ break;
+ case SYNC_IBUF_HEADER:
+ ut_a(sync_thread_levels_g(array, SYNC_FSP - 1)
+ && !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
+ && !sync_thread_levels_contain(
+ array, SYNC_IBUF_PESS_INSERT_MUTEX));
+ break;
+ case SYNC_DICT:
+#ifdef UNIV_DEBUG
+ ut_a(buf_debug_prints
+ || sync_thread_levels_g(array, SYNC_DICT));
+#else /* UNIV_DEBUG */
+ ut_a(sync_thread_levels_g(array, SYNC_DICT));
+#endif /* UNIV_DEBUG */
+ break;
+ default:
+ ut_error;
+ }
+
+ for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
+
+ slot = sync_thread_levels_get_nth(array, i);
+
+ if (slot->latch == NULL) {
+ slot->latch = latch;
+ slot->level = level;
+
+ break;
+ }
+ }
+
+ ut_a(i < SYNC_THREAD_N_LEVELS);
+
+ mutex_exit(&sync_thread_mutex);
+}
+
+/**********************************************************************
+Removes a latch from the thread level array if it is found there. */
+UNIV_INTERN
+ibool
+sync_thread_reset_level(
+/*====================*/
+	/* out: TRUE if found in the array; it is an error
+	if the latch is not found */
+ void* latch) /* in: pointer to a mutex or an rw-lock */
+{
+ sync_level_t* array;
+ sync_level_t* slot;
+ sync_thread_t* thread_slot;
+ ulint i;
+
+ if (!sync_order_checks_on) {
+
+ return(FALSE);
+ }
+
+ if ((latch == (void*)&sync_thread_mutex)
+ || (latch == (void*)&mutex_list_mutex)
+ || (latch == (void*)&rw_lock_debug_mutex)
+ || (latch == (void*)&rw_lock_list_mutex)) {
+
+ return(FALSE);
+ }
+
+ mutex_enter(&sync_thread_mutex);
+
+ thread_slot = sync_thread_level_arrays_find_slot();
+
+ if (thread_slot == NULL) {
+
+ ut_error;
+
+ mutex_exit(&sync_thread_mutex);
+ return(FALSE);
+ }
+
+ array = thread_slot->levels;
+
+ for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
+
+ slot = sync_thread_levels_get_nth(array, i);
+
+ if (slot->latch == latch) {
+ slot->latch = NULL;
+
+ mutex_exit(&sync_thread_mutex);
+
+ return(TRUE);
+ }
+ }
+
+ if (((mutex_t*) latch)->magic_n != MUTEX_MAGIC_N) {
+ rw_lock_t* rw_lock;
+
+ rw_lock = (rw_lock_t*) latch;
+
+ if (rw_lock->level == SYNC_LEVEL_VARYING) {
+ mutex_exit(&sync_thread_mutex);
+
+ return(TRUE);
+ }
+ }
+
+ ut_error;
+
+ mutex_exit(&sync_thread_mutex);
+
+ return(FALSE);
+}
+#endif /* UNIV_SYNC_DEBUG */
+
+/**********************************************************************
+Initializes the synchronization data structures. */
+UNIV_INTERN
+void
+sync_init(void)
+/*===========*/
+{
+#ifdef UNIV_SYNC_DEBUG
+ sync_thread_t* thread_slot;
+ ulint i;
+#endif /* UNIV_SYNC_DEBUG */
+
+ ut_a(sync_initialized == FALSE);
+
+ sync_initialized = TRUE;
+
+ /* Create the primary system wait array which is protected by an OS
+ mutex */
+
+ sync_primary_wait_array = sync_array_create(OS_THREAD_MAX_N,
+ SYNC_ARRAY_OS_MUTEX);
+#ifdef UNIV_SYNC_DEBUG
+ /* Create the thread latch level array where the latch levels
+ are stored for each OS thread */
+
+ sync_thread_level_arrays = ut_malloc(OS_THREAD_MAX_N
+ * sizeof(sync_thread_t));
+ for (i = 0; i < OS_THREAD_MAX_N; i++) {
+
+ thread_slot = sync_thread_level_arrays_get_nth(i);
+ thread_slot->levels = NULL;
+ }
+#endif /* UNIV_SYNC_DEBUG */
+ /* Init the mutex list and create the mutex to protect it. */
+
+ UT_LIST_INIT(mutex_list);
+ mutex_create(&mutex_list_mutex, SYNC_NO_ORDER_CHECK);
+#ifdef UNIV_SYNC_DEBUG
+ mutex_create(&sync_thread_mutex, SYNC_NO_ORDER_CHECK);
+#endif /* UNIV_SYNC_DEBUG */
+
+ /* Init the rw-lock list and create the mutex to protect it. */
+
+ UT_LIST_INIT(rw_lock_list);
+ mutex_create(&rw_lock_list_mutex, SYNC_NO_ORDER_CHECK);
+
+#ifdef UNIV_SYNC_DEBUG
+ mutex_create(&rw_lock_debug_mutex, SYNC_NO_ORDER_CHECK);
+
+ rw_lock_debug_event = os_event_create(NULL);
+ rw_lock_debug_waiters = FALSE;
+#endif /* UNIV_SYNC_DEBUG */
+}
+
+/**********************************************************************
+Frees the resources in InnoDB's own synchronization data structures. Use
+os_sync_free() after calling this. */
+UNIV_INTERN
+void
+sync_close(void)
+/*===========*/
+{
+ mutex_t* mutex;
+
+ sync_array_free(sync_primary_wait_array);
+
+ mutex = UT_LIST_GET_FIRST(mutex_list);
+
+ while (mutex) {
+ mutex_free(mutex);
+ mutex = UT_LIST_GET_FIRST(mutex_list);
+ }
+
+ mutex_free(&mutex_list_mutex);
+#ifdef UNIV_SYNC_DEBUG
+ mutex_free(&sync_thread_mutex);
+#endif /* UNIV_SYNC_DEBUG */
+}
+
+/***********************************************************************
+Prints wait info of the sync system. */
+UNIV_INTERN
+void
+sync_print_wait_info(
+/*=================*/
+ FILE* file) /* in: file where to print */
+{
+#ifdef UNIV_SYNC_DEBUG
+ fprintf(file, "Mutex exits %llu, rws exits %llu, rwx exits %llu\n",
+ mutex_exit_count, rw_s_exit_count, rw_x_exit_count);
+#endif
+
+ fprintf(file,
+ "Mutex spin waits %llu, rounds %llu, OS waits %llu\n"
+ "RW-shared spins %llu, OS waits %llu;"
+ " RW-excl spins %llu, OS waits %llu\n",
+ mutex_spin_wait_count,
+ mutex_spin_round_count,
+ mutex_os_wait_count,
+ rw_s_spin_wait_count,
+ rw_s_os_wait_count,
+ rw_x_spin_wait_count,
+ rw_x_os_wait_count);
+
+ fprintf(file,
+ "Spin rounds per wait: %.2f mutex, %.2f RW-shared, "
+ "%.2f RW-excl\n",
+ (double) mutex_spin_round_count /
+ (mutex_spin_wait_count ? mutex_spin_wait_count : 1),
+ (double) rw_s_spin_round_count /
+ (rw_s_spin_wait_count ? rw_s_spin_wait_count : 1),
+ (double) rw_x_spin_round_count /
+ (rw_x_spin_wait_count ? rw_x_spin_wait_count : 1));
+}
+
+/***********************************************************************
+Prints info of the sync system. */
+UNIV_INTERN
+void
+sync_print(
+/*=======*/
+ FILE* file) /* in: file where to print */
+{
+#ifdef UNIV_SYNC_DEBUG
+ mutex_list_print_info(file);
+
+ rw_lock_list_print_info(file);
+#endif /* UNIV_SYNC_DEBUG */
+
+ sync_array_print_info(file, sync_primary_wait_array);
+
+ sync_print_wait_info(file);
+}