summaryrefslogtreecommitdiff
path: root/storage/innobase/sync/sync0sync.c
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/sync/sync0sync.c')
-rw-r--r--storage/innobase/sync/sync0sync.c1346
1 files changed, 1346 insertions, 0 deletions
diff --git a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
new file mode 100644
index 00000000000..e604912e996
--- /dev/null
+++ b/storage/innobase/sync/sync0sync.c
@@ -0,0 +1,1346 @@
+/******************************************************
+Mutex, the basic synchronization primitive
+
+(c) 1995 Innobase Oy
+
+Created 9/5/1995 Heikki Tuuri
+*******************************************************/
+
+#include "sync0sync.h"
+#ifdef UNIV_NONINL
+#include "sync0sync.ic"
+#endif
+
+#include "sync0rw.h"
+#include "buf0buf.h"
+#include "srv0srv.h"
+#include "buf0types.h"
+
+/*
+ REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX
+ ============================================
+
+Semaphore operations in operating systems are slow: Solaris on a 1993 Sparc
+takes 3 microseconds (us) for a lock-unlock pair and Windows NT on a 1995
+Pentium takes 20 microseconds for a lock-unlock pair. Therefore, we have to
+implement our own efficient spin lock mutex. Future operating systems may
+provide efficient spin locks, but we cannot count on that.
+
+Another reason for implementing a spin lock is that on multiprocessor systems
+it can be more efficient for a processor to run a loop waiting for the
+semaphore to be released than to switch to a different thread. A thread switch
+takes 25 us on both platforms mentioned above. See Gray and Reuter's book
+Transaction processing for background.
+
+How long should the spin loop last before suspending the thread? On a
+uniprocessor, spinning does not help at all, because if the thread owning the
+mutex is not executing, it cannot be released. Spinning actually wastes
+resources.
+
+On a multiprocessor, we do not know if the thread owning the mutex is
+executing or not. Thus it would make sense to spin as long as the operation
+guarded by the mutex would typically last assuming that the thread is
+executing. If the mutex is not released by that time, we may assume that the
+thread owning the mutex is not executing and suspend the waiting thread.
+
+A typical operation (where no i/o involved) guarded by a mutex or a read-write
+lock may last 1 - 20 us on the current Pentium platform. The longest
+operations are the binary searches on an index node.
+
+We conclude that the best choice is to set the spin time at 20 us. Then the
+system should work well on a multiprocessor. On a uniprocessor we have to
+make sure that thread switches due to mutex collisions are not frequent,
+i.e., they do not happen every 100 us or so, because that wastes too many
+resources. If the thread switches are not frequent, the 20 us wasted in the
+spin loop is not too much.
+
+Empirical studies on the effect of spin time should be done for different
+platforms.
+
+
+ IMPLEMENTATION OF THE MUTEX
+ ===========================
+
+For background, see Curt Schimmel's book on Unix implementation on modern
+architectures. The key points in the implementation are atomicity and
+serialization of memory accesses. The test-and-set instruction (XCHG in
+Pentium) must be atomic. As new processors may have weak memory models, also
+serialization of memory references may be necessary. The successor of Pentium,
+P6, has at least one mode where the memory model is weak. As far as we know,
+in Pentium all memory accesses are serialized in the program order and we do
+not have to worry about the memory model. On other processors there are
+special machine instructions called a fence, memory barrier, or storage
+barrier (STBAR in Sparc), which can be used to serialize the memory accesses
+to happen in program order relative to the fence instruction.
+
+Leslie Lamport has devised a "bakery algorithm" to implement a mutex without
+the atomic test-and-set, but his algorithm should be modified for weak memory
+models. We do not use Lamport's algorithm, because we guess it is slower than
+the atomic test-and-set.
+
+Our mutex implementation works as follows: we perform the atomic
+test-and-set instruction on the memory word. If the test returns zero, we
+know we got the lock first. If the test returns not zero, some other thread
+was quicker and got the lock: then we spin in a loop reading the memory word,
+waiting for it to become zero. It is wise to just read the word in the loop,
+not perform numerous test-and-set instructions, because they generate memory
+traffic between the cache and the main memory. The read loop can just access
+the cache, saving bus bandwidth.
+
+If we cannot acquire the mutex lock in the specified time, we reserve a cell
+in the wait array and set the waiters byte in the mutex to 1. To avoid a race
+condition, after setting the waiters byte and before suspending the waiting
+thread, we still have to check that the mutex is reserved, because it may
+have happened that the thread which was holding the mutex has just released
+it and did not see the waiters byte set to 1, a case which would lead the
+other thread to an infinite wait.
+
+LEMMA 1: After a thread resets the event of the cell it reserves for waiting
+========
+for a mutex, some thread will eventually call sync_array_signal_object with
+the mutex as an argument. Thus no infinite wait is possible.
+
+Proof: After making the reservation the thread sets the waiters field in the
+mutex to 1. Then it checks that the mutex is still reserved by some thread,
+or it reserves the mutex for itself. In any case, some thread (which may be
+also some earlier thread, not necessarily the one currently holding the mutex)
+will set the waiters field to 0 in mutex_exit, and then call
+sync_array_signal_object with the mutex as an argument.
+Q.E.D. */
+
+ulint sync_dummy = 0; /* NOTE(review): not referenced anywhere in the
+ visible part of this file; purpose unclear --
+ confirm before touching */
+
+/* The number of system calls made in this module. Intended for performance
+monitoring. In mutex_spin_wait it is incremented once when a wait cell is
+reserved and once more just before the thread waits on that cell. */
+
+ulint mutex_system_call_count = 0;
+
+/* Number of spin waits on mutexes: for performance monitoring */
+
+ulint mutex_spin_round_count = 0; /* spin rounds, summed up in
+ mutex_spin_wait */
+ulint mutex_spin_wait_count = 0; /* incremented every time
+ mutex_spin_wait passes its spin_loop
+ label */
+ulint mutex_os_wait_count = 0; /* incremented before every wait on a
+ wait array cell in mutex_spin_wait */
+ulint mutex_exit_count = 0; /* not updated in the visible part of
+ this file; presumably maintained by
+ mutex_exit -- TODO confirm */
+
+/* The global array of wait cells for implementation of the database's own
+mutexes and read-write locks */
+sync_array_t* sync_primary_wait_array;
+
+/* This variable is set to TRUE when sync_init is called */
+ibool sync_initialized = FALSE;
+
+
+typedef struct sync_level_struct sync_level_t;
+typedef struct sync_thread_struct sync_thread_t;
+
+/* The latch levels currently owned by threads are stored in this data
+structure; the size of this array is OS_THREAD_MAX_N. A slot whose levels
+pointer is NULL is free (see sync_thread_level_arrays_find_free). */
+
+sync_thread_t* sync_thread_level_arrays;
+
+/* Mutex protecting sync_thread_level_arrays. It is never added to the
+global mutex list (see mutex_create_func) and is skipped by
+sync_thread_add_level. */
+mutex_t sync_thread_mutex;
+
+/* Latching order checks start when this is set TRUE; while it is FALSE
+sync_thread_add_level and sync_thread_levels_empty_gen return at once. */
+ibool sync_order_checks_on = FALSE;
+
+/* Dummy mutex used to implement mutex_fence */
+mutex_t dummy_mutex_for_fence;
+
+struct sync_thread_struct{ /* one slot of sync_thread_level_arrays: the
+ latch levels held by a single OS thread */
+ os_thread_id_t id; /* OS thread id */
+ sync_level_t* levels; /* level array for this thread, holding
+ SYNC_THREAD_N_LEVELS entries and allocated in
+ sync_thread_add_level; if this is NULL
+ this slot is unused */
+};
+
+/* Number of slots reserved for each OS thread in the sync level array.
+sync_thread_add_level allocates sizeof(sync_level_t) times this many bytes
+for every thread that gets a level array. */
+#define SYNC_THREAD_N_LEVELS 10000
+
+struct sync_level_struct{ /* one entry of a per-thread level array */
+ void* latch; /* pointer to a mutex or an rw-lock; NULL means that
+ the slot is empty */
+ ulint level; /* level of the latch in the latching order; only
+ meaningful while latch is not NULL */
+};
+
+/**********************************************************************
+A noninlined function that reserves a mutex. In ha_innodb.cc we have disabled
+inlining of InnoDB functions, and no inlined functions should be called from
+there. That is why we need to duplicate the inlined function here.
+The body only forwards the given mutex to mutex_enter(). */
+
+void
+mutex_enter_noninline(
+/*==================*/
+ mutex_t* mutex) /* in: mutex */
+{
+ mutex_enter(mutex);
+}
+
+/**********************************************************************
+Releases a mutex. Noninlined counterpart of mutex_enter_noninline: the body
+only forwards the given mutex to mutex_exit(). */
+
+void
+mutex_exit_noninline(
+/*=================*/
+ mutex_t* mutex) /* in: mutex */
+{
+ mutex_exit(mutex);
+}
+
+/**********************************************************************
+Creates, or rather, initializes a mutex object in a specified memory
+location (which must be appropriately aligned). The mutex is initialized
+in the reset state. Explicit freeing of the mutex with mutex_free is
+necessary only if the memory block containing it is freed.
+
+The lock word and the waiters field are cleared, the magic number is set,
+the level is set to SYNC_LEVEL_NONE and the creation site is recorded.
+Every mutex except mutex_list_mutex and sync_thread_mutex is then linked
+to the head of the global mutex_list while holding mutex_list_mutex. */
+
+void
+mutex_create_func(
+/*==============*/
+ mutex_t* mutex, /* in: pointer to memory */
+ const char* cfile_name, /* in: file name where created */
+ ulint cline, /* in: file line where created */
+ const char* cmutex_name) /* in: mutex name */
+{
+#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
+ mutex_reset_lock_word(mutex);
+#else
+ os_fast_mutex_init(&(mutex->os_fast_mutex));
+ mutex->lock_word = 0;
+#endif
+ mutex_set_waiters(mutex, 0);
+ mutex->magic_n = MUTEX_MAGIC_N; /* checked by mutex_validate */
+#ifdef UNIV_SYNC_DEBUG
+ mutex->line = 0;
+ mutex->file_name = "not yet reserved";
+#endif /* UNIV_SYNC_DEBUG */
+ mutex->level = SYNC_LEVEL_NONE; /* sync_thread_add_level does nothing
+ for this level; it can be changed
+ with mutex_set_level */
+ mutex->cfile_name = cfile_name;
+ mutex->cline = cline;
+#ifndef UNIV_HOTBACKUP
+ mutex->cmutex_name= cmutex_name;
+ mutex->count_using= 0;
+ mutex->mutex_type= 0;
+ mutex->lspent_time= 0;
+ mutex->lmax_spent_time= 0;
+ mutex->count_spin_loop= 0;
+ mutex->count_spin_rounds= 0;
+ mutex->count_os_wait= 0;
+ mutex->count_os_yield= 0;
+#endif /* !UNIV_HOTBACKUP */
+
+ /* Check that lock_word is aligned; this is important on Intel */
+ ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0);
+
+ /* NOTE! The very first mutexes are not put to the mutex list.
+ Adding a mutex to the list requires locking mutex_list_mutex (see
+ below), so that mutex itself is left out, and so is
+ sync_thread_mutex. */
+
+ if ((mutex == &mutex_list_mutex) || (mutex == &sync_thread_mutex)) {
+
+ return;
+ }
+
+ mutex_enter(&mutex_list_mutex);
+
+ if (UT_LIST_GET_LEN(mutex_list) > 0) { /* sanity check of the
+ current list head before
+ linking in front of it */
+ ut_a(UT_LIST_GET_FIRST(mutex_list)->magic_n == MUTEX_MAGIC_N);
+ }
+
+ UT_LIST_ADD_FIRST(list, mutex_list, mutex);
+
+ mutex_exit(&mutex_list_mutex);
+}
+
+/**********************************************************************
+Unregisters a mutex. Calling this function is obligatory only if the memory
+buffer containing the mutex is freed. The mutex must be in the reset state:
+its lock word and its waiters field are asserted to be zero. Unless the
+mutex is mutex_list_mutex or sync_thread_mutex, it is unlinked from the
+global mutex list while holding mutex_list_mutex; the magic numbers of its
+list neighbours are asserted first. Finally the magic number of the mutex
+is cleared. */
+
+void
+mutex_free(
+/*=======*/
+	mutex_t*	mutex)	/* in: mutex */
+{
+	mutex_t*	prev_mutex;
+	mutex_t*	next_mutex;
+	ibool		is_listed;
+
+#ifdef UNIV_DEBUG
+	ut_a(mutex_validate(mutex));
+#endif /* UNIV_DEBUG */
+	ut_a(mutex_get_lock_word(mutex) == 0);
+	ut_a(mutex_get_waiters(mutex) == 0);
+
+	/* The two bootstrap mutexes are never put to the list, see
+	mutex_create_func */
+	is_listed = !(mutex == &mutex_list_mutex
+		      || mutex == &sync_thread_mutex);
+
+	if (is_listed) {
+
+		mutex_enter(&mutex_list_mutex);
+
+		prev_mutex = UT_LIST_GET_PREV(list, mutex);
+
+		if (prev_mutex) {
+			ut_a(prev_mutex->magic_n == MUTEX_MAGIC_N);
+		}
+
+		next_mutex = UT_LIST_GET_NEXT(list, mutex);
+
+		if (next_mutex) {
+			ut_a(next_mutex->magic_n == MUTEX_MAGIC_N);
+		}
+
+		UT_LIST_REMOVE(list, mutex_list, mutex);
+
+		mutex_exit(&mutex_list_mutex);
+	}
+
+#if !(defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER))
+	os_fast_mutex_free(&(mutex->os_fast_mutex));
+#endif
+	/* If we free the mutex protecting the mutex list (freeing is
+	not necessary), we have to reset the magic number AFTER removing
+	it from the list. */
+
+	mutex->magic_n = 0;
+}
+
+/************************************************************************
+Makes a single attempt to lock the mutex for the current thread, without
+spinning or waiting. In a UNIV_SYNC_DEBUG build a successful attempt also
+records the requesting file and line in the mutex. */
+
+ulint
+mutex_enter_nowait(
+/*===============*/
+					/* out: 0 if succeed, 1 if not */
+	mutex_t*	mutex,		/* in: pointer to mutex */
+	const char*	file_name __attribute__((unused)),
+					/* in: file name where mutex
+					requested */
+	ulint		line __attribute__((unused)))
+					/* in: line where requested */
+{
+	ut_ad(mutex_validate(mutex));
+
+	if (mutex_test_and_set(mutex) != 0) {
+		/* The test-and-set did not return zero: no lock for us */
+
+		return(1);
+	}
+
+#ifdef UNIV_SYNC_DEBUG
+	mutex_set_debug_info(mutex, file_name, line);
+#endif
+
+	return(0);	/* Succeeded! */
+}
+
+/**********************************************************************
+Checks that the mutex has been initialized: the pointer must be non-NULL
+and the magic number must equal MUTEX_MAGIC_N (set in mutex_create_func,
+cleared in mutex_free). Both checks are made with ut_a; the function has no
+path that returns FALSE, so a caller only ever sees TRUE. */
+
+ibool
+mutex_validate(
+/*===========*/
+ mutex_t* mutex) /* in: mutex to check */
+{
+ ut_a(mutex);
+ ut_a(mutex->magic_n == MUTEX_MAGIC_N);
+
+ return(TRUE);
+}
+
+/**********************************************************************
+Stores a new value in the waiters field of a mutex. The store is made
+through a volatile-qualified lvalue to ensure that the value is really
+written to memory. */
+
+void
+mutex_set_waiters(
+/*==============*/
+	mutex_t*	mutex,	/* in: mutex */
+	ulint		n)	/* in: value to set */
+{
+	ut_ad(mutex);
+
+	/* Here we assume that the write of a single word in memory is
+	atomic */
+
+	*((volatile ulint*) &(mutex->waiters)) = n;
+}
+
+/**********************************************************************
+Reserves a mutex for the current thread. If the mutex is reserved, the
+function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting
+for the mutex before suspending the thread.
+
+Outline of the code below:
+(1) The lock word is read in a loop for at most SYNC_SPIN_ROUNDS rounds,
+ with a random delay per round if srv_spin_wait_delay is nonzero. If all
+ rounds are used up, the thread yields once with os_thread_yield.
+(2) An atomic test-and-set is tried. On success the function returns.
+ On failure the round counter is advanced by one and, if rounds remain,
+ the code goes back to (1).
+(3) A cell is reserved in sync_primary_wait_array, the waiters field is
+ set to 1 and the test-and-set is retried 4 times. On success the cell
+ is freed and the function returns; waiters is left at 1.
+(4) Otherwise the thread waits on the cell and then starts again from (1)
+ with a fresh round counter.
+
+Unless UNIV_HOTBACKUP is defined, per-mutex counters are updated on the
+way, and if timed_mutexes == 1 the time from the first yield or OS wait
+until the mutex is acquired is added to mutex->lspent_time and tracked in
+mutex->lmax_spent_time. */
+
+void
+mutex_spin_wait(
+/*============*/
+ mutex_t* mutex, /* in: pointer to mutex */
+ const char* file_name, /* in: file name where
+ mutex requested */
+ ulint line) /* in: line where requested */
+{
+ ulint index; /* index of the reserved wait cell */
+ ulint i; /* spin round count */
+#ifndef UNIV_HOTBACKUP
+ ib_longlong lstart_time = 0, lfinish_time; /* for timing os_wait */
+ ulint ltime_diff;
+ ulint sec; /* filled in by ut_usectime */
+ ulint ms; /* filled in by ut_usectime; combined below as
+ sec * 1000000 + ms, so presumably microseconds
+ despite the name -- TODO confirm */
+ uint timer_started = 0; /* 1 once lstart_time has been taken */
+#endif /* !UNIV_HOTBACKUP */
+ ut_ad(mutex);
+
+mutex_loop:
+
+ i = 0;
+
+/* Spin waiting for the lock word to become zero. Note that we do not
+ have to assume that the read access to the lock word is atomic, as the
+ actual locking is always committed with atomic test-and-set. In
+ reality, however, all processors probably have an atomic read of a
+ memory word. */
+
+spin_loop:
+#ifndef UNIV_HOTBACKUP
+ mutex_spin_wait_count++;
+ mutex->count_spin_loop++;
+#endif /* !UNIV_HOTBACKUP */
+
+ while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS)
+ {
+ if (srv_spin_wait_delay)
+ {
+ ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
+ }
+
+ i++;
+ }
+
+ if (i == SYNC_SPIN_ROUNDS)
+ {
+#ifndef UNIV_HOTBACKUP
+ mutex->count_os_yield++;
+ if (timed_mutexes == 1 && timer_started==0)
+ {
+ ut_usectime(&sec, &ms);
+ lstart_time= (ib_longlong)sec * 1000000 + ms;
+ timer_started = 1;
+ }
+#endif /* !UNIV_HOTBACKUP */
+ os_thread_yield();
+ }
+
+#ifdef UNIV_SRV_PRINT_LATCH_WAITS
+ fprintf(stderr,
+ "Thread %lu spin wait mutex at %p cfile %s cline %lu rnds %lu\n",
+ (ulong) os_thread_pf(os_thread_get_curr_id()), mutex,
+ mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
+#endif
+
+ mutex_spin_round_count += i;
+
+#ifndef UNIV_HOTBACKUP
+ mutex->count_spin_rounds += i;
+#endif /* !UNIV_HOTBACKUP */
+
+ if (mutex_test_and_set(mutex) == 0)
+ {
+ /* Succeeded! */
+
+#ifdef UNIV_SYNC_DEBUG
+ mutex_set_debug_info(mutex, file_name, line);
+#endif
+
+ goto finish_timing;
+ }
+
+ /* We may end up with a situation where lock_word is
+ 0 but the OS fast mutex is still reserved. On FreeBSD
+ the OS does not seem to schedule a thread which is constantly
+ calling pthread_mutex_trylock (in mutex_test_and_set
+ implementation). Then we could end up spinning here indefinitely.
+ The following 'i++' stops this infinite spin. */
+
+ i++;
+
+ if (i < SYNC_SPIN_ROUNDS)
+ {
+ goto spin_loop;
+ }
+
+ sync_array_reserve_cell(sync_primary_wait_array, mutex,
+ SYNC_MUTEX, file_name, line, &index);
+
+ mutex_system_call_count++;
+
+ /* The memory order of the array reservation and the change in the
+ waiters field is important: when we suspend a thread, we first
+ reserve the cell and then set waiters field to 1. When threads are
+ released in mutex_exit, the waiters field is first set to zero and
+ then the event is set to the signaled state. */
+
+ mutex_set_waiters(mutex, 1);
+
+ /* Try to reserve still a few times */
+ for (i = 0; i < 4; i++)
+ {
+ if (mutex_test_and_set(mutex) == 0)
+ {
+ /* Succeeded! Free the reserved wait cell */
+
+ sync_array_free_cell(sync_primary_wait_array, index);
+
+#ifdef UNIV_SYNC_DEBUG
+ mutex_set_debug_info(mutex, file_name, line);
+#endif
+
+#ifdef UNIV_SRV_PRINT_LATCH_WAITS
+ fprintf(stderr, "Thread %lu spin wait succeeds at 2:"
+ " mutex at %p\n",
+ (ulong) os_thread_pf(os_thread_get_curr_id()),
+ mutex);
+#endif
+
+ goto finish_timing;
+
+ /* Note that in this case we leave the waiters field
+ set to 1. We cannot reset it to zero, as we do not know
+ if there are other waiters. */
+ }
+ }
+
+ /* Now we know that there has been some thread holding the mutex
+ after the change in the wait array and the waiters field was made.
+Now there is no risk of infinite wait on the event. */
+
+#ifdef UNIV_SRV_PRINT_LATCH_WAITS
+ fprintf(stderr,
+ "Thread %lu OS wait mutex at %p cfile %s cline %lu rnds %lu\n",
+ (ulong) os_thread_pf(os_thread_get_curr_id()), mutex,
+ mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
+#endif
+
+ mutex_system_call_count++;
+ mutex_os_wait_count++;
+
+#ifndef UNIV_HOTBACKUP
+ mutex->count_os_wait++;
+ /*
+ !!!!! Sometimes os_wait can be called without os_thread_yield,
+ so the timer may still have to be started here.
+ */
+
+ if (timed_mutexes == 1 && timer_started==0)
+ {
+ ut_usectime(&sec, &ms);
+ lstart_time= (ib_longlong)sec * 1000000 + ms;
+ timer_started = 1;
+ }
+#endif /* !UNIV_HOTBACKUP */
+
+ sync_array_wait_event(sync_primary_wait_array, index);
+ goto mutex_loop; /* woken up: start over with a fresh round count */
+
+finish_timing:
+#ifndef UNIV_HOTBACKUP
+ if (timed_mutexes == 1 && timer_started==1)
+ {
+ ut_usectime(&sec, &ms);
+ lfinish_time= (ib_longlong)sec * 1000000 + ms;
+
+ ltime_diff= (ulint) (lfinish_time - lstart_time);
+ mutex->lspent_time += ltime_diff;
+ if (mutex->lmax_spent_time < ltime_diff)
+ {
+ mutex->lmax_spent_time= ltime_diff;
+ }
+ }
+#endif /* !UNIV_HOTBACKUP */
+ return;
+}
+
+/**********************************************************************
+Releases the threads waiting in the primary wait array for this mutex.
+The waiters field is cleared first and only then is the mutex signaled in
+sync_primary_wait_array; mutex_spin_wait relies on the mirror order
+(reserve a cell, then set waiters to 1). */
+
+void
+mutex_signal_object(
+/*================*/
+ mutex_t* mutex) /* in: mutex */
+{
+ mutex_set_waiters(mutex, 0);
+
+ /* The memory order of resetting the waiters field and
+ signaling the object is important. See LEMMA 1 above. */
+
+ sync_array_signal_object(sync_primary_wait_array, mutex);
+}
+
+#ifdef UNIV_SYNC_DEBUG
+/**********************************************************************
+Sets the debug information for a reserved mutex. Only compiled when
+UNIV_SYNC_DEBUG is defined. The mutex is first registered in the calling
+thread's level array with sync_thread_add_level (which also performs the
+latching order checks); then the requesting file, line and the id of the
+current thread are stored in the mutex. */
+
+void
+mutex_set_debug_info(
+/*=================*/
+ mutex_t* mutex, /* in: mutex */
+ const char* file_name, /* in: file where requested */
+ ulint line) /* in: line where requested */
+{
+ ut_ad(mutex);
+ ut_ad(file_name);
+
+ sync_thread_add_level(mutex, mutex->level);
+
+ mutex->file_name = file_name; /* the pointer is stored, the string
+ is not copied */
+ mutex->line = line;
+ mutex->thread_id = os_thread_get_curr_id();
+}
+
+/**********************************************************************
+Gets the debug information for a reserved mutex, i.e. the values stored
+by mutex_set_debug_info. Only compiled when UNIV_SYNC_DEBUG is defined.
+The fields are read without taking any lock in this function. */
+
+void
+mutex_get_debug_info(
+/*=================*/
+ mutex_t* mutex, /* in: mutex */
+ const char** file_name, /* out: file where requested */
+ ulint* line, /* out: line where requested */
+ os_thread_id_t* thread_id) /* out: id of the thread which owns
+ the mutex */
+{
+ ut_ad(mutex);
+
+ *file_name = mutex->file_name;
+ *line = mutex->line;
+ *thread_id = mutex->thread_id;
+}
+#endif /* UNIV_SYNC_DEBUG */
+
+/**********************************************************************
+Sets the mutex latching level field. The level is what
+mutex_set_debug_info passes to sync_thread_add_level for the latching
+order checks; mutex_create_func initializes it to SYNC_LEVEL_NONE. */
+
+void
+mutex_set_level(
+/*============*/
+ mutex_t* mutex, /* in: mutex */
+ ulint level) /* in: level */
+{
+ mutex->level = level;
+}
+
+
+#ifdef UNIV_SYNC_DEBUG
+/**********************************************************************
+Tells whether the calling thread is the recorded owner of the mutex: the
+lock word must be 1 and the thread id stored in the mutex must equal the
+id of the current thread. Works only in the debug version. */
+
+ibool
+mutex_own(
+/*======*/
+				/* out: TRUE if owns */
+	mutex_t*	mutex)	/* in: mutex */
+{
+	ut_a(mutex_validate(mutex));
+
+	if (mutex_get_lock_word(mutex) == 1
+	    && os_thread_eq(mutex->thread_id, os_thread_get_curr_id())) {
+
+		return(TRUE);
+	}
+
+	/* Either nobody holds the mutex or the holder is another thread */
+
+	return(FALSE);
+}
+
+/**********************************************************************
+Prints debug info of currently reserved mutexes to stderr: a banner, one
+line for every mutex on the global mutex list whose lock word is nonzero
+(address, owning thread, file and line of the request), and finally the
+total number of mutexes on the list. The list is walked while holding
+mutex_list_mutex. Only compiled when UNIV_SYNC_DEBUG is defined. */
+
+void
+mutex_list_print_info(void)
+/*=======================*/
+{
+	mutex_t*	mutex;
+	const char*	file_name;
+	ulint		line;
+	os_thread_id_t	thread_id;
+	ulint		count		= 0;
+
+	fputs("----------\n"
+	      "MUTEX INFO\n"
+	      "----------\n", stderr);
+
+	mutex_enter(&mutex_list_mutex);
+
+	for (mutex = UT_LIST_GET_FIRST(mutex_list);
+	     mutex != NULL;
+	     mutex = UT_LIST_GET_NEXT(list, mutex)) {
+
+		count++;
+
+		if (mutex_get_lock_word(mutex) == 0) {
+			/* Not reserved: only counted, not printed */
+
+			continue;
+		}
+
+		mutex_get_debug_info(mutex, &file_name, &line, &thread_id);
+
+		/* ulint values are cast to ulong and printed with %lu, as
+		is done elsewhere in this file, so that the arguments
+		always match the conversion specifiers; %p takes a void
+		pointer. */
+
+		fprintf(stderr,
+"Locked mutex: addr %p thread %lu file %s line %lu\n",
+			(void*) mutex, (ulong) os_thread_pf(thread_id),
+			file_name, (ulong) line);
+	}
+
+	fprintf(stderr, "Total number of mutexes %lu\n", (ulong) count);
+
+	mutex_exit(&mutex_list_mutex);
+}
+
+/**********************************************************************
+Counts currently reserved mutexes. Works only in the debug version. The
+global mutex list is scanned under mutex_list_mutex for mutexes whose lock
+word is nonzero; the number found is asserted to be at least one and one is
+subtracted from it before returning.
+NOTE(review): the subtraction is meant to discount mutex_list_mutex, but
+mutex_create_func in this file never links mutex_list_mutex to the list.
+Confirm whether the adjustment (and the assertion) are still right. */
+
+ulint
+mutex_n_reserved(void)
+/*==================*/
+{
+	mutex_t*	cur;
+	ulint		n_locked	= 0;
+
+	mutex_enter(&mutex_list_mutex);
+
+	for (cur = UT_LIST_GET_FIRST(mutex_list);
+	     cur != NULL;
+	     cur = UT_LIST_GET_NEXT(list, cur)) {
+
+		if (mutex_get_lock_word(cur) != 0) {
+
+			n_locked++;
+		}
+	}
+
+	mutex_exit(&mutex_list_mutex);
+
+	ut_a(n_locked >= 1);
+
+	/* Subtract one, because this function itself was holding one
+	mutex (mutex_list_mutex) */
+
+	return(n_locked - 1);
+}
+
+/**********************************************************************
+Returns TRUE if no mutex or rw-lock is currently locked. Works only in
+the debug version. The result is the comparison of the sum
+mutex_n_reserved() + rw_lock_n_locked() with zero; the two counts are
+taken one after the other, not as one atomic snapshot. */
+
+ibool
+sync_all_freed(void)
+/*================*/
+{
+ return(mutex_n_reserved() + rw_lock_n_locked() == 0);
+}
+#endif /* UNIV_SYNC_DEBUG */
+
+/**********************************************************************
+Returns the address of the nth slot of sync_thread_level_arrays. The slot
+number is checked against OS_THREAD_MAX_N with a debug assertion only. */
+static
+sync_thread_t*
+sync_thread_level_arrays_get_nth(
+/*=============================*/
+			/* out: pointer to thread slot */
+	ulint	n)	/* in: slot number */
+{
+	ut_ad(n < OS_THREAD_MAX_N);
+
+	return(&sync_thread_level_arrays[n]);
+}
+
+/**********************************************************************
+Searches sync_thread_level_arrays for the slot owned by the calling
+thread: the first slot that is in use (levels is not NULL) and whose id
+equals the id of the current OS thread. */
+static
+sync_thread_t*
+sync_thread_level_arrays_find_slot(void)
+/*====================================*/
+			/* out: pointer to thread slot, NULL if not found */
+
+{
+	os_thread_id_t	self = os_thread_get_curr_id();
+	ulint		n;
+
+	for (n = 0; n < OS_THREAD_MAX_N; n++) {
+
+		sync_thread_t*	candidate;
+
+		candidate = sync_thread_level_arrays_get_nth(n);
+
+		if (candidate->levels == NULL) {
+			/* Unused slot; its id field is not looked at */
+
+			continue;
+		}
+
+		if (os_thread_eq(candidate->id, self)) {
+
+			return(candidate);
+		}
+	}
+
+	return(NULL);
+}
+
+/**********************************************************************
+Searches sync_thread_level_arrays for the first unused slot, i.e. one
+whose levels pointer is NULL. */
+static
+sync_thread_t*
+sync_thread_level_arrays_find_free(void)
+/*====================================*/
+			/* out: pointer to thread slot, NULL if every
+			slot is in use */
+
+{
+	ulint	n = 0;
+
+	while (n < OS_THREAD_MAX_N) {
+
+		sync_thread_t*	candidate;
+
+		candidate = sync_thread_level_arrays_get_nth(n);
+
+		if (candidate->levels == NULL) {
+
+			return(candidate);
+		}
+
+		n++;
+	}
+
+	return(NULL);
+}
+
+/**********************************************************************
+Returns the address of the nth entry of a per-thread level array. The
+entry number is checked against SYNC_THREAD_N_LEVELS with a debug
+assertion only. */
+static
+sync_level_t*
+sync_thread_levels_get_nth(
+/*=======================*/
+				/* out: pointer to level slot */
+	sync_level_t*	arr,	/* in: pointer to level array for an OS
+				thread */
+	ulint		n)	/* in: slot number */
+{
+	ut_ad(n < SYNC_THREAD_N_LEVELS);
+
+	return(&arr[n]);
+}
+
+/**********************************************************************
+Checks if all the level values stored in the level array are greater than
+the given limit. At the first occupied slot whose level is not greater
+than the limit, a diagnostic is printed to stderr and FALSE is returned.
+For a mutex the diagnostic shows where it was created and whether it is
+locked; for anything else the latch is printed with rw_lock_print in a
+UNIV_SYNC_DEBUG build. */
+static
+ibool
+sync_thread_levels_g(
+/*=================*/
+				/* out: TRUE if all greater */
+	sync_level_t*	arr,	/* in: pointer to level array for an OS
+				thread */
+	ulint		limit)	/* in: level limit */
+{
+	sync_level_t*	slot;
+	mutex_t*	mutex;
+	ulint		i;
+
+	for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
+
+		slot = sync_thread_levels_get_nth(arr, i);
+
+		if (slot->latch == NULL || slot->level > limit) {
+			/* Empty slot, or a latch that respects the limit */
+
+			continue;
+		}
+
+		/* The slot may hold a mutex or an rw-lock. The latch is
+		first looked at as a mutex and the magic number decides
+		which one it is. */
+
+		mutex = slot->latch;
+
+		fprintf(stderr,
+"InnoDB error: sync levels should be > %lu but a level is %lu\n",
+			(ulong) limit, (ulong) slot->level);
+
+		if (mutex->magic_n == MUTEX_MAGIC_N) {
+			fprintf(stderr,
+				"Mutex created at %s %lu\n",
+				mutex->cfile_name,
+				(ulong) mutex->cline);
+
+			if (mutex_get_lock_word(mutex) != 0) {
+#ifdef UNIV_SYNC_DEBUG
+				const char*	file_name;
+				ulint		line;
+				os_thread_id_t	thread_id;
+
+				mutex_get_debug_info(mutex,
+					&file_name, &line, &thread_id);
+
+				/* ulint values are cast to ulong and
+				printed with %lu so that the arguments
+				match the conversion specifiers */
+
+				fprintf(stderr,
+"InnoDB: Locked mutex: addr %p thread %lu file %s line %lu\n",
+					(void*) mutex,
+					(ulong) os_thread_pf(thread_id),
+					file_name, (ulong) line);
+#else /* UNIV_SYNC_DEBUG */
+				fprintf(stderr,
+					"InnoDB: Locked mutex: addr %p\n",
+					(void*) mutex);
+#endif /* UNIV_SYNC_DEBUG */
+			} else {
+				fputs("Not locked\n", stderr);
+			}
+		} else {
+#ifdef UNIV_SYNC_DEBUG
+			rw_lock_print((rw_lock_t*) slot->latch);
+#endif /* UNIV_SYNC_DEBUG */
+		}
+
+		return(FALSE);
+	}
+
+	return(TRUE);
+}
+
+/**********************************************************************
+Tells whether some occupied slot of the level array carries exactly the
+given level. Empty slots (latch == NULL) are ignored. */
+static
+ibool
+sync_thread_levels_contain(
+/*=======================*/
+				/* out: TRUE if stored */
+	sync_level_t*	arr,	/* in: pointer to level array for an OS
+				thread */
+	ulint		level)	/* in: level */
+{
+	ulint	n;
+
+	for (n = 0; n < SYNC_THREAD_N_LEVELS; n++) {
+
+		sync_level_t*	entry = sync_thread_levels_get_nth(arr, n);
+
+		if (entry->latch == NULL) {
+
+			continue;
+		}
+
+		if (entry->level == level) {
+
+			return(TRUE);
+		}
+	}
+
+	return(FALSE);
+}
+
+/**********************************************************************
+Checks that the level array for the current thread is empty. Returns TRUE
+at once if latching order checks are off or if the thread has no level
+array. Otherwise every slot is inspected under sync_thread_mutex; an
+occupied slot is tolerated only when dict_mutex_allowed is TRUE and its
+level is SYNC_DICT or SYNC_DICT_OPERATION. Any other occupied slot leads
+to ut_error after the mutex has been released. */
+
+ibool
+sync_thread_levels_empty_gen(
+/*=========================*/
+					/* out: TRUE if empty except the
+					exceptions specified below */
+	ibool	dict_mutex_allowed)	/* in: TRUE if dictionary mutex is
+					allowed to be owned by the thread,
+					also purge_is_running mutex is
+					allowed */
+{
+	sync_thread_t*	own_slot;
+	sync_level_t*	levels;
+	sync_level_t*	entry;
+	ulint		n;
+
+	if (!sync_order_checks_on) {
+
+		return(TRUE);
+	}
+
+	mutex_enter(&sync_thread_mutex);
+
+	own_slot = sync_thread_level_arrays_find_slot();
+
+	if (own_slot == NULL) {
+		/* This thread has never registered a latch */
+
+		mutex_exit(&sync_thread_mutex);
+
+		return(TRUE);
+	}
+
+	levels = own_slot->levels;
+
+	for (n = 0; n < SYNC_THREAD_N_LEVELS; n++) {
+
+		entry = sync_thread_levels_get_nth(levels, n);
+
+		if (entry->latch == NULL) {
+
+			continue;
+		}
+
+		if (dict_mutex_allowed
+		    && (entry->level == SYNC_DICT
+			|| entry->level == SYNC_DICT_OPERATION)) {
+			/* One of the tolerated latches */
+
+			continue;
+		}
+
+		mutex_exit(&sync_thread_mutex);
+		ut_error;
+
+		return(FALSE);
+	}
+
+	mutex_exit(&sync_thread_mutex);
+
+	return(TRUE);
+}
+
+/**********************************************************************
+Checks that the level array for the current thread is empty. This is
+sync_thread_levels_empty_gen called with dict_mutex_allowed == FALSE, so
+no latch level is exempted from the check. */
+
+ibool
+sync_thread_levels_empty(void)
+/*==========================*/
+ /* out: TRUE if empty */
+{
+ return(sync_thread_levels_empty_gen(FALSE));
+}
+
+/**********************************************************************
+Adds a latch and its level in the thread level array. Allocates the memory
+for the array if called first time for this OS thread. Makes the checks
+against other latch levels stored in the array for this thread.
+
+Nothing is done if latching order checks are off, if the latch is one of
+sync_thread_mutex, mutex_list_mutex, rw_lock_list_mutex (or, in a
+UNIV_SYNC_DEBUG build, rw_lock_debug_mutex), or if the level is
+SYNC_LEVEL_NONE. Otherwise the work is done under sync_thread_mutex: the
+rule for the given level is asserted with ut_a and the latch is stored in
+the first empty entry of the array. An unknown level, a full
+sync_thread_level_arrays table and a full level array all end in an
+assertion failure. */
+
+void
+sync_thread_add_level(
+/*==================*/
+	void*	latch,	/* in: pointer to a mutex or an rw-lock */
+	ulint	level)	/* in: level in the latching order; if SYNC_LEVEL_NONE,
+			nothing is done */
+{
+	sync_level_t*	array;
+	sync_level_t*	slot;
+	sync_thread_t*	thread_slot;
+	ulint		i;
+
+	if (!sync_order_checks_on) {
+
+		return;
+	}
+
+	if ((latch == (void*)&sync_thread_mutex)
+	    || (latch == (void*)&mutex_list_mutex)
+#ifdef UNIV_SYNC_DEBUG
+	    || (latch == (void*)&rw_lock_debug_mutex)
+#endif /* UNIV_SYNC_DEBUG */
+	    || (latch == (void*)&rw_lock_list_mutex)) {
+
+		return;
+	}
+
+	if (level == SYNC_LEVEL_NONE) {
+
+		return;
+	}
+
+	mutex_enter(&sync_thread_mutex);
+
+	thread_slot = sync_thread_level_arrays_find_slot();
+
+	if (thread_slot == NULL) {
+		/* We have to allocate the level array for a new thread */
+		array = ut_malloc(sizeof(sync_level_t) * SYNC_THREAD_N_LEVELS);
+
+		thread_slot = sync_thread_level_arrays_find_free();
+
+		/* find_free returns NULL when all OS_THREAD_MAX_N slots
+		are in use; fail with an assertion rather than by
+		dereferencing a NULL pointer */
+		ut_a(thread_slot != NULL);
+
+		thread_slot->id = os_thread_get_curr_id();
+		thread_slot->levels = array;
+
+		for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
+
+			slot = sync_thread_levels_get_nth(array, i);
+
+			slot->latch = NULL;
+		}
+	}
+
+	array = thread_slot->levels;
+
+	/* NOTE that there is a problem with _NODE and _LEAF levels: if the
+	B-tree height changes, then a leaf can change to an internal node
+	or the other way around. We do not know at present if this can cause
+	unnecessary assertion failures below. */
+
+	if (level == SYNC_NO_ORDER_CHECK) {
+		/* Do no order checking */
+
+	} else if (level == SYNC_MEM_POOL) {
+		ut_a(sync_thread_levels_g(array, SYNC_MEM_POOL));
+	} else if (level == SYNC_MEM_HASH) {
+		ut_a(sync_thread_levels_g(array, SYNC_MEM_HASH));
+	} else if (level == SYNC_RECV) {
+		ut_a(sync_thread_levels_g(array, SYNC_RECV));
+	} else if (level == SYNC_LOG) {
+		ut_a(sync_thread_levels_g(array, SYNC_LOG));
+	} else if (level == SYNC_THR_LOCAL) {
+		ut_a(sync_thread_levels_g(array, SYNC_THR_LOCAL));
+	} else if (level == SYNC_ANY_LATCH) {
+		ut_a(sync_thread_levels_g(array, SYNC_ANY_LATCH));
+	} else if (level == SYNC_TRX_SYS_HEADER) {
+		ut_a(sync_thread_levels_g(array, SYNC_TRX_SYS_HEADER));
+	} else if (level == SYNC_DOUBLEWRITE) {
+		ut_a(sync_thread_levels_g(array, SYNC_DOUBLEWRITE));
+	} else if (level == SYNC_BUF_BLOCK) {
+		ut_a((sync_thread_levels_contain(array, SYNC_BUF_POOL)
+		      && sync_thread_levels_g(array, SYNC_BUF_BLOCK - 1))
+		     || sync_thread_levels_g(array, SYNC_BUF_BLOCK));
+	} else if (level == SYNC_BUF_POOL) {
+		ut_a(sync_thread_levels_g(array, SYNC_BUF_POOL));
+	} else if (level == SYNC_SEARCH_SYS) {
+		ut_a(sync_thread_levels_g(array, SYNC_SEARCH_SYS));
+	} else if (level == SYNC_TRX_LOCK_HEAP) {
+		ut_a(sync_thread_levels_g(array, SYNC_TRX_LOCK_HEAP));
+	} else if (level == SYNC_REC_LOCK) {
+		ut_a((sync_thread_levels_contain(array, SYNC_KERNEL)
+		      && sync_thread_levels_g(array, SYNC_REC_LOCK - 1))
+		     || sync_thread_levels_g(array, SYNC_REC_LOCK));
+	} else if (level == SYNC_KERNEL) {
+		ut_a(sync_thread_levels_g(array, SYNC_KERNEL));
+	} else if (level == SYNC_IBUF_BITMAP) {
+		ut_a((sync_thread_levels_contain(array, SYNC_IBUF_BITMAP_MUTEX)
+		      && sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1))
+		     || sync_thread_levels_g(array, SYNC_IBUF_BITMAP));
+	} else if (level == SYNC_IBUF_BITMAP_MUTEX) {
+		ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP_MUTEX));
+	} else if (level == SYNC_FSP_PAGE) {
+		ut_a(sync_thread_levels_contain(array, SYNC_FSP));
+	} else if (level == SYNC_FSP) {
+		ut_a(sync_thread_levels_contain(array, SYNC_FSP)
+		     || sync_thread_levels_g(array, SYNC_FSP));
+	} else if (level == SYNC_EXTERN_STORAGE) {
+		ut_a(TRUE);
+	} else if (level == SYNC_TRX_UNDO_PAGE) {
+		ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
+		     || sync_thread_levels_contain(array, SYNC_RSEG)
+		     || sync_thread_levels_contain(array, SYNC_PURGE_SYS)
+		     || sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE));
+	} else if (level == SYNC_RSEG_HEADER) {
+		ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
+	} else if (level == SYNC_RSEG_HEADER_NEW) {
+		ut_a(sync_thread_levels_contain(array, SYNC_KERNEL)
+		     && sync_thread_levels_contain(array, SYNC_FSP_PAGE));
+	} else if (level == SYNC_RSEG) {
+		ut_a(sync_thread_levels_g(array, SYNC_RSEG));
+	} else if (level == SYNC_TRX_UNDO) {
+		ut_a(sync_thread_levels_g(array, SYNC_TRX_UNDO));
+	} else if (level == SYNC_PURGE_LATCH) {
+		ut_a(sync_thread_levels_g(array, SYNC_PURGE_LATCH));
+	} else if (level == SYNC_PURGE_SYS) {
+		ut_a(sync_thread_levels_g(array, SYNC_PURGE_SYS));
+	} else if (level == SYNC_TREE_NODE) {
+		ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE)
+		     || sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
+	} else if (level == SYNC_TREE_NODE_FROM_HASH) {
+		ut_a(1);
+	} else if (level == SYNC_TREE_NODE_NEW) {
+		ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE)
+		     || sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
+	} else if (level == SYNC_INDEX_TREE) {
+		ut_a((sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
+		      && sync_thread_levels_contain(array, SYNC_FSP)
+		      && sync_thread_levels_g(array, SYNC_FSP_PAGE - 1))
+		     || sync_thread_levels_g(array, SYNC_TREE_NODE - 1));
+	} else if (level == SYNC_IBUF_MUTEX) {
+		ut_a(sync_thread_levels_g(array, SYNC_FSP_PAGE - 1));
+	} else if (level == SYNC_IBUF_PESS_INSERT_MUTEX) {
+		ut_a(sync_thread_levels_g(array, SYNC_FSP - 1)
+		     && !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX));
+	} else if (level == SYNC_IBUF_HEADER) {
+		ut_a(sync_thread_levels_g(array, SYNC_FSP - 1)
+		     && !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)
+		     && !sync_thread_levels_contain(array,
+					SYNC_IBUF_PESS_INSERT_MUTEX));
+	} else if (level == SYNC_DICT_AUTOINC_MUTEX) {
+		ut_a(sync_thread_levels_g(array, SYNC_DICT_AUTOINC_MUTEX));
+	} else if (level == SYNC_DICT_OPERATION) {
+		ut_a(sync_thread_levels_g(array, SYNC_DICT_OPERATION));
+	} else if (level == SYNC_DICT_HEADER) {
+		ut_a(sync_thread_levels_g(array, SYNC_DICT_HEADER));
+	} else if (level == SYNC_DICT) {
+		ut_a(buf_debug_prints
+		     || sync_thread_levels_g(array, SYNC_DICT));
+	} else {
+		ut_error;
+	}
+
+	/* Store the latch in the first empty entry of the array */
+
+	for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
+
+		slot = sync_thread_levels_get_nth(array, i);
+
+		if (slot->latch == NULL) {
+			slot->latch = latch;
+			slot->level = level;
+
+			break;
+		}
+	}
+
+	/* The loop above must have found an empty entry */
+	ut_a(i < SYNC_THREAD_N_LEVELS);
+
+	mutex_exit(&sync_thread_mutex);
+}
+
+/**********************************************************************
+Clears the entry that refers to the given latch in the level array of the
+thread slot returned by sync_thread_level_arrays_find_slot(). Nothing is
+done when latching order checks are switched off, or when the latch is one
+of the sync system's own list/bookkeeping mutexes. */
+
+ibool
+sync_thread_reset_level(
+/*====================*/
+			/* out: TRUE if the latch was found in the array
+			and removed; a latch that should be there but is
+			not found is an error */
+	void*	latch)	/* in: pointer to a mutex or an rw-lock */
+{
+	sync_thread_t*	owner;
+	sync_level_t*	levels;
+	sync_level_t*	entry;
+	ulint		n;
+
+	if (!sync_order_checks_on) {
+
+		return(FALSE);
+	}
+
+	/* These latches are skipped here: nothing is looked up or cleared
+	for them */
+
+	if (latch == (void*)&sync_thread_mutex
+	    || latch == (void*)&mutex_list_mutex
+	    || latch == (void*)&rw_lock_list_mutex) {
+
+		return(FALSE);
+	}
+
+#ifdef UNIV_SYNC_DEBUG
+	if (latch == (void*)&rw_lock_debug_mutex) {
+
+		return(FALSE);
+	}
+#endif /* UNIV_SYNC_DEBUG */
+
+	mutex_enter(&sync_thread_mutex);
+
+	owner = sync_thread_level_arrays_find_slot();
+
+	if (owner == NULL) {
+		/* No slot for this thread: report the error, then fall
+		through to the normal cleanup */
+
+		ut_error;
+
+		mutex_exit(&sync_thread_mutex);
+		return(FALSE);
+	}
+
+	levels = owner->levels;
+
+	for (n = 0; n < SYNC_THREAD_N_LEVELS; n++) {
+
+		entry = sync_thread_levels_get_nth(levels, n);
+
+		if (entry->latch != latch) {
+
+			continue;
+		}
+
+		/* A NULL latch pointer marks the entry as free */
+		entry->latch = NULL;
+
+		mutex_exit(&sync_thread_mutex);
+
+		return(TRUE);
+	}
+
+	/* The whole array was scanned without finding the latch */
+
+	ut_error;
+
+	mutex_exit(&sync_thread_mutex);
+
+	return(FALSE);
+}
+
+/**********************************************************************
+Sets up the synchronization data structures: the primary wait array, the
+per-thread latch level slots, the mutex and rw-lock lists and the mutexes
+guarding them. May be called only while sync_initialized is FALSE. */
+
+void
+sync_init(void)
+/*===========*/
+{
+	ulint	n;
+
+	ut_a(sync_initialized == FALSE);
+
+	sync_initialized = TRUE;
+
+	/* The primary system wait array is protected by an OS mutex */
+
+	sync_primary_wait_array = sync_array_create(OS_THREAD_MAX_N,
+						    SYNC_ARRAY_OS_MUTEX);
+
+	/* One slot per OS thread for storing that thread's latch levels;
+	every slot starts out without a level array */
+
+	sync_thread_level_arrays = ut_malloc(OS_THREAD_MAX_N
+					     * sizeof(sync_thread_t));
+	n = 0;
+
+	while (n < OS_THREAD_MAX_N) {
+
+		sync_thread_level_arrays_get_nth(n)->levels = NULL;
+		n++;
+	}
+
+	/* The mutex list and the mutex that protects it */
+
+	UT_LIST_INIT(mutex_list);
+	mutex_create(&mutex_list_mutex);
+	mutex_set_level(&mutex_list_mutex, SYNC_NO_ORDER_CHECK);
+
+	mutex_create(&sync_thread_mutex);
+	mutex_set_level(&sync_thread_mutex, SYNC_NO_ORDER_CHECK);
+
+	/* The rw-lock list and the mutex that protects it */
+
+	UT_LIST_INIT(rw_lock_list);
+	mutex_create(&rw_lock_list_mutex);
+	mutex_set_level(&rw_lock_list_mutex, SYNC_NO_ORDER_CHECK);
+
+#ifdef UNIV_SYNC_DEBUG
+	/* Debug-only rw-lock bookkeeping: its mutex, event and waiters
+	flag */
+
+	mutex_create(&rw_lock_debug_mutex);
+	mutex_set_level(&rw_lock_debug_mutex, SYNC_NO_ORDER_CHECK);
+
+	rw_lock_debug_event = os_event_create(NULL);
+	rw_lock_debug_waiters = FALSE;
+#endif /* UNIV_SYNC_DEBUG */
+}
+
+/**********************************************************************
+Frees the resources in InnoDB's own synchronization data structures and
+marks the sync system as uninitialized, so that sync_init() may be called
+again. Use os_sync_free() after calling this. */
+
+void
+sync_close(void)
+/*===========*/
+{
+	mutex_t*	mutex;
+
+	sync_array_free(sync_primary_wait_array);
+
+	/* Free every mutex still on the list. The head is re-read after
+	each call; this presumably relies on mutex_free() unlinking the
+	mutex from mutex_list. */
+
+	for (mutex = UT_LIST_GET_FIRST(mutex_list);
+	     mutex != NULL;
+	     mutex = UT_LIST_GET_FIRST(mutex_list)) {
+
+		mutex_free(mutex);
+	}
+
+	mutex_free(&mutex_list_mutex);
+	mutex_free(&sync_thread_mutex);
+
+	/* sync_init() asserts that this flag is FALSE before it sets it;
+	without the reset an init/close/init cycle within one process
+	would fail that assertion */
+
+	sync_initialized = FALSE;
+}
+
+/***********************************************************************
+Prints the wait statistics of the sync system to the given file. If
+UNIV_SYNC_DEBUG is defined, the mutex and rw-lock exit counters are printed
+to the same file first. */
+
+void
+sync_print_wait_info(
+/*=================*/
+	FILE*	file)		/* in: file where to print */
+{
+#ifdef UNIV_SYNC_DEBUG
+	/* Write to the caller's file rather than stderr so that all output
+	of this function goes to one stream; the counters are cast like the
+	ones below so that the arguments match %lu */
+
+	fprintf(file, "Mutex exits %lu, rws exits %lu, rwx exits %lu\n",
+		(ulong) mutex_exit_count,
+		(ulong) rw_s_exit_count,
+		(ulong) rw_x_exit_count);
+#endif
+
+	fprintf(file,
+"Mutex spin waits %lu, rounds %lu, OS waits %lu\n"
+"RW-shared spins %lu, OS waits %lu; RW-excl spins %lu, OS waits %lu\n",
+		(ulong) mutex_spin_wait_count,
+		(ulong) mutex_spin_round_count,
+		(ulong) mutex_os_wait_count,
+		(ulong) rw_s_spin_wait_count,
+		(ulong) rw_s_os_wait_count,
+		(ulong) rw_x_spin_wait_count,
+		(ulong) rw_x_os_wait_count);
+}
+
+/***********************************************************************
+Prints info of the sync system: the info of the primary wait array followed
+by the wait statistics, both written to the given file. If UNIV_SYNC_DEBUG
+is defined, the mutex list and rw-lock list info printers are called first;
+those two calls are not passed the file argument. */
+
+void
+sync_print(
+/*=======*/
+ FILE* file) /* in: file where to print */
+{
+#ifdef UNIV_SYNC_DEBUG
+ mutex_list_print_info();
+
+ rw_lock_list_print_info();
+#endif /* UNIV_SYNC_DEBUG */
+
+ sync_array_print_info(file, sync_primary_wait_array);
+
+ sync_print_wait_info(file);
+}