diff options
author | Marko Mäkelä <marko.makela@oracle.com> | 2011-04-05 10:37:58 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@oracle.com> | 2011-04-05 10:37:58 +0300 |
commit | 67496884aee65b324dc0082119e53e0f0f331266 (patch) | |
tree | c419190c13c36cbb11ad0ab30f35bba6c4edd8cf /storage | |
parent | 6fe99e2dda90ba8b579ca76217606c9f8a630916 (diff) | |
download | mariadb-git-67496884aee65b324dc0082119e53e0f0f331266.tar.gz |
Bug 12323643 - CLEAN UP THE INNODB THREAD SHUTDOWN AND ASSERTIONS (WL#5136)
On shutdown, do not exit threads in os_event_wait(). This method of
exiting was only used by the I/O handler threads. Exit them on a
higher level.
os_event_wait_low(), os_event_wait_time_low(): Do not exit on shutdown.
os_thread_exit(), ut_dbg_assertion_failed(), ut_print_timestamp(): Add
attribute cold, so that GCC knows that these functions are rarely
invoked and can be optimized for size.
os_aio_linux_collect(): Return on shutdown.
os_aio_linux_handle(), os_aio_simulated_handle(), os_aio_windows_handle():
Set *message1 = *message2 = NULL and return TRUE on shutdown.
fil_aio_wait(): Return on shutdown.
logs_empty_and_mark_files_at_shutdown(): Even in very fast shutdown
(innodb_fast_shutdown=2), allow the background threads to exit, but
skip the flushing and log checkpointing.
innobase_shutdown_for_mysql(): Always wait for all the threads to exit.
rb:633 approved by Sunny Bains
Diffstat (limited to 'storage')
-rw-r--r-- | storage/innobase/fil/fil0fil.c | 6 | ||||
-rw-r--r-- | storage/innobase/include/os0sync.h | 5 | ||||
-rw-r--r-- | storage/innobase/include/os0thread.h | 3 | ||||
-rw-r--r-- | storage/innobase/include/ut0dbg.h | 7 | ||||
-rw-r--r-- | storage/innobase/include/ut0ut.h | 3 | ||||
-rw-r--r-- | storage/innobase/log/log0log.c | 93 | ||||
-rw-r--r-- | storage/innobase/os/os0file.c | 205 | ||||
-rw-r--r-- | storage/innobase/os/os0sync.c | 52 | ||||
-rw-r--r-- | storage/innobase/srv/srv0srv.c | 4 | ||||
-rw-r--r-- | storage/innobase/srv/srv0start.c | 12 |
10 files changed, 185 insertions, 205 deletions
diff --git a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c index 6c50b853187..0d9846fdbf8 100644 --- a/storage/innobase/fil/fil0fil.c +++ b/storage/innobase/fil/fil0fil.c @@ -4527,8 +4527,8 @@ fil_aio_wait( ret = os_aio_linux_handle(segment, &fil_node, &message, &type); #else - ret = 0; /* Eliminate compiler warning */ ut_error; + ret = 0; /* Eliminate compiler warning */ #endif } else { srv_set_io_thread_op_info(segment, "simulated aio handle"); @@ -4538,6 +4538,10 @@ fil_aio_wait( } ut_a(ret); + if (UNIV_UNLIKELY(fil_node == NULL)) { + ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS); + return; + } srv_set_io_thread_op_info(segment, "complete io for fil node"); diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h index b294d7421c8..1b98f94f641 100644 --- a/storage/innobase/include/os0sync.h +++ b/storage/innobase/include/os0sync.h @@ -150,10 +150,7 @@ os_event_free( os_event_t event); /*!< in: event to free */ /**********************************************************//** -Waits for an event object until it is in the signaled state. If -srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the -waiting thread when the event becomes signaled (or immediately if the -event is already in the signaled state). +Waits for an event object until it is in the signaled state. Typically, if the event has been signalled after the os_event_reset() we'll return immediately because event->is_set == TRUE. diff --git a/storage/innobase/include/os0thread.h b/storage/innobase/include/os0thread.h index cc56e2158ee..3a4448b463e 100644 --- a/storage/innobase/include/os0thread.h +++ b/storage/innobase/include/os0thread.h @@ -107,8 +107,9 @@ UNIV_INTERN void os_thread_exit( /*===========*/ - void* exit_value); /*!< in: exit value; in Windows this void* + void* exit_value) /*!< in: exit value; in Windows this void* is cast as a DWORD */ + __attribute__((cold, noreturn)); /*****************************************************************//** Returns the thread identifier of current thread. @return current thread identifier */ diff --git a/storage/innobase/include/ut0dbg.h b/storage/innobase/include/ut0dbg.h index d7ec90db0fb..2cb208a4e9e 100644 --- a/storage/innobase/include/ut0dbg.h +++ b/storage/innobase/include/ut0dbg.h @@ -50,9 +50,10 @@ UNIV_INTERN void ut_dbg_assertion_failed( /*====================*/ - const char* expr, /*!< in: the failed assertion */ - const char* file, /*!< in: source file containing the assertion */ - ulint line); /*!< in: line number of the assertion */ + const char* expr, /*!< in: the failed assertion */ + const char* file, /*!< in: source file containing the assertion */ + ulint line) /*!< in: line number of the assertion */ + __attribute__((nonnull(2), cold)); #if defined(__WIN__) || defined(__INTEL_COMPILER) # undef UT_DBG_USE_ABORT diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h index cd5c7ca99f1..fe28a573631 100644 --- a/storage/innobase/include/ut0ut.h +++ b/storage/innobase/include/ut0ut.h @@ -275,7 +275,8 @@ UNIV_INTERN void ut_print_timestamp( /*===============*/ - FILE* file); /*!< in: file where to print */ + FILE* file) /*!< in: file where to print */ + __attribute__((nonnull, cold)); /**********************************************************//** Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */ UNIV_INTERN diff --git a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c index 3fef4ee4fc5..d638fe5cb6d 100644 --- a/storage/innobase/log/log0log.c +++ b/storage/innobase/log/log0log.c @@ -3078,6 +3078,7 @@ logs_empty_and_mark_files_at_shutdown(void) { ib_uint64_t lsn; ulint arch_log_no; + ibool server_busy; if (srv_print_verbose_log) { ut_print_timestamp(stderr); @@ -3092,14 +3093,12 @@ loop: mutex_enter(&kernel_mutex); - /* We need the monitor threads to stop before we proceed with a - normal shutdown. In case of very fast shutdown, however, we can - proceed without waiting for monitor threads. */ + /* We need the monitor threads to stop before we proceed with + a shutdown. */ - if (srv_fast_shutdown < 2 - && (srv_error_monitor_active - || srv_lock_timeout_active - || srv_monitor_active)) { + if (srv_error_monitor_active + || srv_lock_timeout_active + || srv_monitor_active) { mutex_exit(&kernel_mutex); @@ -3114,65 +3113,57 @@ loop: for the 'very fast' shutdown, because the InnoDB layer may have committed or prepared transactions and we don't want to lose them. */ - if (trx_n_mysql_transactions > 0 - || UT_LIST_GET_LEN(trx_sys->trx_list) > 0) { - - mutex_exit(&kernel_mutex); - - goto loop; - } - - if (srv_fast_shutdown == 2) { - /* In this fastest shutdown we do not flush the buffer pool: - it is essentially a 'crash' of the InnoDB server. Make sure - that the log is all flushed to disk, so that we can recover - all committed transactions in a crash recovery. We must not - write the lsn stamps to the data files, since at a startup - InnoDB deduces from the stamps if the previous shutdown was - clean. */ - - log_buffer_flush_to_disk(); - - mutex_exit(&kernel_mutex); - - return; /* We SKIP ALL THE REST !! */ - } - + server_busy = trx_n_mysql_transactions > 0 + || UT_LIST_GET_LEN(trx_sys->trx_list) > 0; mutex_exit(&kernel_mutex); - /* Check that the background threads are suspended */ - - if (srv_is_any_background_thread_active()) { + if (server_busy || srv_is_any_background_thread_active()) { goto loop; } - mutex_enter(&(log_sys->mutex)); - - if (log_sys->n_pending_checkpoint_writes + mutex_enter(&log_sys->mutex); + server_busy = log_sys->n_pending_checkpoint_writes #ifdef UNIV_LOG_ARCHIVE - || log_sys->n_pending_archive_ios + || log_sys->n_pending_archive_ios #endif /* UNIV_LOG_ARCHIVE */ - || log_sys->n_pending_writes) { - - mutex_exit(&(log_sys->mutex)); - - goto loop; - } - - mutex_exit(&(log_sys->mutex)); - - if (!buf_pool_check_no_pending_io()) { + || log_sys->n_pending_writes; + mutex_exit(&log_sys->mutex); + if (server_busy || !buf_pool_check_no_pending_io()) { goto loop; } #ifdef UNIV_LOG_ARCHIVE log_archive_all(); #endif /* UNIV_LOG_ARCHIVE */ + if (srv_fast_shutdown == 2) { + /* In this fastest shutdown we do not flush the buffer + pool: it is essentially a 'crash' of the InnoDB + server. Make sure that the log is all flushed to disk, + so that we can recover all committed transactions in a + crash recovery. We must not write the lsn stamps to + the data files, since at a startup InnoDB deduces from + the stamps if the previous shutdown was clean. */ + + log_buffer_flush_to_disk(); + + /* Check that the background threads stay suspended */ + if (srv_is_any_background_thread_active()) { + fprintf(stderr, + "InnoDB: Warning: some background thread" + " woke up during shutdown\n"); + goto loop; + } + + srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; + fil_close_all_files(); + ut_a(!srv_is_any_background_thread_active()); + return; + } log_make_checkpoint_at(IB_ULONGLONG_MAX, TRUE); - mutex_enter(&(log_sys->mutex)); + mutex_enter(&log_sys->mutex); lsn = log_sys->lsn; @@ -3183,7 +3174,7 @@ loop: #endif /* UNIV_LOG_ARCHIVE */ ) { - mutex_exit(&(log_sys->mutex)); + mutex_exit(&log_sys->mutex); goto loop; } @@ -3201,7 +3192,7 @@ loop: log_archive_close_groups(TRUE); #endif /* UNIV_LOG_ARCHIVE */ - mutex_exit(&(log_sys->mutex)); + mutex_exit(&log_sys->mutex); /* Check that the background threads stay suspended */ if (srv_is_any_background_thread_active()) { diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c index 74dbac3bc96..50607e07076 100644 --- a/storage/innobase/os/os0file.c +++ b/storage/innobase/os/os0file.c @@ -4064,13 +4064,13 @@ os_aio_func( } try_again: - if (mode == OS_AIO_NORMAL) { - if (type == OS_FILE_READ) { - array = os_aio_read_array; - } else { - array = os_aio_write_array; - } - } else if (mode == OS_AIO_IBUF) { + switch (mode) { + case OS_AIO_NORMAL: + array = (type == OS_FILE_READ) + ? os_aio_read_array + : os_aio_write_array; + break; + case OS_AIO_IBUF: ut_ad(type == OS_FILE_READ); /* Reduce probability of deadlock bugs in connection with ibuf: do not let the ibuf i/o handler sleep */ @@ -4078,19 +4078,21 @@ try_again: wake_later = FALSE; array = os_aio_ibuf_array; - } else if (mode == OS_AIO_LOG) { - + break; + case OS_AIO_LOG: array = os_aio_log_array; - } else if (mode == OS_AIO_SYNC) { + break; + case OS_AIO_SYNC: array = os_aio_sync_array; #if defined(LINUX_NATIVE_AIO) /* In Linux native AIO we don't use sync IO array. */ ut_a(!srv_use_native_aio); #endif /* LINUX_NATIVE_AIO */ - } else { - array = NULL; /* Eliminate compiler warning */ + break; + default: ut_error; + array = NULL; /* Eliminate compiler warning */ } slot = os_aio_array_reserve_slot(type, array, message1, message2, file, @@ -4253,11 +4255,17 @@ os_aio_windows_handle( INFINITE); } - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_thread_exit(NULL); + os_mutex_enter(array->mutex); + + if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS + && array->n_reserved == 0) { + *message1 = NULL; + *message2 = NULL; + os_mutex_exit(array->mutex); + return(TRUE); } - os_mutex_enter(array->mutex); + ut_a(i >= WAIT_OBJECT_0 && i <= WAIT_OBJECT_0 + n); slot = os_aio_array_get_nth_slot(array, i + segment * n); @@ -4403,14 +4411,6 @@ os_aio_linux_collect( retry: - /* Go down if we are in shutdown mode. - In case of srv_fast_shutdown == 2, there may be pending - IO requests but that should be OK as we essentially treat - that as a crash of InnoDB. */ - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_thread_exit(NULL); - } - /* Initialize the events. The timeout value is arbitrary. We probably need to experiment with it a little. */ memset(events, 0, sizeof(*events) * seg_size); @@ -4419,76 +4419,72 @@ retry: ret = io_getevents(io_ctx, 1, seg_size, events, &timeout); - /* This error handling is for any error in collecting the - IO requests. The errors, if any, for any particular IO - request are simply passed on to the calling routine. */ - - /* Not enough resources! Try again. */ - if (ret == -EAGAIN) { - goto retry; - } - - /* Interrupted! I have tested the behaviour in case of an - interrupt. If we have some completed IOs available then - the return code will be the number of IOs. We get EINTR only - if there are no completed IOs and we have been interrupted. */ - if (ret == -EINTR) { - goto retry; - } - - /* No pending request! Go back and check again. */ - if (ret == 0) { - goto retry; - } + if (ret > 0) { + for (i = 0; i < ret; i++) { + os_aio_slot_t* slot; + struct iocb* control; - /* All other errors! should cause a trap for now. */ - if (UNIV_UNLIKELY(ret < 0)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: unexpected ret_code[%d] from" - " io_getevents()!\n", ret); - ut_error; - } + control = (struct iocb *)events[i].obj; + ut_a(control != NULL); - ut_a(ret > 0); + slot = (os_aio_slot_t *) control->data; - for (i = 0; i < ret; i++) { - os_aio_slot_t* slot; - struct iocb* control; + /* Some sanity checks. */ + ut_a(slot != NULL); + ut_a(slot->reserved); - control = (struct iocb *)events[i].obj; - ut_a(control != NULL); +#if defined(UNIV_AIO_DEBUG) + fprintf(stderr, + "io_getevents[%c]: slot[%p] ctx[%p]" + " seg[%lu]\n", + (slot->type == OS_FILE_WRITE) ? 'w' : 'r', + slot, io_ctx, segment); +#endif - slot = (os_aio_slot_t *) control->data; + /* We are not scribbling previous segment. */ + ut_a(slot->pos >= start_pos); - /* Some sanity checks. */ - ut_a(slot != NULL); - ut_a(slot->reserved); + /* We have not overstepped to next segment. */ + ut_a(slot->pos < end_pos); -#if defined(UNIV_AIO_DEBUG) - fprintf(stderr, - "io_getevents[%c]: slot[%p] ctx[%p]" - " seg[%lu]\n", - (slot->type == OS_FILE_WRITE) ? 'w' : 'r', - slot, io_ctx, segment); -#endif + /* Mark this request as completed. The error handling + will be done in the calling function. */ + os_mutex_enter(array->mutex); + slot->n_bytes = events[i].res; + slot->ret = events[i].res2; + slot->io_already_done = TRUE; + os_mutex_exit(array->mutex); + } + return; + } - /* We are not scribbling previous segment. */ - ut_a(slot->pos >= start_pos); + if (UNIV_UNLIKELY(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS)) { + return; + } - /* We have not overstepped to next segment. */ - ut_a(slot->pos < end_pos); + /* This error handling is for any error in collecting the + IO requests. The errors, if any, for any particular IO + request are simply passed on to the calling routine. */ - /* Mark this request as completed. The error handling - will be done in the calling function. */ - os_mutex_enter(array->mutex); - slot->n_bytes = events[i].res; - slot->ret = events[i].res2; - slot->io_already_done = TRUE; - os_mutex_exit(array->mutex); + switch (ret) { + case -EAGAIN: + /* Not enough resources! Try again. */ + case -EINTR: + /* Interrupted! I have tested the behaviour in case of an + interrupt. If we have some completed IOs available then + the return code will be the number of IOs. We get EINTR only + if there are no completed IOs and we have been interrupted. */ + case 0: + /* No pending request! Go back and check again. */ + goto retry; } - return; + /* All other errors should cause a trap for now. */ + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: unexpected ret_code[%d] from io_getevents()!\n", + ret); + ut_error; } /**********************************************************************//** @@ -4532,20 +4528,35 @@ os_aio_linux_handle( /* Loop until we have found a completed request. */ for (;;) { + ibool any_reserved = FALSE; os_mutex_enter(array->mutex); for (i = 0; i < n; ++i) { slot = os_aio_array_get_nth_slot( - array, i + segment * n); - if (slot->reserved && slot->io_already_done) { + array, i + segment * n); + if (!slot->reserved) { + continue; + } else if (slot->io_already_done) { /* Something for us to work on. */ goto found; + } else { + any_reserved = TRUE; } } os_mutex_exit(array->mutex); - /* We don't have any completed request. - Wait for some request. Note that we return + /* There is no completed request. + If there is no pending request at all, + and the system is being shut down, exit. */ + if (UNIV_UNLIKELY + (!any_reserved + && srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS)) { + *message1 = NULL; + *message2 = NULL; + return(TRUE); + } + + /* Wait for some request. Note that we return from wait iff we have found a request. */ srv_set_io_thread_op_info(global_seg, @@ -4641,6 +4652,7 @@ os_aio_simulated_handle( byte* combined_buf; byte* combined_buf2; ibool ret; + ibool any_reserved; ulint n; ulint i; @@ -4671,18 +4683,21 @@ restart: goto recommended_sleep; } - os_mutex_enter(array->mutex); - srv_set_io_thread_op_info(global_segment, "looking for i/o requests (b)"); /* Check if there is a slot for which the i/o has already been done */ + any_reserved = FALSE; + + os_mutex_enter(array->mutex); for (i = 0; i < n; i++) { slot = os_aio_array_get_nth_slot(array, i + segment * n); - if (slot->reserved && slot->io_already_done) { + if (!slot->reserved) { + continue; + } else if (slot->io_already_done) { if (os_aio_print_debug) { fprintf(stderr, @@ -4694,9 +4709,23 @@ restart: ret = TRUE; goto slot_io_done; + } else { + any_reserved = TRUE; } } + /* There is no completed request. + If there is no pending request at all, + and the system is being shut down, exit. */ + if (UNIV_UNLIKELY + (!any_reserved + && srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS)) { + os_mutex_exit(array->mutex); + *message1 = NULL; + *message2 = NULL; + return(TRUE); + } + n_consecutive = 0; /* If there are at least 2 seconds old requests, then pick the oldest diff --git a/storage/innobase/os/os0sync.c b/storage/innobase/os/os0sync.c index b461f9b7c78..41a19843812 100644 --- a/storage/innobase/os/os0sync.c +++ b/storage/innobase/os/os0sync.c @@ -558,10 +558,7 @@ os_event_free( } /**********************************************************//** -Waits for an event object until it is in the signaled state. If -srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the -waiting thread when the event becomes signaled (or immediately if the -event is already in the signaled state). +Waits for an event object until it is in the signaled state. Typically, if the event has been signalled after the os_event_reset() we'll return immediately because event->is_set == TRUE. @@ -586,8 +583,6 @@ os_event_wait_low( returned by previous call of os_event_reset(). */ { - ib_int64_t old_signal_count; - #ifdef __WIN__ if(!srv_use_native_conditions) { DWORD err; @@ -600,43 +595,25 @@ os_event_wait_low( err = WaitForSingleObject(event->handle, INFINITE); ut_a(err == WAIT_OBJECT_0); - - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_thread_exit(NULL); - } return; } #endif - os_fast_mutex_lock(&(event->os_mutex)); + os_fast_mutex_lock(&event->os_mutex); - if (reset_sig_count) { - old_signal_count = reset_sig_count; - } else { - old_signal_count = event->signal_count; + if (!reset_sig_count) { + reset_sig_count = event->signal_count; } - for (;;) { - if (event->is_set == TRUE - || event->signal_count != old_signal_count) { - - os_fast_mutex_unlock(&(event->os_mutex)); - - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - - os_thread_exit(NULL); - } - /* Ok, we may return */ - - return; - } - + while (!event->is_set && event->signal_count == reset_sig_count) { os_cond_wait(&(event->cond_var), &(event->os_mutex)); /* Solaris manual said that spurious wakeups may occur: we have to check if the event really has been signaled after we came here to wait */ } + + os_fast_mutex_unlock(&event->os_mutex); } /**********************************************************//** @@ -657,7 +634,6 @@ os_event_wait_time_low( { ibool timed_out = FALSE; - ib_int64_t old_signal_count; #ifdef __WIN__ DWORD time_in_ms; @@ -727,15 +703,12 @@ os_event_wait_time_low( os_fast_mutex_lock(&event->os_mutex); - if (reset_sig_count) { - old_signal_count = reset_sig_count; - } else { - old_signal_count = event->signal_count; + if (!reset_sig_count) { + reset_sig_count = event->signal_count; } do { - if (event->is_set == TRUE - || event->signal_count != old_signal_count) { + if (event->is_set || event->signal_count != reset_sig_count) { break; } @@ -753,11 +726,6 @@ os_event_wait_time_low( os_fast_mutex_unlock(&event->os_mutex); - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - - os_thread_exit(NULL); - } - return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0); } diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index f0012b949ed..ecf1432016d 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -3082,11 +3082,7 @@ suspend_thread: os_event_wait(slot->event); if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - /* This is only extra safety, the thread should exit - already when the event wait ends */ - os_thread_exit(NULL); - } /* When there is user activity, InnoDB will set the event and the diff --git a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c index 79eae610a05..864c71209c0 100644 --- a/storage/innobase/srv/srv0start.c +++ b/storage/innobase/srv/srv0start.c @@ -2122,17 +2122,9 @@ innobase_shutdown_for_mysql(void) srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS; - /* In a 'very fast' shutdown, we do not need to wait for these threads - to die; all which counts is that we flushed the log; a 'very fast' - shutdown is essentially a crash. */ - - if (srv_fast_shutdown == 2) { - return(DB_SUCCESS); - } - /* All threads end up waiting for certain events. Put those events - to the signaled state. Then the threads will exit themselves in - os_thread_event_wait(). */ + to the signaled state. Then the threads will exit themselves after + os_event_wait(). */ for (i = 0; i < 1000; i++) { /* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM |