diff options
author | Sunny Bains <Sunny.Bains@Oracle.Com> | 2010-10-14 14:12:02 +1100 |
---|---|---|
committer | Sunny Bains <Sunny.Bains@Oracle.Com> | 2010-10-14 14:12:02 +1100 |
commit | e16a61c6a652941c62e9133cfdb885ec5858cf02 (patch) | |
tree | 3013c325a266ccae2c5246852b5fff5bcf51e200 /storage/innobase | |
parent | 695a9502fbc685795ca0b67d49cb116b0da87802 (diff) | |
download | mariadb-git-e16a61c6a652941c62e9133cfdb885ec5858cf02.tar.gz |
Bug# 55681 - MTR slowdown after default storage engine was changed to InnoDB
Add new function os_cond_wait_timed(). Change the os_thread_sleep() calls
to timed conditional waits. Signal the background threads during the shutdown
phase so that we avoid waiting for the sleep to time out, thus saving some time.
rb://439 -- Approved by Jimmy Yang
Diffstat (limited to 'storage/innobase')
-rw-r--r-- | storage/innobase/include/os0sync.h | 22 | ||||
-rw-r--r-- | storage/innobase/include/srv0srv.h | 9 | ||||
-rw-r--r-- | storage/innobase/log/log0log.c | 9 | ||||
-rw-r--r-- | storage/innobase/os/os0sync.c | 151 | ||||
-rw-r--r-- | storage/innobase/srv/srv0srv.c | 83 |
5 files changed, 244 insertions, 30 deletions
diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h index ec5ccee3e27..b294d7421c8 100644 --- a/storage/innobase/include/os0sync.h +++ b/storage/innobase/include/os0sync.h @@ -76,6 +76,12 @@ struct os_event_struct { /*!< list of all created events */ }; +/** Denotes an infinite delay for os_event_wait_time() */ +#define OS_SYNC_INFINITE_TIME ULINT_UNDEFINED + +/** Return value of os_event_wait_time() when the time is exceeded */ +#define OS_SYNC_TIME_EXCEEDED 1 + /** Operating system mutex */ typedef struct os_mutex_struct os_mutex_str_t; /** Operating system mutex handle */ @@ -173,7 +179,23 @@ os_event_wait_low( os_event_reset(). */ #define os_event_wait(event) os_event_wait_low(event, 0) +#define os_event_wait_time(e, t) os_event_wait_time_low(event, t, 0) +/**********************************************************//** +Waits for an event object until it is in the signaled state or +a timeout is exceeded. In Unix the timeout is always infinite. +@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */ +UNIV_INTERN +ulint +os_event_wait_time_low( +/*===================*/ + os_event_t event, /*!< in: event to wait */ + ulint time_in_usec, /*!< in: timeout in + microseconds, or + OS_SYNC_INFINITE_TIME */ + ib_int64_t reset_sig_count); /*!< in: zero or the value + returned by previous call of + os_event_reset(). */ /*********************************************************//** Creates an operating system mutex semaphore. Because these are slow, the mutex semaphore of InnoDB itself (mutex_t) should be used where possible. diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 5d2fb808dc9..98b07f5e893 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -57,6 +57,15 @@ extern const char srv_mysql50_table_name_prefix[9]; thread starts running */ extern os_event_t srv_lock_timeout_thread_event; +/* The monitor thread waits on this event. 
*/ +extern os_event_t srv_monitor_event; + +/* The lock timeout thread waits on this event. */ +extern os_event_t srv_timeout_event; + +/* The error monitor thread waits on this event. */ +extern os_event_t srv_error_event; + /* If the last data file is auto-extended, we add this many pages to it at a time */ #define SRV_AUTO_EXTEND_INCREMENT \ diff --git a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c index 401cede1d8f..3fef4ee4fc5 100644 --- a/storage/innobase/log/log0log.c +++ b/storage/innobase/log/log0log.c @@ -3098,10 +3098,15 @@ loop: if (srv_fast_shutdown < 2 && (srv_error_monitor_active - || srv_lock_timeout_active || srv_monitor_active)) { + || srv_lock_timeout_active + || srv_monitor_active)) { mutex_exit(&kernel_mutex); + os_event_set(srv_error_event); + os_event_set(srv_monitor_event); + os_event_set(srv_timeout_event); + goto loop; } @@ -3128,6 +3133,8 @@ loop: log_buffer_flush_to_disk(); + mutex_exit(&kernel_mutex); + return; /* We SKIP ALL THE REST !! */ } diff --git a/storage/innobase/os/os0sync.c b/storage/innobase/os/os0sync.c index 3c70e93aae0..975c66ad1e1 100644 --- a/storage/innobase/os/os0sync.c +++ b/storage/innobase/os/os0sync.c @@ -72,6 +72,9 @@ UNIV_INTERN ulint os_event_count = 0; UNIV_INTERN ulint os_mutex_count = 0; UNIV_INTERN ulint os_fast_mutex_count = 0; +/* The number of microsecnds in a second. */ +static const ulint MICROSECS_IN_A_SECOND = 1000000; + /* Because a mutex is embedded inside an event and there is an event embedded inside a mutex, on free, this generates a recursive call. This version of the free event function doesn't acquire the global lock */ @@ -122,6 +125,47 @@ os_cond_init( } /*********************************************************//** +Do a timed wait on condition variable. +@return TRUE if timed out, FALSE otherwise */ +UNIV_INLINE +ibool +os_cond_wait_timed( +/*===============*/ + os_cond_t* cond, /*!< in: condition variable. 
*/ + os_fast_mutex_t* mutex, /*!< in: fast mutex */ +#ifndef __WIN__ + const struct timespec* abstime /*!< in: timeout */ +#else + ulint time_in_ms /*!< in: timeout in + milliseconds */ +#endif /* !__WIN__ */ +) +{ +#ifdef __WIN__ + BOOL ret; + + ut_a(sleep_condition_variable != NULL); + + ret = sleep_condition_variable(cond, mutex, time_in_ms); + + if (!ret && GetLastError() == WAIT_TIMEOUT) { + return(TRUE); + } + + ut_a(ret); + + return(FALSE); +#else + int ret; + + ret = pthread_cond_timedwait(cond, mutex, abstime); + + ut_a(ret == 0 || ret == ETIMEDOUT); + + return(ret == ETIMEDOUT); +#endif +} +/*********************************************************//** Wait on condition variable */ UNIV_INLINE void @@ -572,6 +616,113 @@ os_event_wait_low( } } +/**********************************************************//** +Waits for an event object until it is in the signaled state or +a timeout is exceeded. +@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */ +UNIV_INTERN +ulint +os_event_wait_time_low( +/*===================*/ + os_event_t event, /*!< in: event to wait */ + ulint time_in_usec, /*!< in: timeout in + microseconds, or + OS_SYNC_INFINITE_TIME */ + ib_int64_t reset_sig_count) /*!< in: zero or the value + returned by previous call of + os_event_reset(). */ + +{ + ibool timed_out; + ib_int64_t old_signal_count; + +#ifdef __WIN__ + DWORD time_in_ms = time_in_usec / 1000; + + if (!srv_use_native_conditions) { + DWORD err; + + ut_a(event); + + if (time_in_ms != OS_SYNC_INFINITE_TIME) { + err = WaitForSingleObject(event->handle, time_in_ms); + } else { + err = WaitForSingleObject(event->handle, INFINITE); + } + + if (err == WAIT_OBJECT_0) { + return(0); + } else if (err == WAIT_TIMEOUT) { + return(OS_SYNC_TIME_EXCEEDED); + } + + ut_error; + /* Dummy value to eliminate compiler warning. 
*/ + return(42); + } else { + ut_a(sleep_condition_variable != NULL); + } +#else + struct timeval tv; + ulint sec; + ulint usec; + int ret; + struct timespec abstime; + + ret = ut_usectime(&sec, &usec); + ut_a(ret == 0); + + tv.tv_sec = sec; + tv.tv_usec = usec; + + tv.tv_usec += time_in_usec; + + if ((ulint) tv.tv_usec > MICROSECS_IN_A_SECOND) { + tv.tv_sec += time_in_usec / MICROSECS_IN_A_SECOND; + tv.tv_usec %= MICROSECS_IN_A_SECOND; + } + + /* Convert to nano seconds. We ignore overflow. */ + abstime.tv_sec = tv.tv_sec; + abstime.tv_nsec = tv.tv_usec * 1000; +#endif /* __WIN__ */ + + os_fast_mutex_lock(&event->os_mutex); + + if (reset_sig_count) { + old_signal_count = reset_sig_count; + } else { + old_signal_count = event->signal_count; + } + + do { + if (event->is_set == TRUE + || event->signal_count != old_signal_count) { + + break; + } + + timed_out = os_cond_wait_timed( + &event->cond_var, &event->os_mutex, +#ifndef __WIN__ + &abstime +#else + time_in_ms +#endif /* !__WIN__ */ + ); + + } while (!timed_out); + + os_fast_mutex_unlock(&event->os_mutex); + + if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { + + os_thread_exit(NULL); + } + + return(timed_out ? OS_SYNC_TIME_EXCEEDED : 0); +} + /*********************************************************//** Creates an operating system mutex semaphore. Because these are slow, the mutex semaphore of InnoDB itself (mutex_t) should be used where possible. 
diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c index 83355bd1322..2ac5c8ce4fc 100644 --- a/storage/innobase/srv/srv0srv.c +++ b/storage/innobase/srv/srv0srv.c @@ -695,6 +695,12 @@ struct srv_slot_struct{ /* Table for MySQL threads where they will be suspended to wait for locks */ UNIV_INTERN srv_slot_t* srv_mysql_table = NULL; +UNIV_INTERN os_event_t srv_timeout_event; + +UNIV_INTERN os_event_t srv_monitor_event; + +UNIV_INTERN os_event_t srv_error_event; + UNIV_INTERN os_event_t srv_lock_timeout_thread_event; UNIV_INTERN srv_sys_t* srv_sys = NULL; @@ -1012,6 +1018,12 @@ srv_init(void) ut_a(slot->event); } + srv_error_event = os_event_create(NULL); + + srv_timeout_event = os_event_create(NULL); + + srv_monitor_event = os_event_create(NULL); + srv_lock_timeout_thread_event = os_event_create(NULL); for (i = 0; i < SRV_MASTER + 1; i++) { @@ -2049,6 +2061,7 @@ srv_monitor_thread( /*!< in: a dummy parameter required by os_thread_create */ { + ib_int64_t sig_count; double time_elapsed; time_t current_time; time_t last_table_monitor_time; @@ -2067,26 +2080,28 @@ srv_monitor_thread( #endif UT_NOT_USED(arg); - srv_last_monitor_time = time(NULL); - last_table_monitor_time = time(NULL); - last_tablespace_monitor_time = time(NULL); - last_monitor_time = time(NULL); + srv_last_monitor_time = ut_time(); + last_table_monitor_time = ut_time(); + last_tablespace_monitor_time = ut_time(); + last_monitor_time = ut_time(); mutex_skipped = 0; last_srv_print_monitor = srv_print_innodb_monitor; loop: srv_monitor_active = TRUE; /* Wake up every 5 seconds to see if we need to print - monitor information. */ + monitor information or if signalled at shutdown. 
*/ - os_thread_sleep(5000000); + sig_count = os_event_reset(srv_monitor_event); - current_time = time(NULL); + os_event_wait_time_low(srv_monitor_event, 5000000, sig_count); + + current_time = ut_time(); time_elapsed = difftime(current_time, last_monitor_time); if (time_elapsed > 15) { - last_monitor_time = time(NULL); + last_monitor_time = ut_time(); if (srv_print_innodb_monitor) { /* Reset mutex_skipped counter everytime @@ -2130,7 +2145,7 @@ loop: if (srv_print_innodb_tablespace_monitor && difftime(current_time, last_tablespace_monitor_time) > 60) { - last_tablespace_monitor_time = time(NULL); + last_tablespace_monitor_time = ut_time(); fputs("========================" "========================\n", @@ -2156,7 +2171,7 @@ loop: if (srv_print_innodb_table_monitor && difftime(current_time, last_table_monitor_time) > 60) { - last_table_monitor_time = time(NULL); + last_table_monitor_time = ut_time(); fputs("===========================================\n", stderr); @@ -2216,16 +2231,20 @@ srv_lock_timeout_thread( ibool some_waits; double wait_time; ulint i; + ib_int64_t sig_count; #ifdef UNIV_PFS_THREAD pfs_register_thread(srv_lock_timeout_thread_key); #endif loop: + /* When someone is waiting for a lock, we wake up every second and check if a timeout has passed for a lock wait */ - os_thread_sleep(1000000); + sig_count = os_event_reset(srv_timeout_event); + + os_event_wait_time_low(srv_timeout_event, 1000000, sig_count); srv_lock_timeout_active = TRUE; @@ -2320,6 +2339,7 @@ srv_error_monitor_thread( ulint fatal_cnt = 0; ib_uint64_t old_lsn; ib_uint64_t new_lsn; + ib_int64_t sig_count; old_lsn = srv_start_lsn; @@ -2395,7 +2415,9 @@ loop: fflush(stderr); - os_thread_sleep(1000000); + sig_count = os_event_reset(srv_error_event); + + os_event_wait_time_low(srv_error_event, 1000000, sig_count); if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) { @@ -2646,6 +2668,21 @@ loop: for (i = 0; i < 10; i++) { ulint cur_time = ut_time_ms(); + /* ALTER TABLE in MySQL requires on Unix 
that the table handler + can drop tables lazily after there no longer are SELECT + queries to them. */ + + srv_main_thread_op_info = "doing background drop tables"; + + row_drop_tables_for_mysql_in_background(); + + srv_main_thread_op_info = ""; + + if (srv_fast_shutdown && srv_shutdown_state > 0) { + + goto background_loop; + } + buf_get_total_stat(&buf_stat); n_ios_old = log_sys->n_log_ios + buf_stat.n_pages_read @@ -2654,7 +2691,8 @@ loop: srv_main_thread_op_info = "sleeping"; srv_main_1_second_loops++; - if (next_itr_time > cur_time) { + if (next_itr_time > cur_time + && srv_shutdown_state == SRV_SHUTDOWN_NONE) { /* Get sleep interval in micro seconds. We use ut_min() to avoid long sleep in case of @@ -2668,21 +2706,6 @@ loop: /* Each iteration should happen at 1 second interval. */ next_itr_time = ut_time_ms() + 1000; - /* ALTER TABLE in MySQL requires on Unix that the table handler - can drop tables lazily after there no longer are SELECT - queries to them. */ - - srv_main_thread_op_info = "doing background drop tables"; - - row_drop_tables_for_mysql_in_background(); - - srv_main_thread_op_info = ""; - - if (srv_fast_shutdown && srv_shutdown_state > 0) { - - goto background_loop; - } - /* Flush logs if needed */ srv_sync_log_buffer_in_background(); @@ -2860,7 +2883,9 @@ background_loop: MySQL tries to drop a table while there are still open handles to it and we had to put it to the background drop queue.) */ - os_thread_sleep(100000); + if (srv_shutdown_state == SRV_SHUTDOWN_NONE) { + os_thread_sleep(100000); + } } if (srv_n_purge_threads == 0) { |