diff options
author | Michael Cahill <michael.cahill@mongodb.com> | 2017-07-28 16:21:07 +1000 |
---|---|---|
committer | Alex Gorrod <alexander.gorrod@mongodb.com> | 2017-09-21 15:32:01 +1000 |
commit | 3b6c35e22c457368763ff538706969472e1b2a14 (patch) | |
tree | 112cb1ac146f41e1a10f50cae9a961356dc6f7db | |
parent | e780d9da4b4808bb9b6aab5d6bcaaefbca41657e (diff) | |
download | mongo-3b6c35e22c457368763ff538706969472e1b2a14.tar.gz |
WT-3461 Use CLOCK_MONOTONIC for pthread_cond_timedwait if possible. (#3537)
* WT-3461 Use CLOCK_MONOTONIC for pthread_cond_timedwait if possible.
Regardless of which clock is used, don't apply the backwards-time correction to the realtime clock reading before calculating when a timed sleep should end. Otherwise, we can sleep for longer than expected by however much the clock changed.
* __wt_epoch() is now identical between POSIX and Windows, so pull the OS-independent time functions out into a new file (src/support/time.c).
(cherry picked from commit eb01ff0d21f2ec64f37ed54f867c13958fc1d16a)
-rw-r--r-- | build_posix/configure.ac.in | 38 | ||||
-rw-r--r-- | build_win/wiredtiger_config.h | 3 | ||||
-rw-r--r-- | dist/filelist | 1 | ||||
-rw-r--r-- | src/include/extern.h | 2 | ||||
-rw-r--r-- | src/include/extern_posix.h | 2 | ||||
-rw-r--r-- | src/include/extern_win.h | 2 | ||||
-rw-r--r-- | src/include/misc.i | 39 | ||||
-rw-r--r-- | src/os_posix/os_mtx_cond.c | 33 | ||||
-rw-r--r-- | src/os_posix/os_time.c | 27 | ||||
-rw-r--r-- | src/os_win/os_time.c | 15 | ||||
-rw-r--r-- | src/support/time.c | 89 |
11 files changed, 179 insertions, 72 deletions
diff --git a/build_posix/configure.ac.in b/build_posix/configure.ac.in index 0fef587b4b8..415545a0d56 100644 --- a/build_posix/configure.ac.in +++ b/build_posix/configure.ac.in @@ -160,6 +160,44 @@ AS_CASE([$host_os], [darwin*], [], [AC_CHECK_FUNCS([fdatasync])]) # the generic declaration in AC_CHECK_FUNCS is incompatible. AX_FUNC_POSIX_MEMALIGN +# Check for POSIX condition variables with monotonic clock support +AC_CACHE_CHECK([for condition waits with monotonic clock support], + [wt_cv_pthread_cond_monotonic], + [AC_RUN_IFELSE([AC_LANG_SOURCE([[ +#include <errno.h> +#include <pthread.h> +#include <stdlib.h> +#include <time.h> + +int main() +{ + int ret; + pthread_condattr_t condattr; + pthread_cond_t cond; + pthread_mutex_t mtx; + struct timespec ts; + + if ((ret = pthread_condattr_init(&condattr)) != 0) exit(1); + if ((ret = pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC)) != 0) exit(1); + if ((ret = pthread_cond_init(&cond, &condattr)) != 0) exit(1); + if ((ret = pthread_mutex_init(&mtx, NULL)) != 0) exit(1); + if ((ret = clock_gettime(CLOCK_MONOTONIC, &ts)) != 0) exit(1); + ts.tv_sec += 1; + if ((ret = pthread_mutex_lock(&mtx)) != 0) exit(1); + if ((ret = pthread_cond_timedwait(&cond, &mtx, &ts)) != 0 && ret != EINTR && ret != ETIMEDOUT) exit(1); + + exit(0); +} + ]])], + [wt_pthread_cond_monotonic=yes], + [wt_pthread_cond_monotonic=no], + [wt_pthread_cond_monotonic=no])]) +AC_MSG_RESULT($wt_pthread_cond_monotonic) +if test "$wt_pthread_cond_monotonic" = "yes" ; then + AC_DEFINE([HAVE_PTHREAD_COND_MONOTONIC], [1], + [Define to 1 if pthread condition variables support monotonic clocks.]) +fi + AC_SYS_LARGEFILE AC_C_BIGENDIAN diff --git a/build_win/wiredtiger_config.h b/build_win/wiredtiger_config.h index 78d2784cb70..8babdbfdc1b 100644 --- a/build_win/wiredtiger_config.h +++ b/build_win/wiredtiger_config.h @@ -79,6 +79,9 @@ /* Define to 1 if you have the <memory.h> header file. 
*/ /* #undef HAVE_MEMORY_H */ +/* Define to 1 if pthread condition variables support monotonic clocks. */ +/* #undef HAVE_PTHREAD_COND_MONOTONIC */ + /* Define to 1 if you have the `posix_fadvise' function. */ /* #undef HAVE_POSIX_FADVISE */ diff --git a/dist/filelist b/dist/filelist index 5a3348b940a..f53509e96ec 100644 --- a/dist/filelist +++ b/dist/filelist @@ -191,6 +191,7 @@ src/support/rand.c src/support/scratch.c src/support/stat.c src/support/thread_group.c +src/support/time.c src/txn/txn.c src/txn/txn_ckpt.c src/txn/txn_ext.c diff --git a/src/include/extern.h b/src/include/extern.h index 12233c0247a..2b5cec4dc41 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -732,6 +732,8 @@ extern int __wt_thread_group_create( WT_SESSION_IMPL *session, WT_THREAD_GROUP * extern int __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_thread_group_start_one( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool wait) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_thread_group_stop_one( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool wait) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern void __wt_seconds(WT_SESSION_IMPL *session, time_t *timep); extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session); extern void __wt_txn_get_snapshot(WT_SESSION_IMPL *session); extern int __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/include/extern_posix.h b/src/include/extern_posix.h index c0ed056c7b6..9e32e86e64c 100644 --- a/src/include/extern_posix.h +++ b/src/include/extern_posix.h @@ -28,5 +28,5 @@ extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, co extern int __wt_thread_create(WT_SESSION_IMPL 
*session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); +extern void __wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp); extern void __wt_yield(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); diff --git a/src/include/extern_win.h b/src/include/extern_win.h index d548ee0b2ec..85db8175615 100644 --- a/src/include/extern_win.h +++ b/src/include/extern_win.h @@ -26,7 +26,7 @@ extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, co extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp); +extern void __wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp); extern int __wt_to_utf16_string( WT_SESSION_IMPL *session, const char*utf8, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_to_utf8_string( WT_SESSION_IMPL *session, const wchar_t*wide, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern DWORD __wt_getlasterror(void); diff --git a/src/include/misc.i b/src/include/misc.i index fad10f01103..eb99de3dcab 100644 --- a/src/include/misc.i +++ b/src/include/misc.i @@ -41,45 +41,6 @@ 
__wt_strdup(WT_SESSION_IMPL *session, const char *str, void *retp) } /* - * __wt_seconds -- - * Return the seconds since the Epoch. - */ -static inline void -__wt_seconds(WT_SESSION_IMPL *session, time_t *timep) -{ - struct timespec t; - - __wt_epoch(session, &t); - - *timep = t.tv_sec; -} - -/* - * __wt_time_check_monotonic -- - * Check and prevent time running backward. If we detect that it has, we - * set the time structure to the previous values, making time stand still - * until we see a time in the future of the highest value seen so far. - */ -static inline void -__wt_time_check_monotonic(WT_SESSION_IMPL *session, struct timespec *tsp) -{ - /* - * Detect time going backward. If so, use the last - * saved timestamp. - */ - if (session == NULL) - return; - - if (tsp->tv_sec < session->last_epoch.tv_sec || - (tsp->tv_sec == session->last_epoch.tv_sec && - tsp->tv_nsec < session->last_epoch.tv_nsec)) { - WT_STAT_CONN_INCR(session, time_travel); - *tsp = session->last_epoch; - } else - session->last_epoch = *tsp; -} - -/* * __wt_verbose -- * Verbose message. * diff --git a/src/os_posix/os_mtx_cond.c b/src/os_posix/os_mtx_cond.c index fe010b62305..e4a6683dee9 100644 --- a/src/os_posix/os_mtx_cond.c +++ b/src/os_posix/os_mtx_cond.c @@ -19,11 +19,19 @@ __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp) WT_DECL_RET; WT_RET(__wt_calloc_one(session, &cond)); - WT_ERR(pthread_mutex_init(&cond->mtx, NULL)); - /* Initialize the condition variable to permit self-blocking. 
*/ +#ifdef HAVE_PTHREAD_COND_MONOTONIC + { + pthread_condattr_t condattr; + + WT_ERR(pthread_condattr_init(&condattr)); + WT_ERR(pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC)); + WT_ERR(pthread_cond_init(&cond->cond, &condattr)); + } +#else WT_ERR(pthread_cond_init(&cond->cond, NULL)); +#endif cond->name = name; cond->waiters = 0; @@ -79,7 +87,26 @@ __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, goto skipping; if (usecs > 0) { - __wt_epoch(session, &ts); + /* + * Get the current time as the basis for calculating when the + * wait should end. Prefer a monotonic clock source to avoid + * unexpectedly long sleeps when the system clock is adjusted. + * + * Failing that, query the time directly and don't attempt to + * correct for the clock moving backwards, which would result + * in a sleep that is too long by however much the clock is + * updated. This isn't as good as a monotonic clock source but + * makes the window of vulnerability smaller (i.e., the + * calculated time is only incorrect if the system clock + * changes in between us querying it and waiting). + */ +#ifdef HAVE_PTHREAD_COND_MONOTONIC + WT_SYSCALL_RETRY(clock_gettime(CLOCK_MONOTONIC, &ts), ret); + if (ret != 0) + WT_PANIC_MSG(session, ret, "clock_gettime"); +#else + __wt_epoch_raw(session, &ts); +#endif ts.tv_sec += (time_t) (((uint64_t)ts.tv_nsec + WT_THOUSAND * usecs) / WT_BILLION); ts.tv_nsec = (long) diff --git a/src/os_posix/os_time.c b/src/os_posix/os_time.c index fe337fea7cf..25a08d62355 100644 --- a/src/os_posix/os_time.c +++ b/src/os_posix/os_time.c @@ -9,14 +9,12 @@ #include "wt_internal.h" /* - * __wt_epoch -- - * Return the time since the Epoch. + * __wt_epoch_raw -- + * Return the time since the Epoch as reported by a system call. 
*/ void -__wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) - WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) +__wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp) { - struct timespec tmp; WT_DECL_RET; /* @@ -28,19 +26,10 @@ __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) tsp->tv_sec = 0; tsp->tv_nsec = 0; - /* - * Read into a local variable so that we're comparing the correct - * value when we check for monotonic increasing time. There are - * many places we read into an unlocked global variable. - */ #if defined(HAVE_CLOCK_GETTIME) - WT_SYSCALL_RETRY(clock_gettime(CLOCK_REALTIME, &tmp), ret); - if (ret == 0) { - __wt_time_check_monotonic(session, &tmp); - tsp->tv_sec = tmp.tv_sec; - tsp->tv_nsec = tmp.tv_nsec; + WT_SYSCALL_RETRY(clock_gettime(CLOCK_REALTIME, tsp), ret); + if (ret == 0) return; - } WT_PANIC_MSG(session, ret, "clock_gettime"); #elif defined(HAVE_GETTIMEOFDAY) { @@ -48,10 +37,8 @@ __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_SYSCALL_RETRY(gettimeofday(&v, NULL), ret); if (ret == 0) { - tmp.tv_sec = v.tv_sec; - tmp.tv_nsec = v.tv_usec * WT_THOUSAND; - __wt_time_check_monotonic(session, &tmp); - *tsp = tmp; + tsp->tv_sec = v.tv_sec; + tsp->tv_nsec = v.tv_usec * WT_THOUSAND; return; } WT_PANIC_MSG(session, ret, "gettimeofday"); diff --git a/src/os_win/os_time.c b/src/os_win/os_time.c index ba71341ab22..84c06bed6e5 100644 --- a/src/os_win/os_time.c +++ b/src/os_win/os_time.c @@ -9,24 +9,23 @@ #include "wt_internal.h" /* - * __wt_epoch -- - * Return the time since the Epoch. + * __wt_epoch_raw -- + * Return the time since the Epoch as reported by the system. 
*/ void -__wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) +__wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp) { - struct timespec tmp; FILETIME time; uint64_t ns100; + WT_UNUSED(session); + GetSystemTimeAsFileTime(&time); ns100 = (((int64_t)time.dwHighDateTime << 32) + time.dwLowDateTime) - 116444736000000000LL; - tmp.tv_sec = ns100 / 10000000; - tmp.tv_nsec = (long)((ns100 % 10000000) * 100); - __wt_time_check_monotonic(session, &tmp); - *tsp = tmp; + tsp->tv_sec = ns100 / 10000000; + tsp->tv_nsec = (long)((ns100 % 10000000) * 100); } /* diff --git a/src/support/time.c b/src/support/time.c new file mode 100644 index 00000000000..0e4562c0234 --- /dev/null +++ b/src/support/time.c @@ -0,0 +1,89 @@ +/*- + * Public Domain 2014-2017 MongoDB, Inc. + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. + * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "wt_internal.h" + +/* + * __time_check_monotonic -- + * Check and prevent time running backward. If we detect that it has, we + * set the time structure to the previous values, making time stand still + * until we see a time in the future of the highest value seen so far. + */ +static void +__time_check_monotonic(WT_SESSION_IMPL *session, struct timespec *tsp) +{ + /* + * Detect time going backward. If so, use the last + * saved timestamp. + */ + if (session == NULL) + return; + + if (tsp->tv_sec < session->last_epoch.tv_sec || + (tsp->tv_sec == session->last_epoch.tv_sec && + tsp->tv_nsec < session->last_epoch.tv_nsec)) { + WT_STAT_CONN_INCR(session, time_travel); + *tsp = session->last_epoch; + } else + session->last_epoch = *tsp; +} + +/* + * __wt_epoch -- + * Return the time since the Epoch, adjusted so it never appears to go + * backwards. + */ +void +__wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) + WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) +{ + struct timespec tmp; + + /* + * Read into a local variable so that we're comparing the correct + * value when we check for monotonic increasing time. There are + * many places we read into an unlocked global variable. + */ + __wt_epoch_raw(session, &tmp); + __time_check_monotonic(session, &tmp); + *tsp = tmp; +} + +/* + * __wt_seconds -- + * Return the seconds since the Epoch. + */ +void +__wt_seconds(WT_SESSION_IMPL *session, time_t *timep) +{ + struct timespec t; + + __wt_epoch(session, &t); + + *timep = t.tv_sec; +} |