summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Michael Cahill <michael.cahill@mongodb.com> 2017-07-28 16:21:07 +1000
committer: Alex Gorrod <alexander.gorrod@mongodb.com> 2017-09-21 15:32:01 +1000
commit: 3b6c35e22c457368763ff538706969472e1b2a14 (patch)
tree: 112cb1ac146f41e1a10f50cae9a961356dc6f7db
parent: e780d9da4b4808bb9b6aab5d6bcaaefbca41657e (diff)
download: mongo-3b6c35e22c457368763ff538706969472e1b2a14.tar.gz
WT-3461 Use CLOCK_MONOTONIC for pthread_cond_timedwait if possible. (#3537)
* WT-3461 Use CLOCK_MONOTONIC for pthread_cond_timedwait if possible. Regardless, don't apply the backward-clock correction to the realtime clock reading before calculating when a timed sleep should end; otherwise, we can sleep for longer than expected by however much the clock changed.
* __wt_epoch() is now identical between POSIX and Windows, so pull the OS-independent time functions out into a new file.
(cherry picked from commit eb01ff0d21f2ec64f37ed54f867c13958fc1d16a)
-rw-r--r--build_posix/configure.ac.in38
-rw-r--r--build_win/wiredtiger_config.h3
-rw-r--r--dist/filelist1
-rw-r--r--src/include/extern.h2
-rw-r--r--src/include/extern_posix.h2
-rw-r--r--src/include/extern_win.h2
-rw-r--r--src/include/misc.i39
-rw-r--r--src/os_posix/os_mtx_cond.c33
-rw-r--r--src/os_posix/os_time.c27
-rw-r--r--src/os_win/os_time.c15
-rw-r--r--src/support/time.c89
11 files changed, 179 insertions, 72 deletions
diff --git a/build_posix/configure.ac.in b/build_posix/configure.ac.in
index 0fef587b4b8..415545a0d56 100644
--- a/build_posix/configure.ac.in
+++ b/build_posix/configure.ac.in
@@ -160,6 +160,44 @@ AS_CASE([$host_os], [darwin*], [], [AC_CHECK_FUNCS([fdatasync])])
# the generic declaration in AC_CHECK_FUNCS is incompatible.
AX_FUNC_POSIX_MEMALIGN
+# Check for POSIX condition variables with monotonic clock support
+#
+# The verdict of the test program must be stored in the cache variable named
+# in the second argument of the cache check: that is the variable whose value
+# is reported, cached between runs, and honored when preset by the user (for
+# example when cross compiling, where the program cannot be run and support
+# is assumed to be absent).
+AC_CACHE_CHECK([for condition waits with monotonic clock support],
+ [wt_cv_pthread_cond_monotonic],
+ [AC_RUN_IFELSE([AC_LANG_SOURCE([[
+#include <errno.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <time.h>
+
+int main(void)
+{
+ int ret;
+ pthread_condattr_t condattr;
+ pthread_cond_t cond;
+ pthread_mutex_t mtx;
+ struct timespec ts;
+
+ if ((ret = pthread_condattr_init(&condattr)) != 0) exit(1);
+ if ((ret = pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC)) != 0) exit(1);
+ if ((ret = pthread_cond_init(&cond, &condattr)) != 0) exit(1);
+ if ((ret = pthread_mutex_init(&mtx, NULL)) != 0) exit(1);
+ if ((ret = clock_gettime(CLOCK_MONOTONIC, &ts)) != 0) exit(1);
+ ts.tv_sec += 1;
+ if ((ret = pthread_mutex_lock(&mtx)) != 0) exit(1);
+ if ((ret = pthread_cond_timedwait(&cond, &mtx, &ts)) != 0 && ret != EINTR && ret != ETIMEDOUT) exit(1);
+
+ exit(0);
+}
+ ]])],
+ [wt_cv_pthread_cond_monotonic=yes],
+ [wt_cv_pthread_cond_monotonic=no],
+ [wt_cv_pthread_cond_monotonic=no])])
+if test "$wt_cv_pthread_cond_monotonic" = "yes" ; then
+ AC_DEFINE([HAVE_PTHREAD_COND_MONOTONIC], [1],
+ [Define to 1 if pthread condition variables support monotonic clocks.])
+fi
+
AC_SYS_LARGEFILE
AC_C_BIGENDIAN
diff --git a/build_win/wiredtiger_config.h b/build_win/wiredtiger_config.h
index 78d2784cb70..8babdbfdc1b 100644
--- a/build_win/wiredtiger_config.h
+++ b/build_win/wiredtiger_config.h
@@ -79,6 +79,9 @@
/* Define to 1 if you have the <memory.h> header file. */
/* #undef HAVE_MEMORY_H */
+/* Define to 1 if pthread condition variables support monotonic clocks. */
+/* #undef HAVE_PTHREAD_COND_MONOTONIC */
+
/* Define to 1 if you have the `posix_fadvise' function. */
/* #undef HAVE_POSIX_FADVISE */
diff --git a/dist/filelist b/dist/filelist
index 5a3348b940a..f53509e96ec 100644
--- a/dist/filelist
+++ b/dist/filelist
@@ -191,6 +191,7 @@ src/support/rand.c
src/support/scratch.c
src/support/stat.c
src/support/thread_group.c
+src/support/time.c
src/txn/txn.c
src/txn/txn_ckpt.c
src/txn/txn_ext.c
diff --git a/src/include/extern.h b/src/include/extern.h
index 12233c0247a..2b5cec4dc41 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -732,6 +732,8 @@ extern int __wt_thread_group_create( WT_SESSION_IMPL *session, WT_THREAD_GROUP *
extern int __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_thread_group_start_one( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool wait) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_thread_group_stop_one( WT_SESSION_IMPL *session, WT_THREAD_GROUP *group, bool wait) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
+extern void __wt_seconds(WT_SESSION_IMPL *session, time_t *timep);
extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session);
extern void __wt_txn_get_snapshot(WT_SESSION_IMPL *session);
extern int __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/include/extern_posix.h b/src/include/extern_posix.h
index c0ed056c7b6..9e32e86e64c 100644
--- a/src/include/extern_posix.h
+++ b/src/include/extern_posix.h
@@ -28,5 +28,5 @@ extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, co
extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
+extern void __wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp);
extern void __wt_yield(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
diff --git a/src/include/extern_win.h b/src/include/extern_win.h
index d548ee0b2ec..85db8175615 100644
--- a/src/include/extern_win.h
+++ b/src/include/extern_win.h
@@ -26,7 +26,7 @@ extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, co
extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp);
+extern void __wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp);
extern int __wt_to_utf16_string( WT_SESSION_IMPL *session, const char*utf8, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_to_utf8_string( WT_SESSION_IMPL *session, const wchar_t*wide, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern DWORD __wt_getlasterror(void);
diff --git a/src/include/misc.i b/src/include/misc.i
index fad10f01103..eb99de3dcab 100644
--- a/src/include/misc.i
+++ b/src/include/misc.i
@@ -41,45 +41,6 @@ __wt_strdup(WT_SESSION_IMPL *session, const char *str, void *retp)
}
/*
- * __wt_seconds --
- * Return the seconds since the Epoch.
- */
-static inline void
-__wt_seconds(WT_SESSION_IMPL *session, time_t *timep)
-{
- struct timespec t;
-
- __wt_epoch(session, &t);
-
- *timep = t.tv_sec;
-}
-
-/*
- * __wt_time_check_monotonic --
- * Check and prevent time running backward. If we detect that it has, we
- * set the time structure to the previous values, making time stand still
- * until we see a time in the future of the highest value seen so far.
- */
-static inline void
-__wt_time_check_monotonic(WT_SESSION_IMPL *session, struct timespec *tsp)
-{
- /*
- * Detect time going backward. If so, use the last
- * saved timestamp.
- */
- if (session == NULL)
- return;
-
- if (tsp->tv_sec < session->last_epoch.tv_sec ||
- (tsp->tv_sec == session->last_epoch.tv_sec &&
- tsp->tv_nsec < session->last_epoch.tv_nsec)) {
- WT_STAT_CONN_INCR(session, time_travel);
- *tsp = session->last_epoch;
- } else
- session->last_epoch = *tsp;
-}
-
-/*
* __wt_verbose --
* Verbose message.
*
diff --git a/src/os_posix/os_mtx_cond.c b/src/os_posix/os_mtx_cond.c
index fe010b62305..e4a6683dee9 100644
--- a/src/os_posix/os_mtx_cond.c
+++ b/src/os_posix/os_mtx_cond.c
@@ -19,11 +19,19 @@ __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp)
WT_DECL_RET;
WT_RET(__wt_calloc_one(session, &cond));
-
WT_ERR(pthread_mutex_init(&cond->mtx, NULL));
- /* Initialize the condition variable to permit self-blocking. */
+#ifdef HAVE_PTHREAD_COND_MONOTONIC
+ {
+ pthread_condattr_t condattr;
+
+ WT_ERR(pthread_condattr_init(&condattr));
+ WT_ERR(pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC));
+ WT_ERR(pthread_cond_init(&cond->cond, &condattr));
+ }
+#else
WT_ERR(pthread_cond_init(&cond->cond, NULL));
+#endif
cond->name = name;
cond->waiters = 0;
@@ -79,7 +87,26 @@ __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond,
goto skipping;
if (usecs > 0) {
- __wt_epoch(session, &ts);
+ /*
+ * Get the current time as the basis for calculating when the
+ * wait should end. Prefer a monotonic clock source to avoid
+ * unexpectedly long sleeps when the system clock is adjusted.
+ *
+ * Failing that, query the time directly and don't attempt to
+ * correct for the clock moving backwards, which would result
+ * in a sleep that is too long by however much the clock is
+ * updated. This isn't as good as a monotonic clock source but
+ * makes the window of vulnerability smaller (i.e., the
+ * calculated time is only incorrect if the system clock
+ * changes in between us querying it and waiting).
+ */
+#ifdef HAVE_PTHREAD_COND_MONOTONIC
+ WT_SYSCALL_RETRY(clock_gettime(CLOCK_MONOTONIC, &ts), ret);
+ if (ret != 0)
+ WT_PANIC_MSG(session, ret, "clock_gettime");
+#else
+ __wt_epoch_raw(session, &ts);
+#endif
ts.tv_sec += (time_t)
(((uint64_t)ts.tv_nsec + WT_THOUSAND * usecs) / WT_BILLION);
ts.tv_nsec = (long)
diff --git a/src/os_posix/os_time.c b/src/os_posix/os_time.c
index fe337fea7cf..25a08d62355 100644
--- a/src/os_posix/os_time.c
+++ b/src/os_posix/os_time.c
@@ -9,14 +9,12 @@
#include "wt_internal.h"
/*
- * __wt_epoch --
- * Return the time since the Epoch.
+ * __wt_epoch_raw --
+ * Return the time since the Epoch as reported by a system call.
*/
void
-__wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
- WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+__wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp)
{
- struct timespec tmp;
WT_DECL_RET;
/*
@@ -28,19 +26,10 @@ __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
tsp->tv_sec = 0;
tsp->tv_nsec = 0;
- /*
- * Read into a local variable so that we're comparing the correct
- * value when we check for monotonic increasing time. There are
- * many places we read into an unlocked global variable.
- */
#if defined(HAVE_CLOCK_GETTIME)
- WT_SYSCALL_RETRY(clock_gettime(CLOCK_REALTIME, &tmp), ret);
- if (ret == 0) {
- __wt_time_check_monotonic(session, &tmp);
- tsp->tv_sec = tmp.tv_sec;
- tsp->tv_nsec = tmp.tv_nsec;
+ WT_SYSCALL_RETRY(clock_gettime(CLOCK_REALTIME, tsp), ret);
+ if (ret == 0)
return;
- }
WT_PANIC_MSG(session, ret, "clock_gettime");
#elif defined(HAVE_GETTIMEOFDAY)
{
@@ -48,10 +37,8 @@ __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
WT_SYSCALL_RETRY(gettimeofday(&v, NULL), ret);
if (ret == 0) {
- tmp.tv_sec = v.tv_sec;
- tmp.tv_nsec = v.tv_usec * WT_THOUSAND;
- __wt_time_check_monotonic(session, &tmp);
- *tsp = tmp;
+ tsp->tv_sec = v.tv_sec;
+ tsp->tv_nsec = v.tv_usec * WT_THOUSAND;
return;
}
WT_PANIC_MSG(session, ret, "gettimeofday");
diff --git a/src/os_win/os_time.c b/src/os_win/os_time.c
index ba71341ab22..84c06bed6e5 100644
--- a/src/os_win/os_time.c
+++ b/src/os_win/os_time.c
@@ -9,24 +9,23 @@
#include "wt_internal.h"
/*
- * __wt_epoch --
- * Return the time since the Epoch.
+ * __wt_epoch_raw --
+ * Return the time since the Epoch as reported by the system.
*/
void
-__wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
+__wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp)
{
- struct timespec tmp;
FILETIME time;
uint64_t ns100;
+ WT_UNUSED(session);
+
GetSystemTimeAsFileTime(&time);
ns100 = (((int64_t)time.dwHighDateTime << 32) + time.dwLowDateTime)
- 116444736000000000LL;
- tmp.tv_sec = ns100 / 10000000;
- tmp.tv_nsec = (long)((ns100 % 10000000) * 100);
- __wt_time_check_monotonic(session, &tmp);
- *tsp = tmp;
+ tsp->tv_sec = ns100 / 10000000;
+ tsp->tv_nsec = (long)((ns100 % 10000000) * 100);
}
/*
diff --git a/src/support/time.c b/src/support/time.c
new file mode 100644
index 00000000000..0e4562c0234
--- /dev/null
+++ b/src/support/time.c
@@ -0,0 +1,89 @@
+/*-
+ * Public Domain 2014-2017 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __time_check_monotonic --
+ *	Keep a session from observing time moving backward. A timestamp that
+ *	is earlier than the latest one recorded for the session is replaced by
+ *	that recorded value, so time appears to stand still until the clock
+ *	catches up; any other timestamp becomes the new recorded value.
+ */
+static void
+__time_check_monotonic(WT_SESSION_IMPL *session, struct timespec *tsp)
+{
+	struct timespec *last;
+	bool backward;
+
+	/* Without a session there is no previous time to compare against. */
+	if (session == NULL)
+		return;
+
+	/* Compare seconds first; nanoseconds only break a tie. */
+	last = &session->last_epoch;
+	if (tsp->tv_sec != last->tv_sec)
+		backward = tsp->tv_sec < last->tv_sec;
+	else
+		backward = tsp->tv_nsec < last->tv_nsec;
+
+	if (!backward) {
+		/* Time did not go backward, remember the latest value. */
+		*last = *tsp;
+		return;
+	}
+
+	/* Time went backward: count it and hand back the saved timestamp. */
+	WT_STAT_CONN_INCR(session, time_travel);
+	*tsp = *last;
+}
+
+/*
+ * __wt_epoch --
+ *	Get the time since the Epoch, filtered so that it never appears to
+ *	move backward.
+ */
+void
+__wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
+    WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+{
+	struct timespec now;
+
+	/*
+	 * Many callers read into an unlocked global variable. Fetch the time
+	 * and run the backward-time check on a private copy, so the check
+	 * compares the value we actually read, and only then store the result
+	 * in the caller's structure.
+	 */
+	__wt_epoch_raw(session, &now);
+	__time_check_monotonic(session, &now);
+
+	*tsp = now;
+}
+
+/*
+ * __wt_seconds --
+ *	Get the whole seconds since the Epoch, taken from __wt_epoch.
+ */
+void
+__wt_seconds(WT_SESSION_IMPL *session, time_t *timep)
+{
+	struct timespec now;
+
+	/* Only the seconds field is passed back to the caller. */
+	__wt_epoch(session, &now);
+	*timep = now.tv_sec;
+}