summaryrefslogtreecommitdiff
path: root/src/third_party
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2019-12-03 03:01:41 +0000
committerevergreen <evergreen@mongodb.com>2019-12-03 03:01:41 +0000
commit587f15f0f823924c852b261497110e4b78dca7fe (patch)
treed2e92233b4d39b061729597b938c42b67502eaa7 /src/third_party
parent2e948c4e94b17089ab56a5437447f9988c31103d (diff)
downloadmongo-587f15f0f823924c852b261497110e4b78dca7fe.tar.gz
Import wiredtiger: d47dcd1f0ea992775be3d60456593c575451c435 from branch mongodb-4.4
ref: 58115abb6f..d47dcd1f0e for: 4.3.3 WT-4996 Migrate Jenkins “wiredtiger-test-check-long” job to Evergreen WT-5082 Application threads are tasked with eviction even when pinning the oldest transaction ID WT-5232 Create a wrapper script to support format stress tests in Evergreen WT-5265 Remove pip install gcovr from coverage-report test WT-5274 format.sh must handle core-dump signals and "gdb attach" build mode
Diffstat (limited to 'src/third_party')
-rwxr-xr-x[-rw-r--r--]src/third_party/wiredtiger/bench/wtperf/runners/wtperf_track.sh0
-rwxr-xr-x[-rw-r--r--]src/third_party/wiredtiger/bench/wtperf/runners/wtperf_xray.sh0
-rw-r--r--src/third_party/wiredtiger/build_posix/configure.ac.in4
-rw-r--r--src/third_party/wiredtiger/build_win/wiredtiger_config.h3
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_lru.c4
-rw-r--r--src/third_party/wiredtiger/src/include/api.h1
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h5
-rw-r--r--src/third_party/wiredtiger/src/include/time.i26
-rw-r--r--src/third_party/wiredtiger/src/include/txn.h3
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c93
-rwxr-xr-xsrc/third_party/wiredtiger/test/evergreen.yml136
-rw-r--r--src/third_party/wiredtiger/test/format/format.h4
-rwxr-xr-xsrc/third_party/wiredtiger/test/format/format.sh442
-rw-r--r--src/third_party/wiredtiger/test/format/ops.c45
-rw-r--r--src/third_party/wiredtiger/test/format/snap.c28
-rw-r--r--src/third_party/wiredtiger/test/format/t.c30
17 files changed, 713 insertions, 113 deletions
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_track.sh b/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_track.sh
index db92cb95931..db92cb95931 100644..100755
--- a/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_track.sh
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_track.sh
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_xray.sh b/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_xray.sh
index 398c6a9bcf5..398c6a9bcf5 100644..100755
--- a/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_xray.sh
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_xray.sh
diff --git a/src/third_party/wiredtiger/build_posix/configure.ac.in b/src/third_party/wiredtiger/build_posix/configure.ac.in
index c50d86678e6..30cdd4e5d06 100644
--- a/src/third_party/wiredtiger/build_posix/configure.ac.in
+++ b/src/third_party/wiredtiger/build_posix/configure.ac.in
@@ -172,8 +172,8 @@ AC_CHECK_LIB(dl, dlopen)
AC_CHECK_LIB(rt, sched_yield)
AC_CHECK_FUNCS([\
- clock_gettime fallocate ftruncate gettimeofday posix_fadvise\
- posix_fallocate posix_madvise strtouq sync_file_range timer_create])
+ clock_gettime fallocate ftruncate gettimeofday posix_fadvise posix_fallocate\
+ posix_madvise setrlimit strtouq sync_file_range timer_create])
# OS X wrongly reports that it has fdatasync
AS_CASE([$host_os], [darwin*], [], [AC_CHECK_FUNCS([fdatasync])])
diff --git a/src/third_party/wiredtiger/build_win/wiredtiger_config.h b/src/third_party/wiredtiger/build_win/wiredtiger_config.h
index 7b2d3fd63bf..c5c0dfda580 100644
--- a/src/third_party/wiredtiger/build_win/wiredtiger_config.h
+++ b/src/third_party/wiredtiger/build_win/wiredtiger_config.h
@@ -79,6 +79,9 @@
/* Define to 1 if pthread condition variables support monotonic clocks. */
/* #undef HAVE_PTHREAD_COND_MONOTONIC */
+/* Define to 1 if you have the `setrlimit' function. */
+/* #undef HAVE_SETRLIMIT */
+
/* Define to 1 if you have the `posix_fadvise' function. */
/* #undef HAVE_POSIX_FADVISE */
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 362efcebaff..10065020dd8 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "58115abb6fbb3c1cc7bfd087d41a47347bce9a69",
+ "commit": "d47dcd1f0ea992775be3d60456593c575451c435",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-4.4"
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 7f729c2e661..0faaacc710c 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -2296,9 +2296,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, bool readonly, d
* rolled back. Ignore if in recovery, those transactions can't be rolled back.
*/
if (!F_ISSET(conn, WT_CONN_RECOVERING) && __wt_cache_stuck(session)) {
- ret = __wt_txn_is_blocking_old(session);
- if (ret == 0)
- ret = __wt_txn_is_blocking_pin(session);
+ ret = __wt_txn_is_blocking(session);
if (ret == WT_ROLLBACK) {
--cache->evict_aggressive_score;
WT_STAT_CONN_INCR(session, txn_fail_cache);
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h
index 36cefa8dc68..533f276b15c 100644
--- a/src/third_party/wiredtiger/src/include/api.h
+++ b/src/third_party/wiredtiger/src/include/api.h
@@ -70,6 +70,7 @@
if ((ret) != 0 && (ret) != WT_NOTFOUND && (ret) != WT_DUPLICATE_KEY && \
(ret) != WT_PREPARE_CONFLICT && F_ISSET(&(s)->txn, WT_TXN_RUNNING)) \
F_SET(&(s)->txn, WT_TXN_ERROR); \
+ __wt_op_timer_stop(s); \
/* \
* No code after this line, otherwise error handling \
* won't be correct. \
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index d42e0d43d9d..2b00f07ae07 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -1426,9 +1426,7 @@ extern int __wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char *config
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_is_blocking_old(WT_SESSION_IMPL *session)
- WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_is_blocking_pin(WT_SESSION_IMPL *session)
+extern int __wt_txn_is_blocking(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_log_commit(WT_SESSION_IMPL *session, const char *cfg[])
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -2109,6 +2107,7 @@ static inline void __wt_cursor_dhandle_decr_use(WT_SESSION_IMPL *session);
static inline void __wt_cursor_dhandle_incr_use(WT_SESSION_IMPL *session);
static inline void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp);
static inline void __wt_op_timer_start(WT_SESSION_IMPL *session);
+static inline void __wt_op_timer_stop(WT_SESSION_IMPL *session);
static inline void __wt_page_evict_soon(WT_SESSION_IMPL *session, WT_REF *ref);
static inline void __wt_page_modify_clear(WT_SESSION_IMPL *session, WT_PAGE *page);
static inline void __wt_page_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page);
diff --git a/src/third_party/wiredtiger/src/include/time.i b/src/third_party/wiredtiger/src/include/time.i
index 0dd6781216e..208243ef612 100644
--- a/src/third_party/wiredtiger/src/include/time.i
+++ b/src/third_party/wiredtiger/src/include/time.i
@@ -160,7 +160,18 @@ __wt_clock_to_nsec(uint64_t end, uint64_t begin)
static inline void
__wt_op_timer_start(WT_SESSION_IMPL *session)
{
- session->operation_start_us = session->operation_timeout_us == 0 ? 0 : __wt_clock(session);
+ uint64_t timeout_us;
+
+ /* Timer can be configured per-transaction, and defaults to per-connection. */
+ if ((timeout_us = session->txn.operation_timeout_us) == 0)
+ timeout_us = S2C(session)->operation_timeout_us;
+ if (timeout_us == 0)
+ session->operation_start_us = session->operation_timeout_us = 0;
+ else {
+ session->operation_start_us = __wt_clock(session);
+ session->operation_timeout_us = timeout_us;
+ }
+
#ifdef HAVE_DIAGNOSTIC
/*
* This is called at the beginning of each API call. We need to clear out any old values from
@@ -172,6 +183,16 @@ __wt_op_timer_start(WT_SESSION_IMPL *session)
}
/*
+ * __wt_op_timer_stop --
+ * Stop the operations timer.
+ */
+static inline void
+__wt_op_timer_stop(WT_SESSION_IMPL *session)
+{
+ session->operation_start_us = session->operation_timeout_us = 0;
+}
+
+/*
* __wt_op_timer_fired --
* Check the operations timers.
*/
@@ -180,8 +201,7 @@ __wt_op_timer_fired(WT_SESSION_IMPL *session)
{
uint64_t diff, now;
- /* Check for both a timeout and a start time to avoid any future configuration races. */
- if (session->operation_timeout_us == 0 || session->operation_start_us == 0)
+ if (session->operation_start_us == 0 || session->operation_timeout_us == 0)
return (false);
now = __wt_clock(session);
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index bdda7a4eae9..59d201e5110 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -320,6 +320,9 @@ struct __wt_txn {
WT_ITEM *ckpt_snapshot;
bool full_ckpt;
+ /* Timeout */
+ uint64_t operation_timeout_us;
+
const char *rollback_reason; /* If rollback, the reason */
/*
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 5d4f4f8495d..8962d268459 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -469,9 +469,7 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
/* Retrieve the maximum operation time, defaulting to the database-wide configuration. */
WT_RET(__wt_config_gets(session, cfg, "operation_timeout_ms", &cval));
- session->operation_timeout_us = (uint64_t)(cval.val * WT_THOUSAND);
- if (session->operation_timeout_us == 0)
- session->operation_timeout_us = S2C(session)->operation_timeout_us;
+ txn->operation_timeout_us = (uint64_t)(cval.val * WT_THOUSAND);
/*
* The default sync setting is inherited from the connection, but can be overridden by an
@@ -621,7 +619,7 @@ __wt_txn_release(WT_SESSION_IMPL *session)
txn->prepare_timestamp = WT_TS_NONE;
/* Clear operation timer. */
- session->operation_timeout_us = 0;
+ txn->operation_timeout_us = 0;
}
/*
@@ -1589,90 +1587,43 @@ __wt_txn_global_shutdown(WT_SESSION_IMPL *session, const char *config, const cha
}
/*
- * __wt_txn_is_blocking_old --
- * Return if this transaction is the oldest transaction in the system, called by eviction to
- * determine if a worker thread should be released from eviction.
+ * __wt_txn_is_blocking --
+ * Return if this transaction is likely blocking eviction because of a pinned transaction ID,
+ * called by eviction to determine if a worker thread should be released from eviction.
*/
int
-__wt_txn_is_blocking_old(WT_SESSION_IMPL *session)
+__wt_txn_is_blocking(WT_SESSION_IMPL *session)
{
WT_CONNECTION_IMPL *conn;
WT_TXN *txn;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *state;
- uint64_t id;
- uint32_t i, session_cnt;
+ uint64_t txn_oldest;
conn = S2C(session);
txn = &session->txn;
- txn_global = &conn->txn_global;
- if (txn->id == WT_TXN_NONE || F_ISSET(txn, WT_TXN_PREPARE))
+ /* We can't roll back prepared transactions. */
+ if (F_ISSET(txn, WT_TXN_PREPARE))
return (false);
- WT_ORDERED_READ(session_cnt, conn->session_cnt);
-
/*
- * Check if the transaction is oldest one in the system. It's safe to ignore sessions allocating
- * transaction IDs, since we already have an ID, they are guaranteed to be newer.
+ * Check the oldest transaction ID of either the current transaction ID or the snapshot. Using
+ * the snapshot potentially means rolling back a read-only transaction, which MongoDB can't
+ * (yet) handle. For this reason, don't use the snapshot unless there's also a transaction ID
+ * or we're configured to time out thread operations (a way to confirm our caller is prepared
+ * for rollback).
*/
- for (i = 0, state = txn_global->states; i < session_cnt; i++, state++) {
- if (state->is_allocating)
- continue;
-
- WT_ORDERED_READ(id, state->id);
- if (id != WT_TXN_NONE && WT_TXNID_LT(id, txn->id))
- break;
- }
- return (i == session_cnt ?
- __wt_txn_rollback_required(session, "oldest transaction ID rolled back for eviction") :
+ txn_oldest = txn->id;
+ if (F_ISSET(txn, WT_TXN_HAS_SNAPSHOT) && txn->snap_min != WT_TXN_NONE &&
+ (txn_oldest != WT_TXN_NONE || __wt_op_timer_fired(session)) &&
+ (txn_oldest == WT_TXN_NONE || WT_TXNID_LT(txn->snap_min, txn_oldest)))
+ txn_oldest = txn->snap_min;
+ return (txn_oldest == conn->txn_global.oldest_id ?
+ __wt_txn_rollback_required(
+ session, "oldest pinned transaction ID rolled back for eviction") :
0);
}
/*
- * __wt_txn_is_blocking_pin --
- * Return if this transaction is likely blocking eviction because of a pinned transaction ID,
- * called by eviction to determine if a worker thread should be released from eviction.
- */
-int
-__wt_txn_is_blocking_pin(WT_SESSION_IMPL *session)
-{
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_IMPL *s;
- WT_TXN *txn;
- uint64_t snap_min;
- uint32_t i, session_cnt;
-
- conn = S2C(session);
- txn = &session->txn;
-
- /*
- * Check if we hold the oldest pinned transaction ID in the system. This potentially means
- * rolling back a read-only transaction, which MongoDB can't (yet) handle. For this reason,
- * don't check unless we're configured to time out thread operations, a way to confirm our
- * caller is prepared for rollback.
- */
- if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT) || txn->snap_min == WT_TXN_NONE)
- return (0);
- if (!__wt_op_timer_fired(session))
- return (0);
-
- WT_ORDERED_READ(session_cnt, conn->session_cnt);
-
- for (s = conn->sessions, i = 0; i < session_cnt; ++s, ++i) {
- if (F_ISSET(s, WT_SESSION_INTERNAL) || !F_ISSET(&s->txn, WT_TXN_HAS_SNAPSHOT))
- continue;
-
- WT_ORDERED_READ(snap_min, s->txn.snap_min);
- if (snap_min != WT_TXN_NONE && snap_min < txn->snap_min)
- break;
- }
- return (i == session_cnt ? __wt_txn_rollback_required(
- session, "oldest pinned transaction ID rolled back for eviction") :
- 0);
-}
-
-/*
* __wt_verbose_dump_txn_one --
* Output diagnostic information about a transaction structure.
*/
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index b4677e3293d..e28772c915b 100755
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -134,6 +134,30 @@ functions:
for i in $(seq ${times|1}); do
./t -1 -c ${config|../../../test/format/CONFIG.stress} ${extra_args|}
done
+ "many dbs test":
+ command: shell.exec
+ params:
+ working_dir: "wiredtiger/build_posix/test/manydbs"
+ script: |
+ set -o errexit
+ set -o verbose
+ ${test_env_vars|} ./t ${many_db_args|}
+ "thread test":
+ command: shell.exec
+ params:
+ working_dir: "wiredtiger/build_posix/test/thread"
+ script: |
+ set -o errexit
+ set -o verbose
+ ${test_env_vars|} ./t ${thread_test_args|}
+ "random abort test":
+ command: shell.exec
+ params:
+ working_dir: "wiredtiger/build_posix/test/csuite"
+ script: |
+ set -o errexit
+ set -o verbose
+ ${test_env_vars|} ./test_random_abort ${random_abort_args|}
"upload artifact":
- command: archive.targz_pack
params:
@@ -1579,9 +1603,8 @@ tasks:
script: |
set -o errexit
set -o verbose
- # FIX ME Remove once BUILD-5025 is done
- pip install gcovr --user
- GCOV=/opt/mongodbtoolchain/v3/bin/gcov /home/ubuntu/.local/bin/gcovr -r .. -e '.*/bt_(debug|dump|misc|salvage|vrfy).*' -e '.*/(log|progress|verify_build|strerror|env_msg|err_file|cur_config|os_abort)\..*' -e '.*_stat\..*' --html -o ../coverage_report.html
+
+ GCOV=/opt/mongodbtoolchain/v3/bin/gcov gcovr -r .. -e '.*/bt_(debug|dump|misc|salvage|vrfy).*' -e '.*/(log|progress|verify_build|strerror|env_msg|err_file|cur_config|os_abort)\..*' -e '.*_stat\..*' --html -o ../coverage_report.html
- command: s3.put
params:
aws_secret: ${aws_secret}
@@ -1638,6 +1661,111 @@ tasks:
cp -rf WT_TEST WT_TEST_$file
done
+ - name: ftruncate-test
+ commands:
+ - func: "get project"
+ - func: "compile wiredtiger"
+ vars:
+ posix_configure_flags: ac_cv_func_ftruncate=no
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger/build_posix"
+ script: |
+ set -o errexit
+ set -o verbose
+ ${test_env_vars|} $(pwd)/../test/csuite/random_abort/smoke.sh 2>&1
+ ${test_env_vars|} $(pwd)/../test/csuite/timestamp_abort/smoke.sh 2>&1
+ ${test_env_vars|} $(pwd)/test/csuite/test_truncated_log 2>&1
+
+ - name: long-test
+ commands:
+ - func: "get project"
+ - func: "configure wiredtiger"
+ vars:
+ configure_env_vars: CC=/opt/mongodbtoolchain/v3/bin/gcc CXX=/opt/mongodbtoolchain/v3/bin/g++ PATH=/opt/mongodbtoolchain/v3/bin:$PATH CFLAGS="-g -Werror"
+ posix_configure_flags: --enable-silent-rules --enable-diagnostic --disable-static
+ - func: "make wiredtiger"
+
+ # Run the long version of make check, that includes the full csuite tests
+ - func: "make check all"
+ vars:
+ test_env_vars: ${test_env_vars} TESTUTIL_ENABLE_LONG_TESTS=1
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger/build_posix"
+ script: |
+ set -o errexit
+ set -o verbose
+
+ WT3363_CHECKPOINT_OP_RACES=1 test/csuite/./test_wt3363_checkpoint_op_races 2>&1
+
+ # Many dbs test - Run with:
+ # 1. The defaults
+ - func: "many dbs test"
+ # 2. Set idle flag to turn off operations.
+ - func: "many dbs test"
+ vars:
+ many_db_args: -I
+ # 3. More dbs.
+ - func: "many dbs test"
+ vars:
+ many_db_args: -D 40
+ # 4. With idle flag and more dbs.
+ - func: "many dbs test"
+ vars:
+ many_db_args: -I -D 40
+
+ # extended test/thread runs
+ - func: "thread test"
+ vars:
+ thread_test_args: -t f
+ - func: "thread test"
+ vars:
+ thread_test_args: -S -F -n 100000 -t f
+ - func: "thread test"
+ vars:
+ thread_test_args: -t r
+ - func: "thread test"
+ vars:
+ thread_test_args: -S -F -n 100000 -t r
+ - func: "thread test"
+ vars:
+ thread_test_args: -t v
+ - func: "thread test"
+ vars:
+ thread_test_args: -S -F -n 100000 -t v
+
+ # random-abort - default (random time and number of threads)
+ - func: "random abort test"
+ # random-abort - minimum time, random number of threads
+ - func: "random abort test"
+ vars:
+ random_abort_args: -t 10
+ # random-abort - maximum time, random number of threads
+ - func: "random abort test"
+ vars:
+ random_abort_args: -t 40
+
+ # truncated-log
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger/build_posix/test/csuite/"
+ script: |
+ set -o errexit
+ set -o verbose
+
+ ./test_truncated_log
+
+ # format test
+ - func: "test format"
+ vars:
+ extra_args: file_type=fix
+ - func: "test format"
+ vars:
+ extra_args: file_type=row
+
+ #FIXME: Add wtperf testing from Jenkins "wiredtiger-test-check-long" after fixing WT-5270
+
- name: time-shift-sensitivity-test
depends_on:
- name: compile
@@ -1683,6 +1811,8 @@ buildvariants:
- name: spinlock-pthread-adaptive-test
- name: compile-wtperf
- name: wtperf-test
+ - name: ftruncate-test
+ - name: long-test
- name: ubuntu1804-python3
display_name: Ubuntu 18.04 (Python3)
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index bae89f7e2f6..66c770cc809 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -28,6 +28,9 @@
#include "test_util.h"
+#ifdef HAVE_SETRLIMIT
+#include <sys/resource.h>
+#endif
#include <signal.h>
#define EXTPATH "../../ext/" /* Extensions path */
@@ -349,6 +352,7 @@ WT_THREAD_RET random_kv(void *);
void path_setup(const char *);
int read_row_worker(WT_CURSOR *, uint64_t, WT_ITEM *, WT_ITEM *, bool);
uint32_t rng(WT_RAND_STATE *);
+void set_core_off(void);
void snap_init(TINFO *, uint64_t, bool);
void snap_repeat_single(WT_CURSOR *, TINFO *);
int snap_repeat_txn(WT_CURSOR *, TINFO *);
diff --git a/src/third_party/wiredtiger/test/format/format.sh b/src/third_party/wiredtiger/test/format/format.sh
new file mode 100755
index 00000000000..722df756afe
--- /dev/null
+++ b/src/third_party/wiredtiger/test/format/format.sh
@@ -0,0 +1,442 @@
+#! /bin/bash
+
+[ -z $BASH_VERSION ] && {
+ echo "$0 is a bash script: \$BASH_VERSION not set, exiting"
+ exit 1
+}
+
+name=$(basename $0)
+
+quit=0
+force_quit=0
+onintr()
+{
+ echo "$name: interrupted, cleaning up..."
+ force_quit=1
+}
+trap 'onintr' 2
+
+usage() {
+ echo "usage: $0 [-aFSv] [-c config] "
+ echo " [-h home] [-j parallel-jobs] [-n total-jobs] [-t minutes] [format-configuration]"
+ echo
+ echo " -a abort/recovery testing (defaults to off)"
+ echo " -c config format configuration file (defaults to CONFIG.stress)"
+ echo " -F quit on first failure (defaults to off)"
+ echo " -h home run directory (defaults to .)"
+ echo " -j parallel jobs to execute in parallel (defaults to 8)"
+ echo " -n total total jobs to execute (defaults to no limit)"
+ echo " -S run smoke-test configurations (defaults to off)"
+ echo " -t minutes minutes to run (defaults to no limit)"
+ echo " -v verbose output (defaults to off)"
+ echo " -- separates $name arguments from format arguments"
+
+ exit 1
+}
+
+# Smoke-tests.
+smoke_base_1="data_source=table rows=100000 threads=6 timer=4"
+smoke_base_2="$smoke_base_1 leaf_page_max=9 internal_page_max=9"
+smoke_list=(
+ # Three access methods.
+ "$smoke_base_1 file_type=fix"
+ "$smoke_base_1 file_type=row"
+ "$smoke_base_1 file_type=var"
+
+ # Huffman key/value encoding.
+ "$smoke_base_1 file_type=row huffman_key=1 huffman_value=1"
+ "$smoke_base_1 file_type=var huffman_key=1 huffman_value=1"
+
+ # Abort/recovery test.
+ "$smoke_base_1 file_type=row abort=1"
+
+ # LSM
+ "$smoke_base_1 file_type=row data_source=lsm"
+
+ # Force tree rebalance and the statistics server.
+ "$smoke_base_1 file_type=row statistics_server=1 rebalance=1"
+
+ # Overflow testing.
+ "$smoke_base_2 file_type=var value_min=256"
+ "$smoke_base_2 file_type=row key_min=256"
+ "$smoke_base_2 file_type=row key_min=256 value_min=256"
+)
+smoke_next=0
+
+abort_test=0
+build=""
+config="CONFIG.stress"
+first_failure=0
+format_args=""
+home="."
+minutes=0
+parallel_jobs=8
+smoke_test=0
+total_jobs=0
+verbose=0
+
+while :; do
+ case "$1" in
+ -a)
+ abort_test=1
+ shift ;;
+ -c)
+ config="$2"
+ shift ; shift ;;
+ -F)
+ first_failure=1
+ shift ;;
+ -h)
+ home="$2"
+ shift ; shift ;;
+ -j)
+ parallel_jobs="$2"
+ [[ "$parallel_jobs" =~ ^[1-9][0-9]*$ ]] || {
+ echo "$name: -j option argument must be a non-zero integer"
+ exit 1
+ }
+ shift ; shift ;;
+ -n)
+ total_jobs="$2"
+ [[ "$total_jobs" =~ ^[1-9][0-9]*$ ]] || {
+ echo "$name: -n option argument must be a non-zero integer"
+ exit 1
+ }
+ shift ; shift ;;
+ -S)
+ smoke_test=1
+ shift ;;
+ -t)
+ minutes="$2"
+ [[ "$minutes" =~ ^[1-9][0-9]*$ ]] || {
+ echo "$name: -t option argument must be a non-zero integer"
+ exit 1
+ }
+ shift ; shift ;;
+ -v)
+ verbose=1
+ shift ;;
+ --)
+ shift; break;;
+ -*)
+ usage ;;
+ *)
+ break ;;
+ esac
+done
+format_args="$*"
+
+verbose()
+{
+ [[ $verbose -ne 0 ]] && echo "$@"
+}
+
+verbose "$name: run starting at $(date)"
+
+# Find a component we need.
+# $1 name to find
+find_file()
+{
+ # Get the directory path to format.sh, which is always in wiredtiger/test/format, then
+ # use that as the base for all the other places we check.
+ d=$(dirname $0)
+
+ # Check wiredtiger/test/format/, likely location of the format binary and the CONFIG file.
+ f="$d/$1"
+ if [[ -f "$f" ]]; then
+ echo "$f"
+ return
+ fi
+
+ # Check wiredtiger/build_posix/test/format/, likely location of the format binary and the
+ # CONFIG file.
+ f="$d/../../build_posix/test/format/$1"
+ if [[ -f "$f" ]]; then
+ echo "$f"
+ return
+ fi
+
+ # Check wiredtiger/, likely location of the wt binary.
+ f="$d/../../$1"
+ if [[ -f "$f" ]]; then
+ echo "$f"
+ return
+ fi
+
+ # Check wiredtiger/build_posix/, likely location of the wt binary.
+ f="$d/../../build_posix/$1"
+ if [[ -f "$f" ]]; then
+ echo "$f"
+ return
+ fi
+
+ echo "./$1"
+}
+
+# Find the format and wt binaries (the latter is only required for abort/recovery testing),
+# the configuration file and the run directory.
+format_binary=$(find_file "t")
+[[ ! -x "$format_binary" ]] && {
+ echo "$name: format program \"$format_binary\" not found"
+ exit 1
+}
+[[ $abort_test -ne 0 ]] || [[ $smoke_test -ne 0 ]] && {
+ wt_binary=$(find_file "wt")
+ [[ ! -x "$wt_binary" ]] && {
+ echo "$name: wt program \"$wt_binary\" not found"
+ exit 1
+ }
+}
+config=$(find_file "$config")
+[[ -f "$config" ]] || {
+ echo "$name: configuration file \"$config\" not found"
+ exit 1
+}
+[[ -d "$home" ]] || {
+ echo "$name: directory \"$home\" not found"
+ exit 1
+}
+
+verbose "$name configuration: $format_binary [-c $config]\
+[-h $home] [-j $parallel_jobs] [-n $total_jobs] [-t $minutes] $format_args"
+
+failure=0
+success=0
+running=0
+status="format.sh-status"
+
+# Report a failure.
+# $1 directory name
+report_failure()
+{
+ dir=$1
+ log="$dir.log"
+
+ echo "$name: failure status reported" > $dir/$status
+ failure=$(($failure + 1))
+
+ # Forcibly quit if first-failure configured.
+ [[ $first_failure -ne 0 ]] && force_quit=1
+
+ echo "$name: job in $dir failed"
+ echo "$name: $dir log:"
+ sed 's/^/ > /' < $log
+}
+
+# Resolve/cleanup completed jobs.
+resolve()
+{
+ running=0
+ list=$(ls $home | grep '^RUNDIR.[0-9]*$')
+ for i in $list; do
+ dir="$home/$i"
+ log="$dir.log"
+
+ # Skip directories that aren't ours.
+ [[ ! -f "$log" ]] && continue
+
+ # Skip failures we've already reported.
+ [[ -f "$dir/$status" ]] && continue
+
+ # Get the process ID, ignore any jobs that aren't yet running.
+ pid=`grep -E 'process.*running' $log | awk '{print $3}'`
+ [[ "$pid" =~ ^[1-9][0-9]*$ ]] || continue
+
+ # Leave any process waiting for a gdb attach running, but report it as a failure.
+ grep -E 'waiting for debugger' $log > /dev/null && {
+ report_failure $dir
+ continue
+ }
+
+ # If the job is still running, ignore it unless we're forcibly quitting.
+ kill -s 0 $pid > /dev/null 2>&1 && {
+ [[ $force_quit -eq 0 ]] && {
+ running=$((running + 1))
+ continue
+ }
+ kill -s TERM $pid
+ }
+
+ # Wait for the job and get an exit status.
+ wait $pid
+ eret=$?
+
+ # Remove successful jobs.
+ grep 'successful run completed' $log > /dev/null && {
+ rm -rf $dir $log
+ success=$(($success + 1))
+ verbose "$name: job in $dir successfully completed"
+ continue
+ }
+
+ # Remove jobs we killed.
+ grep 'caught signal' $log > /dev/null && {
+ rm -rf $dir $log
+ verbose "$name: job in $dir signalled"
+ continue
+ }
+
+ # Test recovery on jobs configured for random abort.
+ grep 'aborting to test recovery' $log > /dev/null && {
+ cp -pr $dir $dir.RECOVER
+
+ (echo
+ echo "$name: running recovery after abort test"
+ echo "$name: original directory copied into $dir.RECOVER"
+ echo) >> $log
+
+ # Everything is a table unless explicitly a file.
+ uri="table:wt"
+ grep 'data_source=file' $dir/CONFIG > /dev/null && uri="file:wt"
+
+ # Use the wt utility to recover & verify the object.
+ if $($wt_binary -R -h $dir verify $uri >> $log 2>&1); then
+ rm -rf $dir $dir.RECOVER $log
+ success=$(($success + 1))
+ verbose "$name: job in $dir successfully completed"
+ else
+ echo "$name: job in $dir failed abort/recovery testing"
+ report_failure $dir
+ fi
+ continue
+ }
+
+ # Check for the library abort message, or an error from format.
+ grep -E 'aborting WiredTiger library|run FAILED' $log > /dev/null && {
+ report_failure $dir
+ continue
+ }
+
+ # There's some chance we just dropped core. We have the exit status of the process,
+ # but there's no way to be sure. There are reasons the process' exit status looks
+ # like a core dump was created (format deliberately causes a segfault in the case
+ # of abort/recovery testing, and does work that can often segfault in the case of a
+ # snapshot-isolation mismatch failure), but those cases have already been handled,
+ # format is responsible for logging a failure before the core can happen. If the
+ # process exited with a likely failure, call it a failure.
+ signame=""
+ case $eret in
+ $((128 + 3)))
+ signame="SIGQUIT";;
+ $((128 + 4)))
+ signame="SIGILL";;
+ $((128 + 6)))
+ signame="SIGABRT";;
+ $((128 + 7)))
+ signame="SIGBUS";;
+ $((128 + 8)))
+ signame="SIGFPE";;
+ $((128 + 11)))
+ signame="SIGSEGV";;
+ $((128 + 24)))
+ signame="SIGXCPU";;
+ $((128 + 25)))
+ signame="SIGXFSZ";;
+ $((128 + 31)))
+ signame="SIGSYS";;
+ esac
+ [[ ! -z $signame ]] && {
+ (echo
+ echo "$name: job in $dir killed with signal $signame"
+ echo "$name: there may be a core dump associated with this failure"
+ echo) >> $log
+
+ echo "$name: job in $dir killed with signal $signame"
+ echo "$name: there may be a core dump associated with this failure"
+
+ report_failure $dir
+ continue
+ }
+
+ done
+ return 0
+}
+
+# Start a single job.
+count_jobs=0
+format()
+{
+ count_jobs=$(($count_jobs + 1))
+ dir="$home/RUNDIR.$count_jobs"
+ log="$dir.log"
+
+ if [[ $smoke_test -ne 0 ]]; then
+ args=${smoke_list[$smoke_next]}
+ smoke_next=$(($smoke_next + 1))
+ echo "$name: starting smoke-test job in $dir"
+ else
+ args=$format_args
+
+ # If abort/recovery testing is configured, do it 5% of the time.
+ [[ $abort_test -ne 0 ]] && [[ $(($count_jobs % 20)) -eq 0 ]] && args="$args abort=1"
+
+ echo "$name: starting job in $dir"
+ fi
+
+ cmd="$format_binary -c "$config" -h "$dir" -1 $args quiet=1"
+ verbose "$name: $cmd"
+
+ # Disassociate the command from the shell script so we can exit and let the command
+ # continue to run.
+ nohup $cmd > $log 2>&1 &
+}
+
+seconds=$((minutes * 60))
+start_time="$(date -u +%s)"
+while :; do
+ # Check if our time has expired.
+ [[ $seconds -ne 0 ]] && {
+ now="$(date -u +%s)"
+ elapsed=$(($now - $start_time))
+
+ # If we've run out of time, terminate all running jobs.
+ [[ $elapsed -ge $seconds ]] && {
+ verbose "$name: run timed out at $(date)"
+ force_quit=1
+ }
+ }
+
+ # Start more jobs.
+ while :; do
+ # Check if we're only running the smoke-tests and we're done.
+ [[ $smoke_test -ne 0 ]] && [[ $smoke_next -ge ${#smoke_list[@]} ]] && quit=1
+
+ # Check if the total number of jobs has been reached.
+ [[ $total_jobs -ne 0 ]] && [[ $count_jobs -ge $total_jobs ]] && quit=1
+
+ # Check if less than 60 seconds left on any timer. The goal is to avoid killing
+ # jobs that haven't yet configured signal handlers, because we rely on handler
+ # output to determine their final status.
+ [[ $seconds -ne 0 ]] && [[ $(($seconds - $elapsed)) -lt 60 ]] && quit=1
+
+ # Don't create more jobs if we're quitting for any reason.
+ [[ $force_quit -ne 0 ]] || [[ $quit -ne 0 ]] && break;
+
+ # Check if the maximum number of jobs in parallel has been reached.
+ [[ $running -ge $parallel_jobs ]] && break
+ running=$(($running + 1))
+
+ # Start another job, but don't pound on the system.
+ format
+ sleep 2
+ done
+
+ # Clean up and update status.
+ success_save=$success
+ failure_save=$failure
+ resolve
+ [[ $success -ne $success_save ]] || [[ $failure -ne $failure_save ]] &&
+ echo "$name: $success successful jobs, $failure failed jobs"
+
+ # Quit if we're done and there aren't any jobs left to wait for.
+ [[ $quit -ne 0 ]] || [[ $force_quit -ne 0 ]] && [[ $running -eq 0 ]] && break
+
+ # Wait for awhile, unless there are jobs to start.
+ [[ $running -ge $parallel_jobs ]] && sleep 10
+done
+
+echo "$name: $success successful jobs, $failure failed jobs"
+
+verbose "$name: run ending at $(date)"
+[[ $failure -ne 0 ]] && exit 1
+exit 0
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index f136372260c..d74e5cda0c0 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -60,6 +60,10 @@ modify_repl_init(void)
modify_repl[i] = "zyxwvutsrqponmlkjihgfedcba"[i % 26];
}
+/*
+ * set_alarm --
+ * Set a timer.
+ */
static void
set_alarm(void)
{
@@ -75,6 +79,41 @@ set_alarm(void)
#endif
}
+/*
+ * set_core_off --
+ * Turn off core dumps: zero the soft and hard RLIMIT_CORE limits (no-op without HAVE_SETRLIMIT).
+ */
+void
+set_core_off(void)
+{
+#ifdef HAVE_SETRLIMIT
+ struct rlimit rlim;
+
+ rlim.rlim_cur = rlim.rlim_max = 0;
+ testutil_check(setrlimit(RLIMIT_CORE, &rlim));
+#endif
+}
+
+/*
+ * random_failure --
+ * Fail the process: announce the abort on stdout, turn off core dumps, then write through NULL.
+ */
+static void
+random_failure(void)
+{
+ static char *core = NULL;
+
+ /* Let our caller know, and flush the message before failing. */
+ printf("%s: aborting to test recovery\n", progname);
+ fflush(stdout);
+
+ /* Turn off core dumps. */
+ set_core_off();
+
+ /* Fail at a random moment: the store through a NULL pointer is deliberate. */
+ *core = 0;
+}
+
TINFO **tinfo_list;
/*
@@ -222,10 +261,8 @@ wts_ops(bool lastrun)
/*
* On the last execution, optionally drop core for recovery testing.
*/
- if (lastrun && g.c_abort) {
- static char *core = NULL;
- *core = 0;
- }
+ if (lastrun && g.c_abort)
+ random_failure();
tinfo->quit = true;
}
}
diff --git a/src/third_party/wiredtiger/test/format/snap.c b/src/third_party/wiredtiger/test/format/snap.c
index eed296e212f..15df14b71dc 100644
--- a/src/third_party/wiredtiger/test/format/snap.c
+++ b/src/third_party/wiredtiger/test/format/snap.c
@@ -229,14 +229,16 @@ snap_verify(WT_CURSOR *cursor, TINFO *tinfo, SNAP_OPS *snap)
#ifdef HAVE_DIAGNOSTIC
/*
* We have a mismatch. Try to print out as much information as we can. In doing so, we are
- * calling into the debug code directly and that does not take locks. So it is possible that the
- * calls may crash in some way.
- *
- * The most important information is the key/value mismatch information. Then try to dump out
- * the other information. Right now we dump the entire lookaside table including what is on
- * disk. That can potentially be very large. If it becomes a problem, this can be modified to
- * just dump out the page this key is on.
+ * calling into the debug code directly and that does not take locks, so it's possible we will
+ * simply drop core. The most important information is the key/value mismatch information. Then
+ * try to dump out the other information. Right now we dump the entire lookaside table including
+ * what is on disk. That can potentially be very large. If it becomes a problem, this can be
+ * modified to just dump out the page this key is on. Write a failure message into the log file
+ * first so format.sh knows we failed, and turn off core dumps.
*/
+ fprintf(stderr, "\n%s: run FAILED\n", progname);
+ set_core_off();
+
fprintf(stderr, "snapshot-isolation error: Dumping page to %s\n", g.home_pagedump);
testutil_check(__wt_debug_cursor_page(cursor, g.home_pagedump));
fprintf(stderr, "snapshot-isolation error: Dumping LAS to %s\n", g.home_lasdump);
@@ -244,16 +246,8 @@ snap_verify(WT_CURSOR *cursor, TINFO *tinfo, SNAP_OPS *snap)
if (g.logging)
testutil_check(cursor->session->log_flush(cursor->session, "sync=off"));
#endif
- switch (g.type) {
- case FIX:
- case VAR:
- testutil_die(ret, "snapshot-isolation: %" PRIu64 " search mismatch", keyno);
- /* NOTREACHED */
- case ROW:
- testutil_die(
- ret, "snapshot-isolation: %.*s search mismatch", (int)key->size, (char *)key->data);
- /* NOTREACHED */
- }
+
+ testutil_assert(0);
/* NOTREACHED */
return (1);
diff --git a/src/third_party/wiredtiger/test/format/t.c b/src/third_party/wiredtiger/test/format/t.c
index 7a43ca9f9b4..7ddfe37191c 100644
--- a/src/third_party/wiredtiger/test/format/t.c
+++ b/src/third_party/wiredtiger/test/format/t.c
@@ -38,15 +38,29 @@ extern int __wt_optind;
extern char *__wt_optarg;
/*
+ * signal_timer --
+ * Alarm signal handler, report the signal on stderr and drop core via __wt_abort; never returns.
+ */
+static void signal_timer(int signo) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void
+signal_timer(int signo)
+{
+ fprintf(stderr, "format caught signal %d, aborting the process\n", signo);
+ fflush(stderr);
+ __wt_abort(NULL);
+}
+
+/*
* signal_handler --
- * Handle signals.
+ * Generic signal handler, report the signal on stderr and exit with status 0; never returns.
*/
static void signal_handler(int signo) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
static void
signal_handler(int signo)
{
- fprintf(stderr, "format caught signal %d, aborting the process\n", signo);
- __wt_abort(NULL);
+ fprintf(stderr, "format caught signal %d, exiting\n", signo);
+ fflush(stderr);
+ exit(0);
}
int
@@ -64,9 +78,10 @@ main(int argc, char *argv[])
/*
* Windows and Linux support different sets of signals, be conservative about installing handlers.
+ * If we time out, we want a core dump; otherwise, just exit.
*/
#ifdef SIGALRM
- (void)signal(SIGALRM, signal_handler);
+ (void)signal(SIGALRM, signal_timer);
#endif
#ifdef SIGHUP
(void)signal(SIGHUP, signal_handler);
@@ -179,7 +194,8 @@ main(int argc, char *argv[])
testutil_check(pthread_rwlock_init(&g.death_lock, NULL));
testutil_check(pthread_rwlock_init(&g.ts_lock, NULL));
- printf("%s: process %" PRIdMAX "\n", progname, (intmax_t)getpid());
+ printf("%s: process %" PRIdMAX " running\n", progname, (intmax_t)getpid());
+ fflush(stdout);
while (++g.run_cnt <= g.c_runs || g.c_runs == 0) {
startup(); /* Start a run */
@@ -260,6 +276,8 @@ main(int argc, char *argv[])
config_clear();
+ printf("%s: successful run completed\n", progname);
+
return (EXIT_SUCCESS);
}
@@ -314,7 +332,7 @@ format_die(void)
fclose_and_clear(&g.logfp);
fclose_and_clear(&g.randfp);
- fprintf(stderr, "\n");
+ fprintf(stderr, "\n%s: run FAILED\n", progname);
/* Display the configuration that failed. */
if (g.run_cnt)