summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Hows <howsdav@gmail.com>2017-06-27 12:12:12 +1000
committerGitHub <noreply@github.com>2017-06-27 12:12:12 +1000
commitbf72bb44985c49c2d80f47500958469ec56d71b6 (patch)
tree3025dd6fbc30a25a50b97d36adcfdcdf71a6d761
parenta23f520a8496986c60fd6496b27b21291ff29af7 (diff)
downloadmongo-bf72bb44985c49c2d80f47500958469ec56d71b6.tar.gz
WT-3363 Add a test to confirm if operations block waiting for checkpoints to complete (#3462)
* Add a test to confirm if operations conflict with checkpoints * Move handler functions into test/utility * Ensure that the checkpoint delays only affect external checkpoints and skip internal ones * Rename to timing stress, remove an un-needed ifdef
-rw-r--r--dist/api_data.py9
-rw-r--r--dist/flags.py3
-rw-r--r--src/config/config_def.c116
-rw-r--r--src/conn/conn_api.c46
-rw-r--r--src/include/connection.h6
-rw-r--r--src/include/extern.h1
-rw-r--r--src/include/flags.h1
-rw-r--r--src/txn/txn_ckpt.c29
-rw-r--r--test/csuite/Makefile.am3
-rw-r--r--test/csuite/wt3363_checkpoint_op_races/main.c268
-rw-r--r--test/utility/test_util.h25
-rw-r--r--test/utility/thread.c282
12 files changed, 739 insertions, 50 deletions
diff --git a/dist/api_data.py b/dist/api_data.py
index 2cc6267d31e..4a4ffae535d 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -401,6 +401,15 @@ connection_runtime_config = [
above 0 configures periodic checkpoints''',
min='0', max='100000'),
]),
+ Config('diagnostic_timing_stress', '', r'''
+ enable insertion of code that interrupts the usual timing of
+ operations with a goal of uncovering race conditions and unexpected
+ blocking. This option is intended for use with internal stress
+ testing of WiredTiger. Only available if WiredTiger is configured
+ with --enable-diagnostic. Options are given as a list, such as
+ <code>"diagnostic_timing_stress=[checkpoint_slow]"</code>''',
+ type='list', undoc=True, choices=[
+ 'checkpoint_slow']),
Config('error_prefix', '', r'''
prefix string for error messages'''),
Config('eviction', '', r'''
diff --git a/dist/flags.py b/dist/flags.py
index 8edabd69648..43db7a67054 100644
--- a/dist/flags.py
+++ b/dist/flags.py
@@ -8,6 +8,9 @@ flags = {
###################################################
# Internal routine flag declarations
###################################################
+ 'diagnostic_timing_stress' : [
+ 'TIMING_STRESS_CHECKPOINT_SLOW',
+ ],
'log_scan' : [
'LOGSCAN_FIRST',
'LOGSCAN_FROM_CKP',
diff --git a/src/config/config_def.c b/src/config/config_def.c
index f737b49866c..30b2b3e0e4d 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -117,6 +117,9 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
{ "checkpoint", "category",
NULL, NULL,
confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
+ { "diagnostic_timing_stress", "list",
+ NULL, "choices=[\"checkpoint_slow\"]",
+ NULL, 0 },
{ "error_prefix", "string", NULL, NULL, NULL, 0 },
{ "eviction", "category",
NULL, NULL,
@@ -711,6 +714,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{ "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 },
{ "config_base", "boolean", NULL, NULL, NULL, 0 },
{ "create", "boolean", NULL, NULL, NULL, 0 },
+ { "diagnostic_timing_stress", "list",
+ NULL, "choices=[\"checkpoint_slow\"]",
+ NULL, 0 },
{ "direct_io", "list",
NULL, "choices=[\"checkpoint\",\"data\",\"log\"]",
NULL, 0 },
@@ -799,6 +805,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
{ "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 },
{ "config_base", "boolean", NULL, NULL, NULL, 0 },
{ "create", "boolean", NULL, NULL, NULL, 0 },
+ { "diagnostic_timing_stress", "list",
+ NULL, "choices=[\"checkpoint_slow\"]",
+ NULL, 0 },
{ "direct_io", "list",
NULL, "choices=[\"checkpoint\",\"data\",\"log\"]",
NULL, 0 },
@@ -886,6 +895,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
NULL, NULL,
confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
{ "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 },
+ { "diagnostic_timing_stress", "list",
+ NULL, "choices=[\"checkpoint_slow\"]",
+ NULL, 0 },
{ "direct_io", "list",
NULL, "choices=[\"checkpoint\",\"data\",\"log\"]",
NULL, 0 },
@@ -969,6 +981,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
NULL, NULL,
confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
{ "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 },
+ { "diagnostic_timing_stress", "list",
+ NULL, "choices=[\"checkpoint_slow\"]",
+ NULL, 0 },
{ "direct_io", "list",
NULL, "choices=[\"checkpoint\",\"data\",\"log\"]",
NULL, 0 },
@@ -1083,18 +1098,19 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "WT_CONNECTION.reconfigure",
"async=(enabled=false,ops_max=1024,threads=2),cache_overhead=8,"
- "cache_size=100MB,checkpoint=(log_size=0,wait=0),error_prefix=,"
- "eviction=(threads_max=8,threads_min=1),"
- "eviction_checkpoint_target=5,eviction_dirty_target=5,"
- "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
- ",file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "cache_size=100MB,checkpoint=(log_size=0,wait=0),"
+ "diagnostic_timing_stress=,error_prefix=,eviction=(threads_max=8,"
+ "threads_min=1),eviction_checkpoint_target=5,"
+ "eviction_dirty_target=5,eviction_dirty_trigger=20,"
+ "eviction_target=80,eviction_trigger=95,"
+ "file_manager=(close_handle_minimum=250,close_idle_time=30,"
"close_scan_interval=10),log=(archive=true,prealloc=true,"
"zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4),"
"lsm_merge=true,shared_cache=(chunk=10MB,name=,quota=0,reserve=0,"
"size=500MB),statistics=none,statistics_log=(json=false,"
"on_close=false,sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"verbose=",
- confchk_WT_CONNECTION_reconfigure, 19
+ confchk_WT_CONNECTION_reconfigure, 20
},
{ "WT_CONNECTION.set_file_system",
"",
@@ -1302,57 +1318,57 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
",builtin_extension_config=,cache_overhead=8,cache_size=100MB,"
"checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
- "config_base=true,create=false,direct_io=,encryption=(keyid=,"
- "name=,secretkey=),error_prefix=,eviction=(threads_max=8,"
- "threads_min=1),eviction_checkpoint_target=5,"
- "eviction_dirty_target=5,eviction_dirty_trigger=20,"
- "eviction_target=80,eviction_trigger=95,exclusive=false,"
- "extensions=,file_extend=,file_manager=(close_handle_minimum=250,"
- "close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "in_memory=false,log=(archive=true,compressor=,enabled=false,"
- "file_max=100MB,path=\".\",prealloc=true,recover=on,"
- "zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4),"
- "lsm_merge=true,mmap=true,multiprocess=false,readonly=false,"
- "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
- ",name=,quota=0,reserve=0,size=500MB),statistics=none,"
- "statistics_log=(json=false,on_close=false,path=\".\",sources=,"
- "timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "transaction_sync=(enabled=false,method=fsync),"
+ "config_base=true,create=false,diagnostic_timing_stress=,"
+ "direct_io=,encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=8,threads_min=1),"
+ "eviction_checkpoint_target=5,eviction_dirty_target=5,"
+ "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
+ ",exclusive=false,extensions=,file_extend=,"
+ "file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "close_scan_interval=10),hazard_max=1000,in_memory=false,"
+ "log=(archive=true,compressor=,enabled=false,file_max=100MB,"
+ "path=\".\",prealloc=true,recover=on,zero_fill=false),"
+ "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
+ "mmap=true,multiprocess=false,readonly=false,session_max=100,"
+ "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,quota=0,"
+ "reserve=0,size=500MB),statistics=none,statistics_log=(json=false"
+ ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\""
+ ",wait=0),transaction_sync=(enabled=false,method=fsync),"
"use_environment=true,use_environment_priv=false,verbose=,"
"write_through=",
- confchk_wiredtiger_open, 40
+ confchk_wiredtiger_open, 41
},
{ "wiredtiger_open_all",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
",builtin_extension_config=,cache_overhead=8,cache_size=100MB,"
"checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
- "config_base=true,create=false,direct_io=,encryption=(keyid=,"
- "name=,secretkey=),error_prefix=,eviction=(threads_max=8,"
- "threads_min=1),eviction_checkpoint_target=5,"
- "eviction_dirty_target=5,eviction_dirty_trigger=20,"
- "eviction_target=80,eviction_trigger=95,exclusive=false,"
- "extensions=,file_extend=,file_manager=(close_handle_minimum=250,"
- "close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "in_memory=false,log=(archive=true,compressor=,enabled=false,"
- "file_max=100MB,path=\".\",prealloc=true,recover=on,"
- "zero_fill=false),lsm_manager=(merge=true,worker_thread_max=4),"
- "lsm_merge=true,mmap=true,multiprocess=false,readonly=false,"
- "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
- ",name=,quota=0,reserve=0,size=500MB),statistics=none,"
- "statistics_log=(json=false,on_close=false,path=\".\",sources=,"
- "timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "transaction_sync=(enabled=false,method=fsync),"
+ "config_base=true,create=false,diagnostic_timing_stress=,"
+ "direct_io=,encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=8,threads_min=1),"
+ "eviction_checkpoint_target=5,eviction_dirty_target=5,"
+ "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
+ ",exclusive=false,extensions=,file_extend=,"
+ "file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "close_scan_interval=10),hazard_max=1000,in_memory=false,"
+ "log=(archive=true,compressor=,enabled=false,file_max=100MB,"
+ "path=\".\",prealloc=true,recover=on,zero_fill=false),"
+ "lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
+ "mmap=true,multiprocess=false,readonly=false,session_max=100,"
+ "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,quota=0,"
+ "reserve=0,size=500MB),statistics=none,statistics_log=(json=false"
+ ",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\""
+ ",wait=0),transaction_sync=(enabled=false,method=fsync),"
"use_environment=true,use_environment_priv=false,verbose=,"
"version=(major=0,minor=0),write_through=",
- confchk_wiredtiger_open_all, 41
+ confchk_wiredtiger_open_all, 42
},
{ "wiredtiger_open_basecfg",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
",builtin_extension_config=,cache_overhead=8,cache_size=100MB,"
- "checkpoint=(log_size=0,wait=0),checkpoint_sync=true,direct_io=,"
- "encryption=(keyid=,name=,secretkey=),error_prefix=,"
- "eviction=(threads_max=8,threads_min=1),"
- "eviction_checkpoint_target=5,eviction_dirty_target=5,"
+ "checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
+ "diagnostic_timing_stress=,direct_io=,encryption=(keyid=,name=,"
+ "secretkey=),error_prefix=,eviction=(threads_max=8,threads_min=1)"
+ ",eviction_checkpoint_target=5,eviction_dirty_target=5,"
"eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
@@ -1365,15 +1381,15 @@ static const WT_CONFIG_ENTRY config_entries[] = {
",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\""
",wait=0),transaction_sync=(enabled=false,method=fsync),verbose=,"
"version=(major=0,minor=0),write_through=",
- confchk_wiredtiger_open_basecfg, 35
+ confchk_wiredtiger_open_basecfg, 36
},
{ "wiredtiger_open_usercfg",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
",builtin_extension_config=,cache_overhead=8,cache_size=100MB,"
- "checkpoint=(log_size=0,wait=0),checkpoint_sync=true,direct_io=,"
- "encryption=(keyid=,name=,secretkey=),error_prefix=,"
- "eviction=(threads_max=8,threads_min=1),"
- "eviction_checkpoint_target=5,eviction_dirty_target=5,"
+ "checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
+ "diagnostic_timing_stress=,direct_io=,encryption=(keyid=,name=,"
+ "secretkey=),error_prefix=,eviction=(threads_max=8,threads_min=1)"
+ ",eviction_checkpoint_target=5,eviction_dirty_target=5,"
"eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
@@ -1386,7 +1402,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
",on_close=false,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\""
",wait=0),transaction_sync=(enabled=false,method=fsync),verbose=,"
"write_through=",
- confchk_wiredtiger_open_usercfg, 34
+ confchk_wiredtiger_open_usercfg, 35
},
{ NULL, NULL, NULL, 0 }
};
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index 46c170d2dad..cb0aa17791e 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -1143,6 +1143,7 @@ __conn_reconfigure(WT_CONNECTION *wt_conn, const char *config)
WT_ERR(__wt_statlog_create(session, cfg));
WT_ERR(__wt_sweep_config(session, cfg));
WT_ERR(__wt_verbose_config(session, cfg));
+ WT_ERR(__wt_timing_stress_config(session, cfg));
/* Third, merge everything together, creating a new connection state. */
WT_ERR(__wt_config_merge(session, cfg, NULL, &p));
@@ -1894,6 +1895,50 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
}
/*
+ * __wt_timing_stress_config --
+ * Set diagnostic stress timing delay configuration.
+ */
+int
+__wt_timing_stress_config(WT_SESSION_IMPL *session, const char *cfg[])
+{
+ static const WT_NAME_FLAG stress_types[] = {
+ { "checkpoint_slow", WT_TIMING_STRESS_CHECKPOINT_SLOW },
+ { NULL, 0 }
+ };
+ WT_CONFIG_ITEM cval, sval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ const WT_NAME_FLAG *ft;
+ uint32_t flags;
+
+ conn = S2C(session);
+
+ WT_RET(__wt_config_gets(
+ session, cfg, "diagnostic_timing_stress", &cval));
+
+ flags = 0;
+ for (ft = stress_types; ft->name != NULL; ft++) {
+ if ((ret = __wt_config_subgets(
+ session, &cval, ft->name, &sval)) == 0 && sval.val != 0) {
+#ifdef HAVE_DIAGNOSTIC
+ LF_SET(ft->flag);
+#else
+ WT_RET_MSG(session, EINVAL,
+ "diagnostic_timing_stress option specified when "
+ "WiredTiger built without diagnostic support. Add "
+ "--enable-diagnostic to configure command and "
+ "rebuild to include support for diagnostic stress "
+ "timing delays");
+#endif
+ }
+ WT_RET_NOTFOUND_OK(ret);
+ }
+
+ conn->timing_stress_flags = flags;
+ return (0);
+}
+
+/*
* __conn_write_base_config --
* Save the base configuration used to create a database.
*/
@@ -2359,6 +2404,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
session, cval.str, cval.len, &conn->error_prefix));
}
WT_ERR(__wt_verbose_config(session, cfg));
+ WT_ERR(__wt_timing_stress_config(session, cfg));
WT_ERR(__wt_config_gets(session, cfg, "session_max", &cval));
conn->session_size = (uint32_t)cval.val + WT_EXTRA_INTERNAL_SESSIONS;
diff --git a/src/include/connection.h b/src/include/connection.h
index 56d801cd361..94abe48ffe5 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -410,6 +410,12 @@ struct __wt_connection_impl {
int page_size; /* OS page size for mmap alignment */
uint32_t verbose;
+ /*
+ * Variable with flags for which subsystems the diagnostic stress timing
+ * delays have been requested.
+ */
+ uint32_t timing_stress_flags;
+
#define WT_STDERR(s) (&S2C(s)->wt_stderr)
#define WT_STDOUT(s) (&S2C(s)->wt_stdout)
WT_FSTREAM wt_stderr, wt_stdout;
diff --git a/src/include/extern.h b/src/include/extern.h
index e7be1493875..59e853d75f4 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -244,6 +244,7 @@ extern int __wt_conn_remove_encryptor(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL
extern int __wt_extractor_config(WT_SESSION_IMPL *session, const char *uri, const char *config, WT_EXTRACTOR **extractorp, int *ownp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_conn_remove_extractor(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_timing_stress_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_cache_config(WT_SESSION_IMPL *session, bool reconfigure, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_cache_stats_update(WT_SESSION_IMPL *session);
diff --git a/src/include/flags.h b/src/include/flags.h
index 919c0dd2f98..ef66a186fa4 100644
--- a/src/include/flags.h
+++ b/src/include/flags.h
@@ -78,6 +78,7 @@
#define WT_STAT_TYPE_FAST 0x00000020
#define WT_STAT_TYPE_SIZE 0x00000040
#define WT_STAT_TYPE_TREE_WALK 0x00000080
+#define WT_TIMING_STRESS_CHECKPOINT_SLOW 0x00000001
#define WT_TXN_LOG_CKPT_CLEANUP 0x00000001
#define WT_TXN_LOG_CKPT_PREPARE 0x00000002
#define WT_TXN_LOG_CKPT_START 0x00000004
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index 0cbbeedfaaf..73ad96b5518 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -8,6 +8,7 @@
#include "wt_internal.h"
+static void __checkpoint_timing_stress(WT_SESSION_IMPL *);
static int __checkpoint_lock_dirty_tree(
WT_SESSION_IMPL *, bool, bool, bool, const char *[]);
static int __checkpoint_mark_skip(WT_SESSION_IMPL *, WT_CKPT *, bool);
@@ -780,6 +781,8 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_txn_checkpoint_log(
session, full, WT_TXN_LOG_CKPT_START, NULL));
+ __checkpoint_timing_stress(session);
+
WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_tree_helper));
/*
@@ -1707,3 +1710,29 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final)
return (ret);
}
+
+/*
+ * __checkpoint_timing_stress --
+ * Optionally add a 10 second delay to a checkpoint to simulate a long
+ * running checkpoint for debug purposes. The reason for this option is
+ * finding operations that can block while waiting for a checkpoint to
+ * complete.
+ */
+static void
+__checkpoint_timing_stress(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+
+ /*
+ * We only want to sleep if the flag is set and the checkpoint comes
+ * from the API, so check if the session used is either of the two
+ * sessions set aside for internal checkpoints.
+ */
+ if (conn->ckpt_session != session &&
+ conn->meta_ckpt_session != session &&
+ FLD_ISSET(conn->timing_stress_flags,
+ WT_TIMING_STRESS_CHECKPOINT_SLOW))
+ __wt_sleep(10, 0);
+}
diff --git a/test/csuite/Makefile.am b/test/csuite/Makefile.am
index f2b4fcacdc8..0b117a6588b 100644
--- a/test/csuite/Makefile.am
+++ b/test/csuite/Makefile.am
@@ -57,6 +57,9 @@ noinst_PROGRAMS += test_wt3135_search_near_collator
test_wt3184_dup_index_collator_SOURCES = wt3184_dup_index_collator/main.c
noinst_PROGRAMS += test_wt3184_dup_index_collator
+test_wt3363_checkpoint_op_races_SOURCES = wt3363_checkpoint_op_races/main.c
+noinst_PROGRAMS += test_wt3363_checkpoint_op_races
+
test_rwlock_SOURCES = rwlock/main.c
noinst_PROGRAMS += test_rwlock
diff --git a/test/csuite/wt3363_checkpoint_op_races/main.c b/test/csuite/wt3363_checkpoint_op_races/main.c
new file mode 100644
index 00000000000..bfe3ef2d140
--- /dev/null
+++ b/test/csuite/wt3363_checkpoint_op_races/main.c
@@ -0,0 +1,268 @@
+/*-
+ * Public Domain 2014-2017 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "test_util.h"
+
+/*
+ * JIRA ticket reference: WT-3363
+ *
+ * Test case description: There are a number of operations that we run that we
+ * expect not to conflict with or block against a running checkpoint. This test
+ * aims to run repeated checkpoints in a thread, while running an assortment
+ * of operations that we expect to execute quickly on further threads. To
+ * ensure that we catch any blockages we introduce a very large delay into the
+ * checkpoint and measure that no operation takes 1/2 the length of this delay.
+ *
+ * Failure mode: We monitor the execution time of all operations and if we find
+ * any operation taking longer than 1/2 the delay time, we abort dumping a core
+ * file which can be used to determine what operation was blocked.
+ */
+void* do_checkpoints(void *);
+void* do_ops(void *);
+void* monitor(void *);
+
+/*
+ * Time delay to introduce into checkpoints in seconds. Should be at-least
+ * double the maximum time that any one of the operations should take. Currently
+ * this is set to 10 seconds and we expect no single operation to take longer
+ * than 5 seconds.
+ */
+#define MAX_EXECUTION_TIME 10
+#define N_THREADS 10
+
+/*
+ * Number of seconds to execute for. Initially set to 15 minutes, as we need to
+ * run long enough to be certain we have captured any blockages. In initial
+ * testing 5 minutes was enough to reproduce the issue, so we run for 3x that
+ * here to ensure we reproduce before declaring success.
+ */
+#define RUNTIME 900.0
+
+static WT_EVENT_HANDLER event_handler = {
+ handle_op_error,
+ handle_op_message,
+ NULL,
+ NULL
+};
+
+int
+main(int argc, char *argv[])
+{
+ TEST_PER_THREAD_OPTS thread_args[N_THREADS];
+ TEST_OPTS *opts, _opts;
+ pthread_t ckpt_thread, mon_thread, threads[N_THREADS];
+ int i;
+ bool diagnostic;
+
+#ifdef HAVE_DIAGNOSTIC
+ diagnostic = true;
+#else
+ diagnostic = false;
+#endif
+
+ /*
+ * This test should not run unless we have compiled with diagnostic
+ * support and the long tests flag is set. The test will fail when
+ * attempting to set the option to add the delays to checkpoints if
+ * diagnostic mode is not enable and runs for 15 minutes.
+ */
+ if (!testutil_enable_long_tests() || !diagnostic)
+ return (EXIT_SUCCESS);
+
+ opts = &_opts;
+ opts->unique_id = 0;
+ memset(opts, 0, sizeof(*opts));
+
+ testutil_check(testutil_parse_opts(argc, argv, opts));
+ testutil_make_work_dir(opts->home);
+
+ testutil_check(wiredtiger_open(opts->home, &event_handler,
+ "create,cache_size=1G,diagnostic_timing_stress=[checkpoint_slow]",
+ &opts->conn));
+
+ testutil_check(pthread_create(
+ &ckpt_thread, NULL, do_checkpoints, (void *)opts));
+
+ for (i = 0; i < N_THREADS; ++i) {
+ thread_args[i].testopts = opts;
+ thread_args[i].thread_counter = 0;
+ thread_args[i].threadnum = i;
+ testutil_check(pthread_create(
+ &threads[i], NULL, do_ops, (void *)&thread_args[i]));
+ }
+
+ /*
+ * Pass the whole array of thread arguments to the monitoring thread.
+ * This thread will need to monitor each threads counter to track if it
+ * is stuck.
+ */
+ testutil_check(
+ pthread_create(&mon_thread, NULL, monitor, &thread_args));
+
+ for (i = 0; i < N_THREADS; ++i)
+ testutil_check(pthread_join(threads[i], NULL));
+
+ testutil_check(pthread_join(mon_thread, NULL));
+
+ testutil_check(pthread_join(ckpt_thread, NULL));
+
+ printf("Success\n");
+
+ testutil_cleanup(opts);
+ return (EXIT_SUCCESS);
+}
+
+/*
+ * Function for repeatedly running checkpoint operations.
+ */
+void *
+do_checkpoints(void *_opts)
+{
+ TEST_OPTS *opts;
+ WT_SESSION *session;
+ time_t now, start;
+ int ret;
+
+ opts = (TEST_OPTS *)_opts;
+ (void)time(&start);
+ (void)time(&now);
+
+ while (difftime(now, start) < RUNTIME) {
+ testutil_check(
+ opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ if ((ret = session->checkpoint(session, "force")) != 0)
+ if (ret != EBUSY && ret != ENOENT)
+ testutil_die(ret, "session.checkpoint");
+
+ testutil_check(session->close(session, NULL));
+
+ /*
+ * A short sleep to let operations process and avoid back to
+ * back checkpoints locking up resources.
+ */
+ sleep(1);
+ (void)time(&now);
+ }
+
+ return (NULL);
+}
+
+/*
+ * Function to monitor running operations and abort to dump core in the event
+ * that we catch an operation running long.
+ */
+void *
+monitor(void *args)
+{
+ TEST_PER_THREAD_OPTS *thread_args;
+ time_t now, start;
+ int ctr, i, last_ops[N_THREADS];
+
+ thread_args = (TEST_PER_THREAD_OPTS*)args;
+
+ (void)time(&start);
+ (void)time(&now);
+
+ memset(last_ops, 0, sizeof(int) + N_THREADS);
+
+ while (difftime(now, start) < RUNTIME) {
+
+ /*
+ * Checkpoints will run for slightly over MAX_EXECUTION_TIME.
+ * MAX_EXECUTION_TIME should always be long enough that we can
+ * complete any single operation in 1/2 that time.
+ */
+ sleep(MAX_EXECUTION_TIME/2);
+
+ for (i = 0; i < N_THREADS; i++) {
+ ctr = thread_args[i].thread_counter;
+
+ /* Ignore any threads which may not have started yet. */
+ if (ctr == 0)
+ continue;
+
+ /*
+ * We track how many operations each thread has done. If
+ * we have slept and the counter remains the same for a
+ * thread it is stuck and should drop a core so the
+ * cause of the hang can be investigated.
+ */
+ if (ctr != last_ops[i])
+ last_ops[i] = ctr;
+ else {
+ printf("Thread %d had a task running"
+ " for more than %d seconds\n",
+ i, MAX_EXECUTION_TIME/2);
+ abort();
+ }
+ }
+ (void)time(&now);
+ }
+
+ return (NULL);
+}
+
+/*
+ * Worker thread. Executes random operations from the set of 6.
+ */
+void *
+do_ops(void *args)
+{
+ WT_RAND_STATE rnd;
+ time_t now, start;
+
+ __wt_random_init_seed(NULL, &rnd);
+ (void)time(&start);
+ (void)time(&now);
+
+ while (difftime(now, start) < RUNTIME) {
+ switch (__wt_random(&rnd) % 6) {
+ case 0:
+ op_bulk(args);
+ break;
+ case 1:
+ op_create(args);
+ break;
+ case 2:
+ op_cursor(args);
+ break;
+ case 3:
+ op_drop(args);
+ break;
+ case 4:
+ op_bulk_unique(args);
+ break;
+ case 5:
+ op_create_unique(args);
+ break;
+ }
+ (void)time(&now);
+ }
+
+ return (NULL);
+}
diff --git a/test/utility/test_util.h b/test/utility/test_util.h
index dd198fa0081..2e3c43dca40 100644
--- a/test/utility/test_util.h
+++ b/test/utility/test_util.h
@@ -72,10 +72,21 @@ typedef struct {
bool running;
char *uri;
volatile uint64_t next_threadid;
+ uint64_t unique_id;
uint64_t max_inserted_id;
} TEST_OPTS;
/*
+ * A structure for the data specific to a single thread of those used by the
+ * group of threads defined below.
+ */
+typedef struct {
+ TEST_OPTS *testopts;
+ int threadnum;
+ int thread_counter;
+} TEST_PER_THREAD_OPTS;
+
+/*
* testutil_assert --
* Complain and quit if something isn't true.
*/
@@ -198,6 +209,20 @@ void *drealloc(void *, size_t);
void *dstrdup(const void *);
void *dstrndup(const char *, size_t);
const char *example_setup(int, char * const *);
+
+/*
+ * The functions below can generate errors that we wish to ignore. We have
+ * handler functions available for them here, to avoid making tests crash
+ * prematurely.
+ */
+int handle_op_error(WT_EVENT_HANDLER *, WT_SESSION *, int, const char *);
+int handle_op_message(WT_EVENT_HANDLER *, WT_SESSION *, const char *);
+void *op_bulk(void *);
+void *op_bulk_unique(void *);
+void *op_cursor(void *);
+void *op_create(void *);
+void *op_create_unique(void *);
+void *op_drop(void *);
void testutil_clean_work_dir(const char *);
void testutil_cleanup(TEST_OPTS *);
bool testutil_enable_long_tests(void);
diff --git a/test/utility/thread.c b/test/utility/thread.c
index 08f49c54c5e..4c5202016a5 100644
--- a/test/utility/thread.c
+++ b/test/utility/thread.c
@@ -140,3 +140,285 @@ thread_prev(void *arg)
testutil_check(session->close(session, NULL));
return (NULL);
}
+
+/*
+ * Below are a series of functions originally designed for test/fops. These
+ * threads perform a series of simple API access calls, such as opening and
+ * closing sessions and cursors. These functions require use of the
+ * TEST_PER_THREAD_OPTS structure in test_util.h. Additionally there are two
+ * event handler functions that should be used to suppress "expected" errors
+ * that these functions generate. An example of the use of these functions and
+ * structures is in the csuite test wt3363_checkpoint_op_races.
+ */
+
+/*
+ * Handle errors that generated by series of functions below that we can safely
+ * ignore.
+ */
+int
+handle_op_error(WT_EVENT_HANDLER *handler,
+ WT_SESSION *session, int error, const char *errmsg)
+{
+ (void)(handler);
+ (void)(session);
+
+ /*
+ * Ignore complaints about missing files. It's unlikely but possible
+ * that checkpoints and cursor open operations can return this due to
+ * the sequencing of the various ops.
+ */
+ if (error == ENOENT)
+ return (0);
+
+ /* Ignore complaints about failure to open bulk cursors. */
+ if (strstr(
+ errmsg, "bulk-load is only supported on newly created") != NULL)
+ return (0);
+
+ return (fprintf(stderr, "%s\n", errmsg) < 0 ? -1 : 0);
+}
+
+/*
+ * Handle messages generated by the functions below that we can safely ignore.
+ */
+int
+handle_op_message(WT_EVENT_HANDLER *handler,
+ WT_SESSION *session, const char *message)
+{
+ (void)(handler);
+ (void)(session);
+
+ /* Ignore messages about failing to create forced checkpoints. */
+ if (strstr(message, "forced or named checkpoint") != NULL)
+ return (0);
+
+ return (printf("%s\n", message) < 0 ? -1 : 0);
+}
+
+/*
+ * Create a table and open a bulk cursor on it.
+ */
+void *
+op_bulk(void *arg)
+{
+ TEST_OPTS *opts;
+ TEST_PER_THREAD_OPTS *args;
+ WT_CURSOR *c;
+ WT_SESSION *session;
+ int ret;
+
+ args = (TEST_PER_THREAD_OPTS *)arg;
+ opts = args->testopts;
+
+ testutil_check(
+ opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ if ((ret = session->create(session, opts->uri, NULL)) != 0)
+ if (ret != EEXIST && ret != EBUSY)
+ testutil_die(ret, "session.create");
+
+ if (ret == 0) {
+ __wt_yield();
+ if ((ret = session->open_cursor(session,
+ opts->uri, NULL, "bulk,checkpoint_wait=false", &c)) == 0) {
+ testutil_check(c->close(c));
+ } else if (ret != ENOENT && ret != EBUSY && ret != EINVAL)
+ testutil_die(ret, "session.open_cursor bulk");
+ }
+
+ testutil_check(session->close(session, NULL));
+ args->thread_counter++;
+
+ return (NULL);
+}
+
+/*
+ * Create a guaranteed unique table and open and close a bulk cursor on it.
+ */
+void *
+op_bulk_unique(void *arg)
+{
+ TEST_OPTS *opts;
+ TEST_PER_THREAD_OPTS *args;
+ WT_CURSOR *c;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ int ret;
+ char new_uri[64];
+
+ args = (TEST_PER_THREAD_OPTS *)arg;
+ opts = args->testopts;
+ __wt_random_init_seed(NULL, &rnd);
+
+ testutil_check(
+ opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ /* Generate a unique object name. */
+ testutil_check(__wt_snprintf(
+ new_uri, sizeof(new_uri), "%s.%u",
+ opts->uri, __wt_atomic_add64(&opts->unique_id, 1)));
+ testutil_check(session->create(session, new_uri, NULL));
+
+ __wt_yield();
+
+ /*
+ * Opening a bulk cursor may have raced with a forced checkpoint
+ * which created a checkpoint of the empty file, and triggers an EINVAL.
+ */
+ if ((ret = session->open_cursor(
+ session, new_uri, NULL, "bulk,checkpoint_wait=false", &c)) == 0) {
+ testutil_check(c->close(c));
+ } else if (ret != EINVAL && ret != EBUSY)
+ testutil_die(ret,
+ "session.open_cursor bulk unique: %s", new_uri);
+
+ while ((ret = session->drop(session, new_uri, __wt_random(&rnd) & 1 ?
+ "force,checkpoint_wait=false" : "checkpoint_wait=false")) != 0)
+ if (ret != EBUSY)
+ testutil_die(ret, "session.drop: %s", new_uri);
+ else
+ /*
+ * The EBUSY is expected when we run with
+ * checkpoint_wait set to false, so we increment the
+ * counter while in this loop to avoid false positives.
+ */
+ args->thread_counter++;
+
+ testutil_check(session->close(session, NULL));
+ args->thread_counter++;
+
+ return (NULL);
+}
+
+/*
+ * Open and close cursor on a table.
+ */
+void *
+op_cursor(void *arg)
+{
+ TEST_OPTS *opts;
+ TEST_PER_THREAD_OPTS *args;
+ WT_SESSION *session;
+ WT_CURSOR *cursor;
+ int ret;
+
+ args = (TEST_PER_THREAD_OPTS *)arg;
+ opts = args->testopts;
+
+ testutil_check(
+ opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ if ((ret = session->open_cursor(
+ session, opts->uri, NULL, NULL, &cursor)) != 0) {
+ if (ret != ENOENT && ret != EBUSY)
+ testutil_die(ret, "session.open_cursor");
+ } else
+ testutil_check(cursor->close(cursor));
+
+ testutil_check(session->close(session, NULL));
+ args->thread_counter++;
+
+ return (NULL);
+}
+
+/*
+ * Create a table.
+ */
+void *
+op_create(void *arg)
+{
+ TEST_OPTS *opts;
+ TEST_PER_THREAD_OPTS *args;
+ WT_SESSION *session;
+ int ret;
+
+ args = (TEST_PER_THREAD_OPTS *)arg;
+ opts = args->testopts;
+
+ testutil_check(
+ opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ if ((ret = session->create(session, opts->uri, NULL)) != 0)
+ if (ret != EEXIST && ret != EBUSY)
+ testutil_die(ret, "session.create");
+
+ testutil_check(session->close(session, NULL));
+ args->thread_counter++;
+
+ return (NULL);
+}
+
+/*
+ * Create and drop a unique guaranteed table.
+ */
+void *
+op_create_unique(void *arg)
+{
+ TEST_OPTS *opts;
+ TEST_PER_THREAD_OPTS *args;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ int ret;
+ char new_uri[64];
+
+ args = (TEST_PER_THREAD_OPTS *)arg;
+ opts = args->testopts;
+ __wt_random_init_seed(NULL, &rnd);
+
+ testutil_check(
+ opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ /* Generate a unique object name. */
+ testutil_check(__wt_snprintf(
+ new_uri, sizeof(new_uri), "%s.%u",
+ opts->uri, __wt_atomic_add64(&opts->unique_id, 1)));
+ testutil_check(session->create(session, new_uri, NULL));
+
+ __wt_yield();
+ while ((ret = session->drop(session, new_uri, __wt_random(&rnd) & 1 ?
+ "force,checkpoint_wait=false" : "checkpoint_wait=false")) != 0)
+ if (ret != EBUSY)
+ testutil_die(ret, "session.drop: %s", new_uri);
+ else
+ /*
+ * The EBUSY is expected when we run with
+ * checkpoint_wait set to false, so we increment the
+ * counter while in this loop to avoid false positives.
+ */
+ args->thread_counter++;
+
+ testutil_check(session->close(session, NULL));
+ args->thread_counter++;
+
+ return (NULL);
+}
+
+/*
+ * Drop a table.
+ */
+void *
+op_drop(void *arg)
+{
+ TEST_PER_THREAD_OPTS *args;
+ TEST_OPTS *opts;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ int ret;
+
+ args = (TEST_PER_THREAD_OPTS *)arg;
+ opts = args->testopts;
+ __wt_random_init_seed(NULL, &rnd);
+
+ testutil_check(
+ opts->conn->open_session(opts->conn, NULL, NULL, &session));
+
+ if ((ret = session->drop(session, opts->uri, __wt_random(&rnd) & 1 ?
+ "force,checkpoint_wait=false" : "checkpoint_wait=false")) != 0)
+ if (ret != ENOENT && ret != EBUSY)
+ testutil_die(ret, "session.drop");
+
+ testutil_check(session->close(session, NULL));
+ args->thread_counter++;
+
+ return (NULL);
+}