summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Alex Gorrod <alexander.gorrod@mongodb.com> 2017-08-21 09:26:14 +1000
committer Alex Gorrod <alexander.gorrod@mongodb.com> 2017-08-21 09:26:14 +1000
commit 0cfe4dfc2cf371f9e8196cb79414c3432b95b5af (patch)
tree 3ea1dfc60609b4b6c424144f02af5e8045d1fe40
parent 1cbfc673303260c725ef428eb0f2b6372feb5ec1 (diff)
download mongo-0cfe4dfc2cf371f9e8196cb79414c3432b95b5af.tar.gz
Import wiredtiger: b055251678e6b4fcc74a1f651432aadbfeecc0e4 from branch mongodb-3.6
ref: 698847557c..b055251678
for: 3.5.12

WT-3328 Enhance docs around when schema operations can get EBUSY
WT-3358 LSM will hang if the manager fails to start
WT-3365 Understand how timestamps interact with LSM chunk switching
WT-3399 Add new checkpoint blocking test case to automated testing
WT-3417 Drain transactions during upgrade/downgrade.
WT-3441 test_timestamp01 doesn't account for a large WT_TIMESTAMP_SIZE
WT-3450 Add verbose option that tracks timestamp state and information
WT-3452 Enhance existing recovery test to exercise timestamp API
WT-3455 Enhance eviction to be aware of stable timestamp
WT-3459 Test WiredTiger with clock shifting
WT-3460 Add support for rollback_to_stable to column store
WT-3465 Optimize performance when timestamp size is 8 bytes
WT-3483 WT_SESSION::checkpoint returning WT_ROLLBACK
WT-3492 ex_all.c not calling transaction_ops
WT-3493 wt_verbose_dump_txn should display timestamp information
WT-3497 Improve logging message when hitting the WT session limits
WT-3498 Incorrect data read after caching overflow items
WT-3499 Checkpoint can miss not yet committed item
WT-3500 New timestamp-abort test is too chatty
WT-3502 Only keep 10 delta updates between full copies
WT-3503 Coverity 1379333: unchecked return value, full-build Friday
WT-3508 timestamp-abort bug in verification phase
WT-3509 __wt_illegal_value doesn't always provide a failure location
WT-3514 WT_SESSION.checkpoint: read timestamp 6373c older than oldest timestamp
WT-3517 WT_SESSION::reset doesn't need to call out EBUSY specially
WT-3521 Unstable updates should not be written by lookaside eviction
-rw-r--r-- src/third_party/wiredtiger/.gitignore 1
-rw-r--r-- src/third_party/wiredtiger/dist/api_data.py 8
-rw-r--r-- src/third_party/wiredtiger/dist/filelist 1
-rw-r--r-- src/third_party/wiredtiger/dist/flags.py 1
-rwxr-xr-x src/third_party/wiredtiger/dist/s_all 2
-rw-r--r-- src/third_party/wiredtiger/dist/s_copyright.list 2
-rwxr-xr-x src/third_party/wiredtiger/dist/s_function (renamed from src/third_party/wiredtiger/dist/s_label) 37
-rw-r--r-- src/third_party/wiredtiger/dist/s_function_loop.py (renamed from src/third_party/wiredtiger/dist/s_label_loop.py) 0
-rw-r--r-- src/third_party/wiredtiger/dist/s_string.ok 5
-rw-r--r-- src/third_party/wiredtiger/examples/c/ex_all.c 285
-rw-r--r-- src/third_party/wiredtiger/import.data 2
-rw-r--r-- src/third_party/wiredtiger/src/block/block_ckpt.c 5
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_cursor.c 13
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_debug.c 100
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_discard.c 3
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_ovfl.c 176
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_read.c 4
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_split.c 8
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_walk.c 7
-rw-r--r-- src/third_party/wiredtiger/src/btree/row_modify.c 14
-rw-r--r-- src/third_party/wiredtiger/src/config/config_def.c 26
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_api.c 186
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_reconfig.c 210
-rw-r--r-- src/third_party/wiredtiger/src/cursor/cur_ds.c 10
-rw-r--r-- src/third_party/wiredtiger/src/cursor/cur_join.c 4
-rw-r--r-- src/third_party/wiredtiger/src/docs/Doxyfile 4
-rw-r--r-- src/third_party/wiredtiger/src/docs/error-handling.dox 23
-rw-r--r-- src/third_party/wiredtiger/src/include/btmem.h 16
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h 12
-rw-r--r-- src/third_party/wiredtiger/src/include/flags.h 9
-rw-r--r-- src/third_party/wiredtiger/src/include/lsm.h 14
-rw-r--r-- src/third_party/wiredtiger/src/include/misc.h 4
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.h 4
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.i 56
-rw-r--r-- src/third_party/wiredtiger/src/include/wiredtiger.in 40
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_cursor.c 18
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_manager.c 27
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_tree.c 7
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_work_unit.c 61
-rw-r--r-- src/third_party/wiredtiger/src/os_common/os_abort.c 14
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c 222
-rw-r--r-- src/third_party/wiredtiger/src/session/session_api.c 6
-rw-r--r-- src/third_party/wiredtiger/src/support/err.c 13
-rw-r--r-- src/third_party/wiredtiger/src/support/global.c 21
-rw-r--r-- src/third_party/wiredtiger/src/support/time.c 14
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn.c 135
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_ckpt.c 60
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_ext.c 3
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_log.c 11
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c 112
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_timestamp.c 148
-rwxr-xr-x src/third_party/wiredtiger/test/csuite/time_shift_test.sh 116
-rw-r--r-- src/third_party/wiredtiger/test/csuite/wt3363_checkpoint_op_races/main.c 2
-rw-r--r-- src/third_party/wiredtiger/test/format/backup.c 12
-rw-r--r-- src/third_party/wiredtiger/test/format/wts.c 10
-rw-r--r-- src/third_party/wiredtiger/test/recovery/Makefile.am 7
-rwxr-xr-x src/third_party/wiredtiger/test/recovery/smoke.sh 4
-rw-r--r-- src/third_party/wiredtiger/test/recovery/timestamp-abort.c 722
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_timestamp01.py 2
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_timestamp03.py 1
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_timestamp04.py 26
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_timestamp05.py 106
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_timestamp07.py 284
-rw-r--r-- src/third_party/wiredtiger/test/utility/test_util.h 12
64 files changed, 2567 insertions, 901 deletions
diff --git a/src/third_party/wiredtiger/.gitignore b/src/third_party/wiredtiger/.gitignore
index 204cd421fd1..e81c037a1ac 100644
--- a/src/third_party/wiredtiger/.gitignore
+++ b/src/third_party/wiredtiger/.gitignore
@@ -134,6 +134,7 @@ _wiredtiger.pyd
**/test/packing/packing-test
**/test/readonly/t
**/test/recovery/random-abort
+**/test/recovery/timestamp-abort
**/test/recovery/truncated-log
**/test/salvage/t
**/test/syscall/test_wt2336_base
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index ee2f14b980b..37f9baedc70 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -402,7 +402,9 @@ connection_runtime_config = [
min='0', max='100000'),
]),
Config('compatibility', '', r'''
- set compatibility version of database''',
+ set compatibility version of database. Changing the compatibility
+ version requires that there are no active operations for the duration
+ of the call.''',
type='category', subconfig=[
Config('release', '', r'''
compatibility release version string'''),
@@ -560,6 +562,7 @@ connection_runtime_config = [
'split',
'temporary',
'thread_group',
+ 'timestamp',
'transaction',
'verify',
'version',
@@ -1146,9 +1149,6 @@ methods = {
Config('name', '', r'''
if set, specify a name for the checkpoint (note that checkpoints
including LSM trees may not be named)'''),
- Config('read_timestamp', '', r'''
- if set, create the checkpoint as of the specified timestamp''',
- undoc=True),
Config('target', '', r'''
if non-empty, checkpoint the list of objects''', type='list'),
Config('use_timestamp', 'true', r'''
diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist
index 33ede795c69..9755e24f3c7 100644
--- a/src/third_party/wiredtiger/dist/filelist
+++ b/src/third_party/wiredtiger/dist/filelist
@@ -72,6 +72,7 @@ src/conn/conn_dhandle.c
src/conn/conn_handle.c
src/conn/conn_log.c
src/conn/conn_open.c
+src/conn/conn_reconfig.c
src/conn/conn_stat.c
src/conn/conn_sweep.c
src/cursor/cur_backup.c
diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py
index 05ffb8851a2..8c0448b27c1 100644
--- a/src/third_party/wiredtiger/dist/flags.py
+++ b/src/third_party/wiredtiger/dist/flags.py
@@ -87,6 +87,7 @@ flags = {
'VERB_SPLIT',
'VERB_TEMPORARY',
'VERB_THREAD_GROUP',
+ 'VERB_TIMESTAMP',
'VERB_TRANSACTION',
'VERB_VERIFY',
'VERB_VERSION',
diff --git a/src/third_party/wiredtiger/dist/s_all b/src/third_party/wiredtiger/dist/s_all
index be33657e640..1809cef3aa5 100755
--- a/src/third_party/wiredtiger/dist/s_all
+++ b/src/third_party/wiredtiger/dist/s_all
@@ -88,8 +88,8 @@ COMMANDS="
2>&1 ./s_docs > ${t_pfx}s_docs
2>&1 ./s_export > ${t_pfx}s_export
2>&1 ./s_funcs > ${t_pfx}s_funcs
+2>&1 ./s_function > ${t_pfx}s_function
2>&1 ./s_getopt > ${t_pfx}s_getopt
-2>&1 ./s_label > ${t_pfx}s_label
2>&1 ./s_lang > ${t_pfx}s_lang
2>&1 ./s_longlines > ${t_pfx}s_longlines
2>&1 ./s_python > ${t_pfx}s_python
diff --git a/src/third_party/wiredtiger/dist/s_copyright.list b/src/third_party/wiredtiger/dist/s_copyright.list
index 2ac63bcb159..71ffa446eed 100644
--- a/src/third_party/wiredtiger/dist/s_copyright.list
+++ b/src/third_party/wiredtiger/dist/s_copyright.list
@@ -12,7 +12,7 @@ skip dist/flags.py
skip dist/java_doc.py
skip dist/log.py
skip dist/log_data.py
-skip dist/s_label_loop.py
+skip dist/s_function_loop.py
skip dist/stat.py
skip dist/stat_data.py
skip dist/style.py
diff --git a/src/third_party/wiredtiger/dist/s_label b/src/third_party/wiredtiger/dist/s_function
index c7b63d9d5b3..3259e215d0c 100755
--- a/src/third_party/wiredtiger/dist/s_label
+++ b/src/third_party/wiredtiger/dist/s_function
@@ -1,6 +1,6 @@
#! /bin/sh
-# Check WiredTiger error/return macros.
+# Check various WiredTiger function behaviors.
t=__wt.$$
trap 'rm -f $t' 0 1 2 3 13 15
@@ -36,7 +36,7 @@ done
# Jumps before returns have already been detected above.
for f in `find bench examples ext src test -name '*.[ci]'`; do
file_parse $f | sed "s=^=$f:="
-done | python dist/s_label_loop.py |
+done | python dist/s_function_loop.py |
egrep '\{@[^@]*(WT_ILLEGAL_VALUE|WT_RET[_A-Z]*)\([^@]*(WT_ERR[_A-Z]*|WT_ILLEGAL_VALUE_ERR)\(.*err:' |
sed -e 's/^\([^:]*\): *\([^:]*\):.*/\1:\2: mix of returns and jump to the error label within a loop/'
@@ -80,4 +80,37 @@ for f in `find bench examples ext src test -name '*.[ci]'`; do
done
+# API_END with a return
+for f in `find bench examples ext src test -name '*.[ci]'`; do
+ file_parse $f |
+ egrep '[^A-Z_]API_END.*return' |
+ sed 's/:.*//' > $t
+ test -s $t && {
+ echo "$f: API_END followed by return."
+ sed 's/^/function @ line:/' < $t
+ }
+done
+
+# S2C with a local WT_CONNECTION_IMPL variable.
+for f in `find bench examples ext src test -name '*.[ci]'`; do
+ file_parse $f |
+ egrep 'conn = S2C.*S2C' |
+ sed 's/:.*//' > $t
+ test -s $t && {
+ echo "$f: S2C with a local WT_CONNECTION_IMPL variable."
+ sed 's/^/function @ line:/' < $t
+ }
+done
+
+# S2B with a local WT_BTREE variable.
+for f in `find bench examples ext src test -name '*.[ci]'`; do
+ file_parse $f |
+ egrep 'btree = S2B.*S2B' |
+ sed 's/:.*//' > $t
+ test -s $t && {
+ echo "$f: S2B with a local WT_BTREE variable."
+ sed 's/^/function @ line:/' < $t
+ }
+done
+
exit 0
diff --git a/src/third_party/wiredtiger/dist/s_label_loop.py b/src/third_party/wiredtiger/dist/s_function_loop.py
index 5cc222a4250..5cc222a4250 100644
--- a/src/third_party/wiredtiger/dist/s_label_loop.py
+++ b/src/third_party/wiredtiger/dist/s_function_loop.py
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index c79dc5129a5..58b8137cad9 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -74,6 +74,7 @@ Checksum
Checksums
CityHash
CloseHandle
+Cmvz
Collet
Comparator
Config
@@ -264,6 +265,7 @@ NoAddr
Noll
Nul
OOB
+OPLOG
OPTYPE
OUTBUFF
OVFL
@@ -528,6 +530,7 @@ checkpointer
checkpointing
checksum
checksums
+checksys
checkvalue
children's
chk
@@ -940,6 +943,7 @@ msvc
multi
multiblock
multicore
+multicycle
multiprocess
multisocket
multithreaded
@@ -1284,6 +1288,7 @@ vtype
vunpack
vw
vxr
+vz
waitpid
waker
wakeup
diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c
index 02237faf4e9..a0c6f87ceda 100644
--- a/src/third_party/wiredtiger/examples/c/ex_all.c
+++ b/src/third_party/wiredtiger/examples/c/ex_all.c
@@ -36,20 +36,20 @@
static const char *home;
-void add_collator(WT_CONNECTION *conn);
-void add_extractor(WT_CONNECTION *conn);
-void backup(WT_SESSION *session);
-void checkpoint_ops(WT_SESSION *session);
-void connection_ops(WT_CONNECTION *conn);
-int cursor_ops(WT_SESSION *session);
-void cursor_search_near(WT_CURSOR *cursor);
-void cursor_statistics(WT_SESSION *session);
-void named_snapshot_ops(WT_SESSION *session);
-void pack_ops(WT_SESSION *session);
-void session_ops(WT_SESSION *session);
-void transaction_ops(WT_CONNECTION *conn, WT_SESSION *session);
+static void add_collator(WT_CONNECTION *conn);
+static void add_extractor(WT_CONNECTION *conn);
+static void backup(WT_SESSION *session);
+static void checkpoint_ops(WT_SESSION *session);
+static void connection_ops(WT_CONNECTION *conn);
+static int cursor_ops(WT_SESSION *session);
+static void cursor_search_near(WT_CURSOR *cursor);
+static void cursor_statistics(WT_SESSION *session);
+static void named_snapshot_ops(WT_SESSION *session);
+static void pack_ops(WT_SESSION *session);
+static void session_ops(WT_SESSION *session);
+static void transaction_ops(WT_SESSION *session);
-int
+static int
cursor_ops(WT_SESSION *session)
{
WT_CURSOR *cursor;
@@ -66,6 +66,21 @@ cursor_ops(WT_SESSION *session)
/*! [Open a cursor on the metadata] */
{
+ const char *key = "some key", *value = "some value";
+ /*! [Reconfigure a cursor] */
+ error_check(session->open_cursor(
+ session, "table:mytable", NULL, "overwrite=false", &cursor));
+
+ /* Reconfigure the cursor to overwrite the record. */
+ error_check(cursor->reconfigure(cursor, "overwrite=true"));
+
+ cursor->set_key(cursor, key);
+ cursor->set_value(cursor, value);
+ error_check(cursor->insert(cursor));
+ /*! [Reconfigure a cursor] */
+ }
+
+ {
WT_CURSOR *duplicate;
const char *key = "some key";
/*! [Duplicate a cursor] */
@@ -81,20 +96,6 @@ cursor_ops(WT_SESSION *session)
}
{
- const char *key = "some key", *value = "some value";
- /*! [Reconfigure a cursor] */
- error_check(session->open_cursor(
- session, "table:mytable", NULL, "overwrite=false", &cursor));
- cursor->set_key(cursor, key);
- cursor->set_value(cursor, value);
-
- /* Reconfigure the cursor to overwrite the record. */
- error_check(cursor->reconfigure(cursor, "overwrite=true"));
- error_check(cursor->insert(cursor));
- /*! [Reconfigure a cursor] */
- }
-
- {
/*! [boolean configuration string example] */
error_check(session->open_cursor(
session, "table:mytable", NULL, "overwrite", &cursor));
@@ -105,6 +106,8 @@ cursor_ops(WT_SESSION *session)
/*! [boolean configuration string example] */
}
+ error_check(session->checkpoint(session, "name=midnight"));
+
{
/*! [open a named checkpoint] */
error_check(session->open_cursor(session,
@@ -120,18 +123,31 @@ cursor_ops(WT_SESSION *session)
}
{
+ /*! [Set the cursor's string key] */
+ /* Set the cursor's string key. */
+ const char *key = "another key";
+ cursor->set_key(cursor, key);
+ /*! [Set the cursor's string key] */
+ }
+
+ {
/*! [Get the cursor's string key] */
const char *key; /* Get the cursor's string key. */
error_check(cursor->get_key(cursor, &key));
/*! [Get the cursor's string key] */
}
+ /* Switch to a recno table. */
+ error_check(session->create(
+ session, "table:recno", "key_format=r,value_format=S"));
+ error_check(session->open_cursor(
+ session, "table:recno", NULL, NULL, &cursor));
+
{
- /*! [Set the cursor's string key] */
- /* Set the cursor's string key. */
- const char *key = "another key";
- cursor->set_key(cursor, key);
- /*! [Set the cursor's string key] */
+ /*! [Set the cursor's record number key] */
+ uint64_t recno = 37; /* Set the cursor's record number key. */
+ cursor->set_key(cursor, recno);
+ /*! [Set the cursor's record number key] */
}
{
@@ -141,11 +157,17 @@ cursor_ops(WT_SESSION *session)
/*! [Get the cursor's record number key] */
}
+ /* Switch to a composite table. */
+ error_check(session->create(
+ session, "table:composite", "key_format=SiH,value_format=S"));
+ error_check(session->open_cursor(
+ session, "table:recno", NULL, NULL, &cursor));
+
{
- /*! [Set the cursor's record number key] */
- uint64_t recno = 37; /* Set the cursor's record number key. */
- cursor->set_key(cursor, recno);
- /*! [Set the cursor's record number key] */
+ /*! [Set the cursor's composite key] */
+ /* Set the cursor's "SiH" format composite key. */
+ cursor->set_key(cursor, "first", (int32_t)5, (uint16_t)7);
+ /*! [Set the cursor's composite key] */
}
{
@@ -159,10 +181,11 @@ cursor_ops(WT_SESSION *session)
}
{
- /*! [Set the cursor's composite key] */
- /* Set the cursor's "SiH" format composite key. */
- cursor->set_key(cursor, "first", (int32_t)5, (uint16_t)7);
- /*! [Set the cursor's composite key] */
+ /*! [Set the cursor's string value] */
+ /* Set the cursor's string value. */
+ const char *value = "another value";
+ cursor->set_value(cursor, value);
+ /*! [Set the cursor's string value] */
}
{
@@ -173,14 +196,6 @@ cursor_ops(WT_SESSION *session)
}
{
- /*! [Set the cursor's string value] */
- /* Set the cursor's string value. */
- const char *value = "another value";
- cursor->set_value(cursor, value);
- /*! [Set the cursor's string value] */
- }
-
- {
/*! [Get the cursor's raw value] */
WT_ITEM value; /* Get the cursor's raw value. */
error_check(cursor->get_value(cursor, &value));
@@ -196,20 +211,26 @@ cursor_ops(WT_SESSION *session)
/*! [Set the cursor's raw value] */
}
+ error_check(cursor->insert(cursor));
+
/*! [Return the next record] */
error_check(cursor->next(cursor));
/*! [Return the next record] */
- /*! [Return the previous record] */
- error_check(cursor->prev(cursor));
- /*! [Return the previous record] */
-
/*! [Reset the cursor] */
error_check(cursor->reset(cursor));
/*! [Reset the cursor] */
+ /*! [Return the previous record] */
+ error_check(cursor->prev(cursor));
+ /*! [Return the previous record] */
+
{
WT_CURSOR *other = NULL;
+ error_check(
+ session->open_cursor(session, NULL, cursor, NULL, &other));
+
+ {
/*! [Cursor comparison] */
int compare;
error_check(cursor->compare(cursor, other, &compare));
@@ -224,7 +245,6 @@ cursor_ops(WT_SESSION *session)
}
{
- WT_CURSOR *other = NULL;
/*! [Cursor equality] */
int equal;
error_check(cursor->equals(cursor, other, &equal));
@@ -235,17 +255,8 @@ cursor_ops(WT_SESSION *session)
}
/*! [Cursor equality] */
}
-
- {
- /*! [Search for an exact match] */
- const char *key = "some key";
- cursor->set_key(cursor, key);
- error_check(cursor->search(cursor));
- /*! [Search for an exact match] */
}
- cursor_search_near(cursor);
-
{
/*! [Insert a new record or overwrite an existing record] */
/* Insert a new record or overwrite an existing record. */
@@ -259,9 +270,19 @@ cursor_ops(WT_SESSION *session)
}
{
+ /*! [Search for an exact match] */
+ const char *key = "some key";
+ cursor->set_key(cursor, key);
+ error_check(cursor->search(cursor));
+ /*! [Search for an exact match] */
+ }
+
+ cursor_search_near(cursor);
+
+ {
/*! [Insert a new record and fail if the record exists] */
/* Insert a new record and fail if the record exists. */
- const char *key = "some key", *value = "some value";
+ const char *key = "new key", *value = "some value";
error_check(session->open_cursor(
session, "table:mytable", NULL, "overwrite=false", &cursor));
cursor->set_key(cursor, key);
@@ -270,35 +291,52 @@ cursor_ops(WT_SESSION *session)
/*! [Insert a new record and fail if the record exists] */
}
+ error_check(session->open_cursor(
+ session, "table:recno", NULL, "append", &cursor));
+
{
/*! [Insert a new record and assign a record number] */
/* Insert a new record and assign a record number. */
uint64_t recno;
const char *value = "some value";
- error_check(session->open_cursor(
- session, "table:mytable", NULL, "append", &cursor));
cursor->set_value(cursor, value);
error_check(cursor->insert(cursor));
error_check(cursor->get_key(cursor, &recno));
/*! [Insert a new record and assign a record number] */
}
+ error_check(session->open_cursor(
+ session, "table:mytable", NULL, NULL, &cursor));
+
{
/*! [Reserve a record] */
const char *key = "some key";
- error_check(session->open_cursor(
- session, "table:mytable", NULL, NULL, &cursor));
+ error_check(session->begin_transaction(session, NULL));
cursor->set_key(cursor, key);
error_check(cursor->reserve(cursor));
+ error_check(session->commit_transaction(session, NULL));
/*! [Reserve a record] */
}
+ error_check(session->create(
+ session, "table:blob", "key_format=S,value_format=u"));
+ error_check(session->open_cursor(
+ session, "table:blob", NULL, NULL, &cursor));
+ {
+ WT_ITEM value;
+ value.data = "abcdefghijklmnopqrstuvwxyz"
+ "abcdefghijklmnopqrstuvwxyz"
+ "abcdefghijklmnopqrstuvwxyz";
+ value.size = strlen(value.data);
+ cursor->set_key(cursor, "some key");
+ cursor->set_value(cursor, &value);
+ error_check(cursor->insert(cursor));
+ }
+
{
/*! [Modify an existing record] */
WT_MODIFY entries[3];
const char *key = "some key";
- error_check(session->open_cursor(
- session, "table:mytable", NULL, NULL, &cursor));
/* Position the cursor. */
cursor->set_key(cursor, key);
@@ -349,23 +387,23 @@ cursor_ops(WT_SESSION *session)
}
{
- /*! [Remove a record] */
+ /*! [Remove a record and fail if DNE] */
const char *key = "some key";
error_check(session->open_cursor(
- session, "table:mytable", NULL, NULL, &cursor));
+ session, "table:mytable", NULL, "overwrite=false", &cursor));
cursor->set_key(cursor, key);
error_check(cursor->remove(cursor));
- /*! [Remove a record] */
+ /*! [Remove a record and fail if DNE] */
}
{
- /*! [Remove a record and fail if DNE] */
+ /*! [Remove a record] */
const char *key = "some key";
error_check(session->open_cursor(
- session, "table:mytable", NULL, "overwrite=false", &cursor));
+ session, "table:mytable", NULL, NULL, &cursor));
cursor->set_key(cursor, key);
error_check(cursor->remove(cursor));
- /*! [Remove a record and fail if DNE] */
+ /*! [Remove a record] */
}
{
@@ -400,7 +438,7 @@ cursor_ops(WT_SESSION *session)
return (0);
}
-void
+static void
cursor_search_near(WT_CURSOR *cursor)
{
int exact, ret;
@@ -445,9 +483,12 @@ cursor_search_near(WT_CURSOR *cursor)
/*! [Backward scan less than] */
}
-void
+static void
checkpoint_ops(WT_SESSION *session)
{
+ error_check(session->create(session, "table:table1", NULL));
+ error_check(session->create(session, "table:table2", NULL));
+
/*! [Checkpoint examples] */
/* Checkpoint the database. */
error_check(session->checkpoint(session, NULL));
@@ -506,7 +547,7 @@ checkpoint_ops(WT_SESSION *session)
/*! [JSON quoting example] */
}
-void
+static void
cursor_statistics(WT_SESSION *session)
{
WT_CURSOR *cursor;
@@ -538,7 +579,7 @@ cursor_statistics(WT_SESSION *session)
/*! [Statistics cursor clear configuration] */
}
-void
+static void
named_snapshot_ops(WT_SESSION *session)
{
/*! [Snapshot examples] */
@@ -551,11 +592,17 @@ named_snapshot_ops(WT_SESSION *session)
/* Drop all named snapshots */
error_check(session->snapshot(session, "drop=(all)"));
/*! [Snapshot examples] */
+
+ error_check(session->rollback_transaction(session, NULL));
}
-void
+static void
session_ops(WT_SESSION *session)
{
+ WT_CONNECTION *conn;
+
+ conn = session->connection;
+
/*! [Reconfigure a session] */
error_check(session->reconfigure(session, "isolation=snapshot"));
/*! [Reconfigure a session] */
@@ -765,20 +812,47 @@ session_ops(WT_SESSION *session)
error_check(session->verify(session, "table:mytable", NULL));
/*! [Verify a table] */
- /*! [Drop a table] */
- error_check(session->drop(session, "table:mytable", NULL));
- /*! [Drop a table] */
- }
+ /*
+ * We can't call the backup function because it includes absolute paths
+ * for documentation purposes that don't exist on test systems. That
+ * said, we have to reference the function to avoid build warnings
+ * about unused static code.
+ */
+ (void)backup;
+
+ /* Call other functions, where possible. */
+ checkpoint_ops(session);
+ error_check(cursor_ops(session));
+ cursor_statistics(session);
+ named_snapshot_ops(session);
+ pack_ops(session);
+ transaction_ops(session);
/*! [Close a session] */
error_check(session->close(session, NULL));
/*! [Close a session] */
+
+ /*
+ * We close the old session first to close all cursors, open a new one
+ * for the drop.
+ */
+ error_check(conn->open_session(conn, NULL, NULL, &session));
+
+ /*! [Drop a table] */
+ error_check(session->drop(session, "table:mytable", NULL));
+ /*! [Drop a table] */
+ }
}
-void
-transaction_ops(WT_CONNECTION *conn, WT_SESSION *session)
+static void
+transaction_ops(WT_SESSION *session_arg)
{
+ WT_CONNECTION *conn;
WT_CURSOR *cursor;
+ WT_SESSION *session;
+
+ session = session_arg;
+ conn = session->connection;
/*! [transaction commit/rollback] */
/*
@@ -829,7 +903,7 @@ transaction_ops(WT_CONNECTION *conn, WT_SESSION *session)
/*! [session isolation configuration] */
/* Open a session configured for read-uncommitted isolation. */
error_check(conn->open_session(
- conn, NULL, "isolation=read_uncommitted", &session));
+ conn, NULL, "isolation=read-uncommitted", &session));
/*! [session isolation configuration] */
/*! [session isolation re-configuration] */
@@ -837,6 +911,9 @@ transaction_ops(WT_CONNECTION *conn, WT_SESSION *session)
error_check(session->reconfigure(session, "isolation=snapshot"));
/*! [session isolation re-configuration] */
+ error_check(session->close(session, NULL));
+ session = session_arg;
+
{
/*! [transaction pinned range] */
/* Check the transaction ID range pinned by the session handle. */
@@ -846,17 +923,19 @@ transaction_ops(WT_CONNECTION *conn, WT_SESSION *session)
/*! [transaction pinned range] */
}
+ error_check(session->begin_transaction(session, NULL));
+
+#ifdef HAVE_TIMESTAMPS
+ {
+ /*! [query timestamp] */
+ char timestamp_buf[2 * WT_TIMESTAMP_SIZE + 1];
+
/*! [transaction timestamp] */
error_check(
session->timestamp_transaction(session, "commit_timestamp=2a"));
/*! [transaction timestamp] */
- {
-#ifndef WT_TIMESTAMP_SIZE
-#define WT_TIMESTAMP_SIZE 8
-#endif
- /*! [query timestamp] */
- char timestamp_buf[2 * WT_TIMESTAMP_SIZE + 1];
+ error_check(session->commit_transaction(session, NULL));
error_check(conn->query_timestamp(
conn, timestamp_buf, "get=all_committed"));
@@ -871,9 +950,14 @@ transaction_ops(WT_CONNECTION *conn, WT_SESSION *session)
error_check(conn->set_timestamp(conn, "oldest_timestamp=2a"));
/*! [set oldest timestamp] */
+ /*! [set stable timestamp] */
+ error_check(conn->set_timestamp(conn, "stable_timestamp=2a"));
+ /*! [set stable timestamp] */
+
/*! [rollback to stable] */
- error_check(conn->rollback_to_stable(conn, ""));
+ error_check(conn->rollback_to_stable(conn, NULL));
/*! [rollback to stable] */
+#endif
}
/*! [Implement WT_COLLATOR] */
@@ -900,7 +984,7 @@ my_compare(WT_COLLATOR *collator, WT_SESSION *session,
}
/*! [Implement WT_COLLATOR] */
-void
+static void
add_collator(WT_CONNECTION *conn)
{
/*! [WT_COLLATOR register] */
@@ -926,7 +1010,7 @@ my_extract(WT_EXTRACTOR *extractor, WT_SESSION *session,
}
/*! [WT_EXTRACTOR] */
-void
+static void
add_extractor(WT_CONNECTION *conn)
{
/*! [WT_EXTRACTOR register] */
@@ -937,7 +1021,7 @@ add_extractor(WT_CONNECTION *conn)
/*! [WT_EXTRACTOR register] */
}
-void
+static void
connection_ops(WT_CONNECTION *conn)
{
#ifdef MIGHT_NOT_RUN
@@ -1017,7 +1101,7 @@ connection_ops(WT_CONNECTION *conn)
/*! [Close a connection] */
}
-void
+static void
pack_ops(WT_SESSION *session)
{
{
@@ -1047,7 +1131,7 @@ pack_ops(WT_SESSION *session)
}
}
-void
+static void
backup(WT_SESSION *session)
{
char buf[1024];
@@ -1099,7 +1183,8 @@ main(int argc, char *argv[])
/*! [Open a connection] */
error_check(wiredtiger_open(home, NULL,
- "create,cache_size=5GB,log=(enabled,recover=on)", &conn));
+ "create,cache_size=5GB,log=(enabled,recover=on),statistics=(all)",
+ &conn));
/*! [Open a connection] */
connection_ops(conn);
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 4d1ae59f448..c0f667140d0 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "698847557ce7b3a938bbc8334d64a9430e4dc786",
+ "commit": "b055251678e6b4fcc74a1f651432aadbfeecc0e4",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-3.6"
diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c
index c20a294c07b..98cc10a6de1 100644
--- a/src/third_party/wiredtiger/src/block/block_ckpt.c
+++ b/src/third_party/wiredtiger/src/block/block_ckpt.c
@@ -342,6 +342,11 @@ __ckpt_verify(WT_SESSION_IMPL *session, WT_CKPT *ckptbase)
break;
/* FALLTHROUGH */
default:
+ /*
+ * Don't convert to WT_ILLEGAL_VALUE, it won't compile
+ * on some gcc compilers because they don't understand
+ * FALLTHROUGH as part of a macro.
+ */
return (
__wt_illegal_value(session, "checkpoint array"));
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index d58dc78fbed..6e1ab526e52 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -1197,8 +1197,7 @@ __cursor_chain_exceeded(WT_CURSOR_BTREE *cbt)
upd = page->modify->mod_row_update[cbt->slot];
for (i = 0; upd != NULL; ++i, upd = upd->next) {
- if (upd->type == WT_UPDATE_DELETED ||
- upd->type == WT_UPDATE_STANDARD)
+ if (WT_UPDATE_DATA_VALUE(upd))
return (false);
if (i >= WT_MAX_MODIFY_UPDATE)
return (true);
@@ -1219,7 +1218,7 @@ __wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentries)
WT_DECL_RET;
WT_SESSION_IMPL *session;
size_t orig, new;
- bool chain_exceeded, overwrite;
+ bool overwrite;
cursor = &cbt->iface;
session = (WT_SESSION_IMPL *)cursor->session;
@@ -1259,13 +1258,13 @@ __wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentries)
/*
* WT_CURSOR.modify is update-without-overwrite.
*
- * Use the modify buffer as the update if under the limit, else use the
- * complete value.
+ * Use the modify buffer as the update if the data package saves us some
+ * memory and the update chain is under the limit, else use the complete
+ * value.
*/
overwrite = F_ISSET(cursor, WT_CURSTD_OVERWRITE);
F_CLR(cursor, WT_CURSTD_OVERWRITE);
- chain_exceeded = __cursor_chain_exceeded(cbt);
- if (chain_exceeded)
+ if (cursor->value.size <= 64 || __cursor_chain_exceeded(cbt))
ret = __btcur_update(cbt, &cursor->value, WT_UPDATE_STANDARD);
else if ((ret =
__wt_modify_pack(session, &modify, entries, nentries)) == 0)
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index b8d11be7b3e..d91ac027738 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -39,7 +39,6 @@ static int __debug_col_skip(WT_DBG *, WT_INSERT_HEAD *, const char *, bool);
static int __debug_config(WT_SESSION_IMPL *, WT_DBG *, const char *);
static int __debug_dsk_cell(WT_DBG *, const WT_PAGE_HEADER *);
static int __debug_dsk_col_fix(WT_DBG *, const WT_PAGE_HEADER *);
-static int __debug_item(WT_DBG *, const char *, const void *, size_t);
static int __debug_page(WT_DBG *, WT_REF *, uint32_t);
static int __debug_page_col_fix(WT_DBG *, WT_REF *);
static int __debug_page_col_int(WT_DBG *, WT_PAGE *, uint32_t);
@@ -81,6 +80,41 @@ __debug_hex_byte(WT_DBG *ds, uint8_t v)
}
/*
+ * __debug_bytes --
+ *	Dump a run of bytes: printable characters as themselves, everything
+ *	else through __debug_hex_byte.
+ */
+static int
+__debug_bytes(WT_DBG *ds, const void *data_arg, size_t size)
+{
+	const uint8_t *bytes;
+	size_t n;
+
+	bytes = data_arg;
+	for (n = 0; n < size; ++n) {
+		if (!__wt_isprint((u_char)bytes[n])) {
+			WT_RET(__debug_hex_byte(ds, bytes[n]));
+			continue;
+		}
+		WT_RET(ds->f(ds, "%c", (int)bytes[n]));
+	}
+	return (0);
+}
+
+/*
+ * __debug_item --
+ *	Dump a single data/size pair as "{bytes}" on its own line; a non-NULL
+ *	tag is written first, followed by a space.
+ */
+static int
+__debug_item(WT_DBG *ds, const char *tag, const void *data_arg, size_t size)
+{
+	const char *sep;
+
+	/* No tag means no separator either. */
+	sep = tag == NULL ? "" : " ";
+	WT_RET(ds->f(ds, "\t%s%s{", tag == NULL ? "" : tag, sep));
+	WT_RET(__debug_bytes(ds, data_arg, size));
+	return (ds->f(ds, "}\n"));
+}
+
+/*
* __dmsg_event --
* Send a debug message to the event handler.
*/
@@ -993,23 +1027,26 @@ static int
__debug_modified(WT_DBG *ds, WT_UPDATE *upd)
{
const size_t *p;
- int nentries;
+ size_t nentries, data_size, offset, size;
const uint8_t *data;
- void *modify;
-
- modify = upd->data;
- p = modify;
- nentries = (int)*p++;
- data = (uint8_t *)modify +
+ p = (size_t *)upd->data;
+ memcpy(&nentries, p++, sizeof(size_t));
+ data = upd->data +
sizeof(size_t) + ((size_t)nentries * 3 * sizeof(size_t));
- WT_RET(ds->f(ds, "%d: ", nentries));
- for (; nentries-- > 0; data += p[0], p += 3)
+ WT_RET(ds->f(ds, "%" WT_SIZET_FMT ": ", nentries));
+ for (; nentries-- > 0; data += data_size) {
+ memcpy(&data_size, p++, sizeof(size_t));
+ memcpy(&offset, p++, sizeof(size_t));
+ memcpy(&size, p++, sizeof(size_t));
WT_RET(ds->f(ds,
"{%" WT_SIZET_FMT ", %" WT_SIZET_FMT ", %" WT_SIZET_FMT
- ", %.*s}%s", p[0], p[1], p[2],
- (int)p[2], data, nentries == 0 ? "" : ", "));
+ ", ",
+ data_size, offset, size));
+ WT_RET(__debug_bytes(ds, data, data_size));
+ WT_RET(ds->f(ds, "}%s", nentries == 0 ? "" : ", "));
+ }
return (0);
}
@@ -1052,17 +1089,10 @@ __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte)
#ifdef HAVE_TIMESTAMPS
if (!__wt_timestamp_iszero(
WT_TIMESTAMP_NULL(&upd->timestamp))) {
-#if WT_TIMESTAMP_SIZE == 8
- WT_RET(ds->f(ds,
- ", stamp %" PRIu64, upd->timestamp.val));
-#else
- int i;
-
- WT_RET(ds->f(ds, ", stamp 0x"));
- for (i = 0; i < WT_TIMESTAMP_SIZE; ++i)
- WT_RET(ds->f(ds,
- "%" PRIx8, upd->timestamp.ts[i]));
-#endif
+ char hex_timestamp[2 * WT_TIMESTAMP_SIZE + 1];
+ WT_RET(__wt_timestamp_to_hex_string(
+ ds->session, hex_timestamp, &upd->timestamp));
+ WT_RET(ds->f(ds, ", stamp %s", hex_timestamp));
}
#endif
WT_RET(ds->f(ds, "\n"));
@@ -1250,28 +1280,4 @@ __debug_cell_data(WT_DBG *ds,
return (ret);
}
-
-/*
- * __debug_item --
- * Dump a single data/size pair, with an optional tag.
- */
-static int
-__debug_item(WT_DBG *ds, const char *tag, const void *data_arg, size_t size)
-{
- size_t i;
- u_char ch;
- const uint8_t *data;
-
- WT_RET(ds->f(ds,
- "\t%s%s{", tag == NULL ? "" : tag, tag == NULL ? "" : " "));
- for (data = data_arg, i = 0; i < size; ++i, ++data) {
- ch = data[0];
- if (__wt_isprint(ch))
- WT_RET(ds->f(ds, "%c", (int)ch));
- else
- WT_RET(__debug_hex_byte(ds, data[0]));
- }
- WT_RET(ds->f(ds, "}\n"));
- return (0);
-}
#endif
diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c
index bc9356e2669..806a9770057 100644
--- a/src/third_party/wiredtiger/src/btree/bt_discard.c
+++ b/src/third_party/wiredtiger/src/btree/bt_discard.c
@@ -249,9 +249,8 @@ __free_page_modify(WT_SESSION_IMPL *session, WT_PAGE *page)
/* Free the overflow on-page, reuse and transaction-cache skiplists. */
__wt_ovfl_reuse_free(session, page);
- if (mod->ovfl_track != NULL)
- __wt_free(session, mod->ovfl_track->remove);
__wt_ovfl_discard_free(session, page);
+ __wt_ovfl_discard_remove(session, page);
__wt_free(session, page->modify->ovfl_track);
__wt_spin_destroy(session, &page->modify->page_lock);
diff --git a/src/third_party/wiredtiger/src/btree/bt_ovfl.c b/src/third_party/wiredtiger/src/btree/bt_ovfl.c
index f933245eaef..fab38f3cc8d 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ovfl.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ovfl.c
@@ -49,7 +49,6 @@ __wt_ovfl_read(WT_SESSION_IMPL *session,
{
WT_DECL_RET;
WT_OVFL_TRACK *track;
- WT_UPDATE *upd;
size_t i;
*decoded = false;
@@ -74,14 +73,13 @@ __wt_ovfl_read(WT_SESSION_IMPL *session,
__wt_readlock(session, &S2BT(session)->ovfl_lock);
if (__wt_cell_type_raw(unpack->cell) == WT_CELL_VALUE_OVFL_RM) {
track = page->modify->ovfl_track;
- for (upd = NULL, i = 0; i < track->remove_next; ++i)
+ for (i = 0; i < track->remove_next; ++i)
if (track->remove[i].cell == unpack->cell) {
- upd = track->remove[i].upd;
+ store->data = track->remove[i].data;
+ store->size = track->remove[i].size;
break;
}
WT_ASSERT(session, i < track->remove_next);
- store->data = upd->data;
- store->size = upd->size;
*decoded = true;
} else
ret = __ovfl_read(session, unpack->data, unpack->size, store);
@@ -91,134 +89,56 @@ __wt_ovfl_read(WT_SESSION_IMPL *session,
}
/*
- * __ovfl_cache_col_visible --
- * column-store: check for a globally visible update.
+ * __wt_ovfl_discard_remove --
+ * Free the on-page overflow value cache.
*/
-static bool
-__ovfl_cache_col_visible(
- WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_CELL_UNPACK *unpack)
+void
+__wt_ovfl_discard_remove(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- /*
- * Column-store is harder than row_store: we're here because there's a
- * reader in the system that might read the original version of an
- * overflow record, which might match a number of records. For example,
- * the original overflow value was for records 100-200, we've replaced
- * each of those records individually, but there exists a reader that
- * might read any one of those records, and all of those records have
- * different update entries with different transaction IDs. Since it's
- * infeasible to determine if there's a globally visible update for each
- * reader for each record, we test the simple case where a single record
- * has a single, globally visible update. If that's not the case, cache
- * the value.
- */
- if (__wt_cell_rle(unpack) == 1 &&
- WT_UPDATE_DATA_VALUE(upd) && __wt_txn_upd_visible_all(session, upd))
- return (true);
- return (false);
-}
-
-/*
- * __ovfl_cache_row_visible --
- * row-store: check for a globally visible update.
- */
-static bool
-__ovfl_cache_row_visible(WT_SESSION_IMPL *session, WT_UPDATE *upd)
-{
- /* Check to see if there's a globally visible update. */
- for (; upd != NULL; upd = upd->next)
- if (WT_UPDATE_DATA_VALUE(upd) &&
- __wt_txn_upd_visible_all(session, upd))
- return (true);
-
- return (false);
+ WT_OVFL_TRACK *track;
+ uint32_t i;
+
+ if (page->modify != NULL &&
+ (track = page->modify->ovfl_track) != NULL) {
+ for (i = 0; i < track->remove_next; ++i)
+ __wt_free(session, track->remove[i].data);
+ __wt_free(session, page->modify->ovfl_track->remove);
+ track->remove_allocated = 0;
+ track->remove_next = 0;
+ }
}
/*
- * __ovfl_cache_append_update --
- * Append an overflow value to the update list.
+ * __ovfl_cache --
+ * Cache an overflow value.
*/
static int
-__ovfl_cache_append_update(WT_SESSION_IMPL *session, WT_PAGE *page,
- WT_UPDATE *upd_list, WT_CELL_UNPACK *unpack, WT_UPDATE **updp)
+__ovfl_cache(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack)
{
WT_DECL_ITEM(tmp);
WT_DECL_RET;
- WT_UPDATE *append, *upd;
- size_t size;
-
- *updp = NULL;
+ WT_OVFL_TRACK *track;
/* Read the overflow value. */
WT_RET(__wt_scr_alloc(session, 1024, &tmp));
WT_ERR(__wt_dsk_cell_data_ref(session, page->type, unpack, tmp));
- /*
- * Create an update entry with no transaction ID to ensure global
- * visibility, append it to the update list.
- *
- * We don't need locks or barriers in this function: any thread reading
- * the update list will see our newly appended record or not, it doesn't
- * matter until the on-page cell is set to WT_CELL_VALUE_OVFL_RM. That
- * involves atomic operations which will act as our barrier. Regardless,
- * we update the page footprint as part of this operation, which acts as
- * a barrier as well.
- *
- * The update transaction ID choice is tricky, to work around an issue
- * in variable-length column store. Imagine an overflow value with an
- * RLE greater than 1. We append a copy to the end of an update chain,
- * but it's possible it's the overflow value for more than one record,
- * and appending it to the end of one record's update chain means a
- * subsequent enter of a globally visible value to one of the records
- * would allow the truncation of the overflow chain that leaves other
- * records without a value. If appending such an overflow record, set
- * the transaction ID to the first possible transaction ID. That ID is
- * old enough to be globally visible, but we can use it as a flag if an
- * update record cannot be discarded when truncating an update chain.
- */
- WT_ERR(__wt_update_alloc(
- session, tmp, &append, &size, WT_UPDATE_STANDARD));
- append->txnid = page->type == WT_PAGE_COL_VAR &&
- __wt_cell_rle(unpack) > 1 ? WT_TXN_FIRST : WT_TXN_NONE;
- for (upd = upd_list; upd->next != NULL; upd = upd->next)
- ;
- WT_PUBLISH(upd->next, append);
-
- __wt_cache_page_inmem_incr(session, page, size);
-
- *updp = append;
-
-err: __wt_scr_free(session, &tmp);
- return (ret);
-}
-
-/*
- * __ovfl_cache --
- * Cache an overflow value.
- */
-static int
-__ovfl_cache(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_UPDATE *upd_list, WT_CELL_UNPACK *unpack)
-{
- WT_OVFL_TRACK *track;
- WT_UPDATE *upd;
-
- /* Append a copy of the overflow value to the update list. */
- WT_RET(__ovfl_cache_append_update(
- session, page, upd_list, unpack, &upd));
-
/* Allocating tracking structures as necessary. */
if (page->modify->ovfl_track == NULL)
- WT_RET(__wt_ovfl_track_init(session, page));
+ WT_ERR(__wt_ovfl_track_init(session, page));
track = page->modify->ovfl_track;
- /* Add the value's information to the update list. */
- WT_RET(__wt_realloc_def(session,
+ /* Copy the overflow item into place. */
+ WT_ERR(__wt_realloc_def(session,
&track->remove_allocated, track->remove_next + 1, &track->remove));
track->remove[track->remove_next].cell = unpack->cell;
- track->remove[track->remove_next].upd = upd;
+ WT_ERR(__wt_strndup(session,
+ tmp->data, tmp->size, &track->remove[track->remove_next].data));
+ track->remove[track->remove_next].size = tmp->size;
++track->remove_next;
- return (0);
+err: __wt_scr_free(session, &tmp);
+ return (ret);
}
/*
@@ -227,12 +147,14 @@ __ovfl_cache(WT_SESSION_IMPL *session,
*/
int
__wt_ovfl_remove(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_UPDATE *upd_list, WT_CELL_UNPACK *unpack)
+ WT_PAGE *page, WT_CELL_UNPACK *unpack, bool checkpoint)
{
- bool visible;
-
/*
- * This function solves a problem in reconciliation. The scenario is:
+ * This function solves two problems in reconciliation.
+ *
+ * The first problem is snapshot readers needing on-page overflow values
+ * that have been removed. The scenario is as follows:
+ *
* - reconciling a leaf page that references an overflow item
* - the item is updated and the update committed
* - a checkpoint runs, freeing the backing overflow blocks
@@ -263,28 +185,16 @@ __wt_ovfl_remove(WT_SESSION_IMPL *session,
* per overflow item. We don't do any of that because overflow values
* are supposed to be rare and we shouldn't see contention for the lock.
*
- * Check for a globally visible update. If there is a globally visible
- * update, we don't need to cache the item because it's not possible for
- * a running thread to have moved past it.
- */
- switch (page->type) {
- case WT_PAGE_COL_VAR:
- visible = __ovfl_cache_col_visible(session, upd_list, unpack);
- break;
- case WT_PAGE_ROW_LEAF:
- visible = __ovfl_cache_row_visible(session, upd_list);
- break;
- WT_ILLEGAL_VALUE(session);
- }
-
- /*
- * If there's no globally visible update, there's a reader in the system
- * that might try and read the old value, cache it.
+ * We only have to do this for checkpoints: in any eviction mode, there
+ * can't be threads sitting in our update lists.
*/
- if (!visible)
- WT_RET(__ovfl_cache(session, page, upd_list, unpack));
+ if (checkpoint)
+ WT_RET(__ovfl_cache(session, page, unpack));
/*
+ * The second problem is to only remove the underlying blocks once,
+ * solved by the WT_CELL_VALUE_OVFL_RM flag.
+ *
* Queue the on-page cell to be set to WT_CELL_VALUE_OVFL_RM and the
* underlying overflow value's blocks to be freed when reconciliation
* completes.
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index 49b12b2d4e9..0c3cb026421 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -96,7 +96,7 @@ __col_instantiate(WT_SESSION_IMPL *session,
/* Search the page and add updates. */
WT_RET(__wt_col_search(session, recno, ref, cbt));
WT_RET(__wt_col_modify(
- session, cbt, recno, NULL, updlist, updlist->type, false));
+ session, cbt, recno, NULL, updlist, WT_UPDATE_INVALID, false));
return (0);
}
@@ -121,7 +121,7 @@ __row_instantiate(WT_SESSION_IMPL *session,
/* Search the page and add updates. */
WT_RET(__wt_row_search(session, key, ref, cbt, true));
WT_RET(__wt_row_modify(
- session, cbt, key, NULL, updlist, updlist->type, false));
+ session, cbt, key, NULL, updlist, WT_UPDATE_INVALID, false));
return (0);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index a0db4457f62..ac90d6693d3 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -1446,8 +1446,8 @@ __split_multi_inmem(
WT_ERR(__wt_col_search(session, recno, ref, &cbt));
/* Apply the modification. */
- WT_ERR(__wt_col_modify(
- session, &cbt, recno, NULL, upd, upd->type, true));
+ WT_ERR(__wt_col_modify(session, &cbt,
+ recno, NULL, upd, WT_UPDATE_INVALID, true));
break;
case WT_PAGE_ROW_LEAF:
/* Build a key. */
@@ -1468,8 +1468,8 @@ __split_multi_inmem(
WT_ERR(__wt_row_search(session, key, ref, &cbt, true));
/* Apply the modification. */
- WT_ERR(__wt_row_modify(
- session, &cbt, key, NULL, upd, upd->type, true));
+ WT_ERR(__wt_row_modify(session,
+ &cbt, key, NULL, upd, WT_UPDATE_INVALID, true));
break;
WT_ILLEGAL_VALUE_ERR(session);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index 3fdafcebfb9..261c0fc1937 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -634,8 +634,7 @@ err: WT_LEAVE_PAGE_INDEX(session);
int
__wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags)
{
- return (__tree_walk_internal(
- session, refp, NULL, NULL, NULL, flags));
+ return (__tree_walk_internal(session, refp, NULL, NULL, NULL, flags));
}
/*
@@ -661,8 +660,8 @@ __wt_tree_walk_custom_skip(
int (*skip_func)(WT_SESSION_IMPL *, WT_REF *, void *, bool *),
void *func_cookie, uint32_t flags)
{
- return (__tree_walk_internal(session, refp,
- NULL, skip_func, func_cookie, flags));
+ return (__tree_walk_internal(
+ session, refp, NULL, skip_func, func_cookie, flags));
}
/*
diff --git a/src/third_party/wiredtiger/src/btree/row_modify.c b/src/third_party/wiredtiger/src/btree/row_modify.c
index 6e610b86376..5e84899999a 100644
--- a/src/third_party/wiredtiger/src/btree/row_modify.c
+++ b/src/third_party/wiredtiger/src/btree/row_modify.c
@@ -263,6 +263,13 @@ __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value,
*updp = NULL;
/*
+ * The code paths leading here are convoluted: assert we never attempt
+ * to allocate an update structure if only intending to insert one we
+ * already have.
+ */
+ WT_ASSERT(session, modify_type != WT_UPDATE_INVALID);
+
+ /*
* Allocate the WT_UPDATE structure and room for the value, then copy
* the value into place.
*/
@@ -304,14 +311,11 @@ __wt_update_obsolete_check(
* Walk the list of updates, looking for obsolete updates at the end.
*
* Only updates with globally visible, self-contained data can terminate
- * update chains, ignore modified and reserved updates. Special case the
- * first transaction ID, it flags column-store overflow values which can
- * never be discarded.
+ * update chains.
*/
for (first = NULL, count = 0; upd != NULL; upd = upd->next, count++)
if (WT_UPDATE_DATA_VALUE(upd) &&
- __wt_txn_upd_visible_all(session, upd) &&
- upd->txnid != WT_TXN_FIRST) {
+ __wt_txn_upd_visible_all(session, upd)) {
if (first == NULL)
first = upd;
} else if (upd->txnid != WT_TXN_ABORTED)
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index c53a63ccb25..764006b024d 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -172,8 +172,8 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
"\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
"\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\","
"\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"transaction\",\"verify\",\"version\","
- "\"write\"]",
+ "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
+ "\"version\",\"write\"]",
NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
@@ -225,7 +225,6 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_checkpoint[] = {
{ "drop", "list", NULL, NULL, NULL, 0 },
{ "force", "boolean", NULL, NULL, NULL, 0 },
{ "name", "string", NULL, NULL, NULL, 0 },
- { "read_timestamp", "string", NULL, NULL, NULL, 0 },
{ "target", "list", NULL, NULL, NULL, 0 },
{ "use_timestamp", "boolean", NULL, NULL, NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
@@ -802,8 +801,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
"\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
"\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\","
"\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"transaction\",\"verify\",\"version\","
- "\"write\"]",
+ "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
+ "\"version\",\"write\"]",
NULL, 0 },
{ "write_through", "list",
NULL, "choices=[\"data\",\"log\"]",
@@ -897,8 +896,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
"\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
"\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\","
"\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"transaction\",\"verify\",\"version\","
- "\"write\"]",
+ "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
+ "\"version\",\"write\"]",
NULL, 0 },
{ "version", "string", NULL, NULL, NULL, 0 },
{ "write_through", "list",
@@ -987,8 +986,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
"\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
"\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\","
"\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"transaction\",\"verify\",\"version\","
- "\"write\"]",
+ "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
+ "\"version\",\"write\"]",
NULL, 0 },
{ "version", "string", NULL, NULL, NULL, 0 },
{ "write_through", "list",
@@ -1077,8 +1076,8 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
"\"lsm_manager\",\"metadata\",\"mutex\",\"overflow\",\"read\","
"\"rebalance\",\"reconcile\",\"recovery\",\"recovery_progress\","
"\"salvage\",\"shared_cache\",\"split\",\"temporary\","
- "\"thread_group\",\"transaction\",\"verify\",\"version\","
- "\"write\"]",
+ "\"thread_group\",\"timestamp\",\"transaction\",\"verify\","
+ "\"version\",\"write\"]",
NULL, 0 },
{ "write_through", "list",
NULL, "choices=[\"data\",\"log\"]",
@@ -1173,9 +1172,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
confchk_WT_SESSION_begin_transaction, 6
},
{ "WT_SESSION.checkpoint",
- "drop=,force=false,name=,read_timestamp=,target=,"
- "use_timestamp=true",
- confchk_WT_SESSION_checkpoint, 6
+ "drop=,force=false,name=,target=,use_timestamp=true",
+ confchk_WT_SESSION_checkpoint, 5
},
{ "WT_SESSION.close",
"",
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index df71ddf18f6..b29b6184ce3 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -8,8 +8,6 @@
#include "wt_internal.h"
-static int __conn_statistics_config(WT_SESSION_IMPL *, const char *[]);
-
/*
* ext_collate --
* Call the collation function (external API version).
@@ -190,45 +188,6 @@ __wt_conn_remove_collator(WT_SESSION_IMPL *session)
}
/*
- * __conn_compat_config --
- * Configure compatibility version.
- */
-static int
-__conn_compat_config(WT_SESSION_IMPL *session, const char **cfg)
-{
- WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- uint16_t patch;
-
- conn = S2C(session);
- WT_RET(__wt_config_gets(session, cfg,
- "compatibility.release", &cval));
- if (cval.len != 0) {
- /*
- * Accept either a major.minor release string or a
- * major.minor.patch release string. We ignore the patch
- * value, but allow it in the string.
- */
- if (sscanf(cval.str, "%" SCNu16 ".%" SCNu16,
- &conn->compat_major, &conn->compat_minor) != 2 &&
- sscanf(cval.str, "%" SCNu16 ".%" SCNu16 ".%" SCNu16,
- &conn->compat_major, &conn->compat_minor, &patch) != 3)
- WT_RET_MSG(session,
- EINVAL, "illegal compatibility release");
- if (conn->compat_major > WIREDTIGER_VERSION_MAJOR)
- WT_RET_MSG(session, EINVAL, "unknown major version");
- if (conn->compat_major == WIREDTIGER_VERSION_MAJOR &&
- conn->compat_minor > WIREDTIGER_VERSION_MINOR)
- WT_RET_MSG(session,
- EINVAL, "illegal compatibility version");
- } else {
- conn->compat_major = WIREDTIGER_VERSION_MAJOR;
- conn->compat_minor = WIREDTIGER_VERSION_MINOR;
- }
- return (0);
-}
-
-/*
* __compressor_confchk --
* Validate the compressor.
*/
@@ -1143,57 +1102,12 @@ __conn_reconfigure(WT_CONNECTION *wt_conn, const char *config)
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- const char *p;
- bool locked;
conn = (WT_CONNECTION_IMPL *)wt_conn;
- locked = false;
CONNECTION_API_CALL(conn, session, reconfigure, config, cfg);
-
- /* Serialize reconfiguration. */
- __wt_spin_lock(session, &conn->reconfig_lock);
- locked = true;
-
- /*
- * The configuration argument has been checked for validity, update the
- * previous connection configuration.
- *
- * DO NOT merge the configuration before the reconfigure calls. Some
- * of the underlying reconfiguration functions do explicit checks with
- * the second element of the configuration array, knowing the defaults
- * are in slot #1 and the application's modifications are in slot #2.
- *
- * First, replace the base configuration set up by CONNECTION_API_CALL
- * with the current connection configuration, otherwise reconfiguration
- * functions will find the base value instead of previously configured
- * value.
- */
- cfg[0] = conn->cfg;
- cfg[1] = config;
-
- /* Second, reconfigure the system. */
- WT_ERR(__conn_compat_config(session, cfg));
- WT_ERR(__conn_statistics_config(session, cfg));
- WT_ERR(__wt_async_reconfig(session, cfg));
- WT_ERR(__wt_cache_config(session, true, cfg));
- WT_ERR(__wt_checkpoint_server_create(session, cfg));
- WT_ERR(__wt_logmgr_reconfig(session, cfg));
- WT_ERR(__wt_lsm_manager_reconfig(session, cfg));
- WT_ERR(__wt_statlog_create(session, cfg));
- WT_ERR(__wt_sweep_config(session, cfg));
- WT_ERR(__wt_verbose_config(session, cfg));
- WT_ERR(__wt_timing_stress_config(session, cfg));
-
- /* Third, merge everything together, creating a new connection state. */
- WT_ERR(__wt_config_merge(session, cfg, NULL, &p));
- __wt_free(session, conn->cfg);
- conn->cfg = p;
-
-err: if (locked)
- __wt_spin_unlock(session, &conn->reconfig_lock);
-
- API_END_RET(session, ret);
+ ret = __wt_conn_reconfig(session, cfg);
+err: API_END_RET(session, ret);
}
/*
@@ -1274,8 +1188,7 @@ __conn_rollback_to_stable(WT_CONNECTION *wt_conn, const char *config)
conn = (WT_CONNECTION_IMPL *)wt_conn;
- CONNECTION_API_CALL(
- conn, session, rollback_to_stable, config, cfg);
+ CONNECTION_API_CALL(conn, session, rollback_to_stable, config, cfg);
WT_TRET(__wt_txn_rollback_to_stable(session, cfg));
err: API_END_RET(session, ret);
}
@@ -1788,94 +1701,6 @@ err: /*
return (ret);
}
-/*
- * __conn_statistics_config --
- * Set statistics configuration.
- */
-static int
-__conn_statistics_config(WT_SESSION_IMPL *session, const char *cfg[])
-{
- WT_CONFIG_ITEM cval, sval;
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- uint32_t flags;
- int set;
-
- conn = S2C(session);
-
- WT_RET(__wt_config_gets(session, cfg, "statistics", &cval));
-
- flags = 0;
- set = 0;
- if ((ret = __wt_config_subgets(
- session, &cval, "none", &sval)) == 0 && sval.val != 0) {
- flags = 0;
- ++set;
- }
- WT_RET_NOTFOUND_OK(ret);
-
- if ((ret = __wt_config_subgets(
- session, &cval, "fast", &sval)) == 0 && sval.val != 0) {
- LF_SET(WT_STAT_TYPE_FAST);
- ++set;
- }
- WT_RET_NOTFOUND_OK(ret);
-
- if ((ret = __wt_config_subgets(
- session, &cval, "all", &sval)) == 0 && sval.val != 0) {
- LF_SET(
- WT_STAT_TYPE_ALL | WT_STAT_TYPE_CACHE_WALK |
- WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
- ++set;
- }
- WT_RET_NOTFOUND_OK(ret);
-
- if (set > 1)
- WT_RET_MSG(session, EINVAL,
- "Only one of all, fast, none configuration values should "
- "be specified");
-
- /*
- * Now that we've parsed general statistics categories, process
- * sub-categories.
- */
- if ((ret = __wt_config_subgets(
- session, &cval, "cache_walk", &sval)) == 0 && sval.val != 0)
- /*
- * Configuring cache walk statistics implies fast statistics.
- * Keep that knowledge internal for now - it may change in the
- * future.
- */
- LF_SET(WT_STAT_TYPE_FAST | WT_STAT_TYPE_CACHE_WALK);
- WT_RET_NOTFOUND_OK(ret);
-
- if ((ret = __wt_config_subgets(
- session, &cval, "tree_walk", &sval)) == 0 && sval.val != 0)
- /*
- * Configuring tree walk statistics implies fast statistics.
- * Keep that knowledge internal for now - it may change in the
- * future.
- */
- LF_SET(WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
- WT_RET_NOTFOUND_OK(ret);
-
- if ((ret = __wt_config_subgets(
- session, &cval, "clear", &sval)) == 0 && sval.val != 0) {
- if (!LF_ISSET(WT_STAT_TYPE_ALL | WT_STAT_TYPE_CACHE_WALK |
- WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK))
- WT_RET_MSG(session, EINVAL,
- "the value \"clear\" can only be specified if "
- "statistics are enabled");
- LF_SET(WT_STAT_CLEAR);
- }
- WT_RET_NOTFOUND_OK(ret);
-
- /* Configuring statistics clears any existing values. */
- conn->stat_flags = flags;
-
- return (0);
-}
-
/* Simple structure for name and flag configuration searches. */
typedef struct {
const char *name;
@@ -1916,6 +1741,7 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
{ "split", WT_VERB_SPLIT },
{ "temporary", WT_VERB_TEMPORARY },
{ "thread_group", WT_VERB_THREAD_GROUP },
+ { "timestamp", WT_VERB_TIMESTAMP },
{ "transaction", WT_VERB_TRANSACTION },
{ "verify", WT_VERB_VERIFY },
{ "version", WT_VERB_VERSION },
@@ -2344,7 +2170,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
/*
* Set compatibility versions early so that any subsystem sees it.
*/
- WT_ERR(__conn_compat_config(session, cfg));
+ WT_ERR(__wt_conn_compat_config(session, cfg));
/*
* If the application didn't configure its own file system, configure
@@ -2531,7 +2357,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
WT_ERR(__wt_config_gets(session, cfg, "mmap", &cval));
conn->mmap = cval.val != 0;
- WT_ERR(__conn_statistics_config(session, cfg));
+ WT_ERR(__wt_conn_statistics_config(session, cfg));
WT_ERR(__wt_lsm_manager_config(session, cfg));
WT_ERR(__wt_sweep_config(session, cfg));
diff --git a/src/third_party/wiredtiger/src/conn/conn_reconfig.c b/src/third_party/wiredtiger/src/conn/conn_reconfig.c
new file mode 100644
index 00000000000..e67f2c9a18d
--- /dev/null
+++ b/src/third_party/wiredtiger/src/conn/conn_reconfig.c
@@ -0,0 +1,210 @@
+/*-
+ * Copyright (c) 2014-2017 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __wt_conn_compat_config --
+ *	Configure compatibility version; a failed call changes nothing.
+ */
+int
+__wt_conn_compat_config(WT_SESSION_IMPL *session, const char **cfg)
+{
+	WT_CONFIG_ITEM cval;
+	WT_CONNECTION_IMPL *conn;
+	uint16_t major, minor, patch;
+	bool txn_active;
+
+	conn = S2C(session);
+	WT_RET(__wt_config_gets(session, cfg, "compatibility.release", &cval));
+	if (cval.len == 0) {
+		conn->compat_major = WIREDTIGER_VERSION_MAJOR;
+		conn->compat_minor = WIREDTIGER_VERSION_MINOR;
+		return (0);
+	}
+
+	/*
+	 * Accept either a major.minor or a major.minor.patch release string;
+	 * the patch value is allowed but ignored. Scan into locals so that a
+	 * rejected string doesn't change the connection.
+	 */
+	if (sscanf(cval.str, "%" SCNu16 ".%" SCNu16, &major, &minor) != 2 &&
+	    sscanf(cval.str, "%" SCNu16 ".%" SCNu16 ".%" SCNu16,
+	    &major, &minor, &patch) != 3)
+		WT_RET_MSG(session, EINVAL, "illegal compatibility release");
+	if (major > WIREDTIGER_VERSION_MAJOR)
+		WT_RET_MSG(session, EINVAL, "unknown major version");
+	if (major == WIREDTIGER_VERSION_MAJOR &&
+	    minor > WIREDTIGER_VERSION_MINOR)
+		WT_RET_MSG(session, EINVAL, "illegal compatibility version");
+
+	/*
+	 * We're doing an upgrade or downgrade, check whether transactions are
+	 * active, and only then commit the new version to the connection.
+	 */
+	WT_RET(__wt_txn_activity_check(session, &txn_active));
+	if (txn_active)
+		WT_RET_MSG(session, ENOTSUP,
+		    "upgrade / downgrade must run single-threaded");
+	conn->compat_major = major;
+	conn->compat_minor = minor;
+	return (0);
+}
+
+/*
+ * __conn_statistics_enabled --
+ *	Look up a single value in the statistics list and report whether it's
+ *	turned on; a value that isn't in the list is off, not an error.
+ */
+static int
+__conn_statistics_enabled(WT_SESSION_IMPL *session,
+    WT_CONFIG_ITEM *statistics, const char *name, bool *enabledp)
+{
+	WT_CONFIG_ITEM sval;
+	WT_DECL_RET;
+
+	*enabledp = false;
+	ret = __wt_config_subgets(session, statistics, name, &sval);
+	if (ret == 0)
+		*enabledp = sval.val != 0;
+	return (ret == WT_NOTFOUND ? 0 : ret);
+}
+
+/*
+ * __wt_conn_statistics_config --
+ *	Set statistics configuration.
+ */
+int
+__wt_conn_statistics_config(WT_SESSION_IMPL *session, const char *cfg[])
+{
+	WT_CONFIG_ITEM cval;
+	WT_CONNECTION_IMPL *conn;
+	uint32_t flags;
+	int categories;
+	bool on;
+
+	conn = S2C(session);
+
+	WT_RET(__wt_config_gets(session, cfg, "statistics", &cval));
+
+	/* The general categories: at most one of them may be configured. */
+	flags = 0;
+	categories = 0;
+	WT_RET(__conn_statistics_enabled(session, &cval, "none", &on));
+	if (on)
+		++categories;
+	WT_RET(__conn_statistics_enabled(session, &cval, "fast", &on));
+	if (on) {
+		LF_SET(WT_STAT_TYPE_FAST);
+		++categories;
+	}
+	WT_RET(__conn_statistics_enabled(session, &cval, "all", &on));
+	if (on) {
+		LF_SET(
+		    WT_STAT_TYPE_ALL | WT_STAT_TYPE_CACHE_WALK |
+		    WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
+		++categories;
+	}
+	if (categories > 1)
+		WT_RET_MSG(session, EINVAL,
+		    "Only one of all, fast, none configuration values should "
+		    "be specified");
+
+	/*
+	 * Sub-categories: configuring cache or tree walk statistics implies
+	 * fast statistics. Keep that knowledge internal for now - it may
+	 * change in the future.
+	 */
+	WT_RET(__conn_statistics_enabled(session, &cval, "cache_walk", &on));
+	if (on)
+		LF_SET(WT_STAT_TYPE_FAST | WT_STAT_TYPE_CACHE_WALK);
+	WT_RET(__conn_statistics_enabled(session, &cval, "tree_walk", &on));
+	if (on)
+		LF_SET(WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK);
+
+	/* Clearing is only accepted if some statistics type is enabled. */
+	WT_RET(__conn_statistics_enabled(session, &cval, "clear", &on));
+	if (on) {
+		if (!LF_ISSET(WT_STAT_TYPE_ALL | WT_STAT_TYPE_CACHE_WALK |
+		    WT_STAT_TYPE_FAST | WT_STAT_TYPE_TREE_WALK))
+			WT_RET_MSG(session, EINVAL,
+			    "the value \"clear\" can only be specified if "
+			    "statistics are enabled");
+		LF_SET(WT_STAT_CLEAR);
+	}
+
+	/* Configuring statistics clears any existing values. */
+	conn->stat_flags = flags;
+
+	return (0);
+}
+
+/*
+ * __wt_conn_reconfig --
+ *	Reconfigure a connection (internal version).
+ */
+int
+__wt_conn_reconfig(WT_SESSION_IMPL *session, const char **cfg)
+{
+	WT_CONNECTION_IMPL *conn;
+	WT_DECL_RET;
+	const char *p;
+
+	conn = S2C(session);
+
+	/* Serialize reconfiguration. */
+	__wt_spin_lock(session, &conn->reconfig_lock);
+
+	/*
+	 * The configuration argument has been checked for validity, update the
+	 * previous connection configuration.
+	 *
+	 * DO NOT merge the configuration before the reconfigure calls. Some
+	 * of the underlying reconfiguration functions do explicit checks with
+	 * the second element of the configuration array, knowing the defaults
+	 * are in slot #1 and the application's modifications are in slot #2.
+	 *
+	 * Replace the base configuration set up by CONNECTION_API_CALL with
+	 * the current connection configuration, otherwise reconfiguration
+	 * functions will find the base value instead of previously configured
+	 * value.
+	 */
+	cfg[0] = conn->cfg;
+
+	/*
+	 * Reconfigure the system. The compatibility version check is special:
+	 * upgrade / downgrade cannot be done with transactions active, and
+	 * checkpoints must not span a version change. Hold the checkpoint lock
+	 * to avoid conflicts with WiredTiger's checkpoint thread, and rely on
+	 * the documentation specifying that no new operations can start until
+	 * the upgrade / downgrade completes. The locked call only assigns ret:
+	 * test it before the next WT_ERR overwrites it.
+	 */
+	WT_WITH_CHECKPOINT_LOCK(session,
+	    ret = __wt_conn_compat_config(session, cfg));
+	WT_ERR(ret);
+	WT_ERR(__wt_conn_statistics_config(session, cfg));
+	WT_ERR(__wt_async_reconfig(session, cfg));
+	WT_ERR(__wt_cache_config(session, true, cfg));
+	WT_ERR(__wt_checkpoint_server_create(session, cfg));
+	WT_ERR(__wt_logmgr_reconfig(session, cfg));
+	WT_ERR(__wt_lsm_manager_reconfig(session, cfg));
+	WT_ERR(__wt_statlog_create(session, cfg));
+	WT_ERR(__wt_sweep_config(session, cfg));
+	WT_ERR(__wt_verbose_config(session, cfg));
+	WT_ERR(__wt_timing_stress_config(session, cfg));
+
+	/* Merge everything together, creating a new connection state. */
+	WT_ERR(__wt_config_merge(session, cfg, NULL, &p));
+	__wt_free(session, conn->cfg);
+	conn->cfg = p;
+
+err:	__wt_spin_unlock(session, &conn->reconfig_lock);
+
+	return (ret);
+}
diff --git a/src/third_party/wiredtiger/src/cursor/cur_ds.c b/src/third_party/wiredtiger/src/cursor/cur_ds.c
index 10de133be75..087c811747a 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_ds.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_ds.c
@@ -38,17 +38,16 @@ static int
__curds_key_set(WT_CURSOR *cursor)
{
WT_CURSOR *source;
- WT_DECL_RET;
source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
- WT_ERR(__cursor_needkey(cursor));
+ WT_RET(__cursor_needkey(cursor));
source->recno = cursor->recno;
source->key.data = cursor->key.data;
source->key.size = cursor->key.size;
-err: return (ret);
+ return (0);
}
/*
@@ -59,16 +58,15 @@ static int
__curds_value_set(WT_CURSOR *cursor)
{
WT_CURSOR *source;
- WT_DECL_RET;
source = ((WT_CURSOR_DATA_SOURCE *)cursor)->source;
- WT_ERR(__cursor_needvalue(cursor));
+ WT_RET(__cursor_needvalue(cursor));
source->value.data = cursor->value.data;
source->value.size = cursor->value.size;
-err: return (ret);
+ return (0);
}
/*
diff --git a/src/third_party/wiredtiger/src/cursor/cur_join.c b/src/third_party/wiredtiger/src/cursor/cur_join.c
index 855ad70d6e0..e3ae9dbd9f6 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_join.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_join.c
@@ -499,9 +499,7 @@ __curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
passed = (cmp < 0);
break;
- default:
- WT_RET(__wt_illegal_value(session, NULL));
- break;
+ WT_ILLEGAL_VALUE(session);
}
if (!passed) {
diff --git a/src/third_party/wiredtiger/src/docs/Doxyfile b/src/third_party/wiredtiger/src/docs/Doxyfile
index 8292df18e47..e95d8babe48 100644
--- a/src/third_party/wiredtiger/src/docs/Doxyfile
+++ b/src/third_party/wiredtiger/src/docs/Doxyfile
@@ -206,8 +206,8 @@ TAB_SIZE = 8
# You can put \n's in the value part of an alias to insert newlines.
ALIASES = "notyet{1}=Note: <b>"\1"</b> not yet supported in WiredTiger.\n@todo fix when \1 supported\n\n" \
- "errors=@returns zero on success and a non-zero error code on failure. See @ref error_returns \"Error Returns\" for details." \
- "ebusy_errors=@returns zero on success, EBUSY if there are open cursors on the object and a non-zero error code on failure. See @ref error_returns \"Error Returns\" for details." \
+ "errors=@returns zero on success and a non-zero error code on failure. See @ref error_handling \"Error handling\" for details." \
+ "ebusy_errors=@returns zero on success, EBUSY if the object is not available for exclusive access, and a non-zero error code on failure. See @ref error_handling \"Error handling\" for details." \
"ex_ref{1}=@ref \1 \"\1\"" \
"ref_single=@ref" \
"subpage_single=@subpage" \
diff --git a/src/third_party/wiredtiger/src/docs/error-handling.dox b/src/third_party/wiredtiger/src/docs/error-handling.dox
index eb9ca6bb82a..7b7c0cd03d5 100644
--- a/src/third_party/wiredtiger/src/docs/error-handling.dox
+++ b/src/third_party/wiredtiger/src/docs/error-handling.dox
@@ -3,11 +3,11 @@
WiredTiger operations return a value of 0 on success and a non-zero
value on error. Error codes may be either positive or negative:
positive error codes are standard error codes as described for
-POSIX-like systems (for example, EINVAL or EBUSY), negative error codes
-are WiredTiger-specific (for example, WT_ROLLBACK).
+POSIX-like systems (for example, \c EINVAL or \c EBUSY), negative error
+codes are WiredTiger-specific (for example, \c WT_ROLLBACK).
WiredTiger-specific error codes always appear in the -31,800 to -31,999
-range.
+range, inclusive.
@m_if{java}
Informational return values, like <code>wiredtiger.WT_NOTFOUND</code>
@@ -29,11 +29,22 @@ correctly-written WiredTiger application will likely catch
errors. Note that no further WiredTiger calls are required after
\c WiredTigerPanicException is caught (and further calls will themselves
immediately fail).
+@m_endif
+
+WiredTiger returns \c EBUSY for operations requiring exclusive access, when
+an object is not available for exclusive access. For example, the
+WT_SESSION::drop or WT_SESSION::verify methods will fail if the object
+has open cursors. Note that internal WiredTiger threads may temporarily
+open cursors on objects (for example, threads performing operations like
+statistics logging), and operations may temporarily fail and return \c EBUSY
+even when there are no application cursors open on the object.
-The following is a complete list of possible WiredTiger-specific
-return values, all constants defined in the com.wiredtiger.db.wiredtiger class:
+@m_if{java}
+The following is a complete list of the WiredTiger-specific return
+values, all constants defined in the com.wiredtiger.db.wiredtiger class:
@m_else
-The following is a list of possible WiredTiger-specific errors:
+The following is a complete list of the WiredTiger-specific return
+values:
@m_endif
@if IGNORE_BUILT_BY_API_ERR_BEGIN
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 01a9179aedc..f0d810281c2 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -386,8 +386,9 @@ struct __wt_page_modify {
/* Cached overflow value cell/update address pairs. */
struct {
- WT_CELL *cell;
- WT_UPDATE *upd;
+ WT_CELL *cell;
+ uint8_t *data;
+ size_t size;
} *remove;
size_t remove_allocated;
uint32_t remove_next;
@@ -895,10 +896,11 @@ struct __wt_update {
uint32_t size; /* data length */
-#define WT_UPDATE_DELETED 0 /* deleted */
-#define WT_UPDATE_MODIFIED 1 /* partial-update modify value */
-#define WT_UPDATE_RESERVED 2 /* reserved */
-#define WT_UPDATE_STANDARD 3 /* complete value */
+#define WT_UPDATE_INVALID 0 /* diagnostic check */
+#define WT_UPDATE_DELETED 1 /* deleted */
+#define WT_UPDATE_MODIFIED 2 /* partial-update modify value */
+#define WT_UPDATE_RESERVED 3 /* reserved */
+#define WT_UPDATE_STANDARD 4 /* complete value */
uint8_t type; /* type (one byte to conserve memory) */
/* If the update includes a complete value. */
@@ -936,7 +938,7 @@ struct __wt_update {
* Limit update chains to a small value to avoid penalizing reads and
* permit truncation.
*/
-#define WT_MAX_MODIFY_UPDATE 100
+#define WT_MAX_MODIFY_UPDATE 10
/*
* WT_INSERT --
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index afd4c874cf1..9a86dbc1a26 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -150,7 +150,8 @@ extern const char *__wt_cell_type_string(uint8_t type);
extern const char *__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf);
extern const char *__wt_addr_string(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, WT_ITEM *buf);
extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store, bool *decoded) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd_list, WT_CELL_UNPACK *unpack) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_ovfl_discard_remove(WT_SESSION_IMPL *session, WT_PAGE *page);
+extern int __wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, bool checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, const void *image, size_t memsize, uint32_t flags, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -278,6 +279,9 @@ extern int __wt_logmgr_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIB
extern int __wt_connection_open(WT_CONNECTION_IMPL *conn, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_connection_close(WT_CONNECTION_IMPL *conn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_compat_config(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_statistics_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_conn_reconfig(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_conn_stat_init(WT_SESSION_IMPL *session);
extern int __wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -475,6 +479,7 @@ extern int __wt_lsm_tree_worker(WT_SESSION_IMPL *session, const char *uri, int (
extern int __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool force, WT_LSM_CHUNK **chunkp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_work_switch( WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **entryp, bool *ran) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_lsm_chunk_visible_all( WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk);
extern int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -664,7 +669,7 @@ __wt_assert(WT_SESSION_IMPL *session,
#endif
WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern int __wt_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_illegal_value(WT_SESSION_IMPL *session, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_illegal_value_func( WT_SESSION_IMPL *session, const char *tag, const char *file, int line) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_inmem_unsupported_op(WT_SESSION_IMPL *session, const char *tag) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_object_unsupported(WT_SESSION_IMPL *session, const char *uri) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_bad_object_type(WT_SESSION_IMPL *session, const char *uri) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -683,7 +688,6 @@ extern int __wt_stash_add(WT_SESSION_IMPL *session, int which, uint64_t generati
extern void __wt_stash_discard_all(WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session);
extern int __wt_library_init(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_breakpoint(void);
-extern void __wt_attach(WT_SESSION_IMPL *session);
extern uint64_t __wt_hash_city64(const void *s, size_t len);
extern uint64_t __wt_hash_fnv64(const void *string, size_t len);
extern int
@@ -809,6 +813,8 @@ extern int __wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, const char *
extern void __wt_txn_named_snapshot_destroy(WT_SESSION_IMPL *session);
extern int __wt_txn_recover(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_timestamp_to_hex_string( WT_SESSION_IMPL *session, char *hex_timestamp, const wt_timestamp_t *ts_src) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_verbose_timestamp(WT_SESSION_IMPL *session, const wt_timestamp_t *ts, const char *msg);
extern int __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_global_query_timestamp( WT_SESSION_IMPL *session, char *hex_timestamp, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h
index 243716c2ecb..ccb32900dc4 100644
--- a/src/third_party/wiredtiger/src/include/flags.h
+++ b/src/third_party/wiredtiger/src/include/flags.h
@@ -113,10 +113,11 @@
#define WT_VERB_SPLIT 0x00800000
#define WT_VERB_TEMPORARY 0x01000000
#define WT_VERB_THREAD_GROUP 0x02000000
-#define WT_VERB_TRANSACTION 0x04000000
-#define WT_VERB_VERIFY 0x08000000
-#define WT_VERB_VERSION 0x10000000
-#define WT_VERB_WRITE 0x20000000
+#define WT_VERB_TIMESTAMP 0x04000000
+#define WT_VERB_TRANSACTION 0x08000000
+#define WT_VERB_VERIFY 0x10000000
+#define WT_VERB_VERSION 0x20000000
+#define WT_VERB_WRITE 0x40000000
#define WT_VISIBILITY_ERR 0x00000080
/*
* flags section: END
diff --git a/src/third_party/wiredtiger/src/include/lsm.h b/src/third_party/wiredtiger/src/include/lsm.h
index df7d6c8d5ca..397f17400de 100644
--- a/src/third_party/wiredtiger/src/include/lsm.h
+++ b/src/third_party/wiredtiger/src/include/lsm.h
@@ -97,6 +97,11 @@ struct __wt_lsm_chunk {
* out, or by compact to get the most
* recent chunk flushed.
*/
+ WT_DECL_TIMESTAMP(switch_timestamp)/*
+ * The timestamp used to decide when
+ * updates need to detect conflicts.
+ */
+ WT_SPINLOCK timestamp_spinlock;
uint32_t id; /* ID used to generate URIs */
uint32_t generation; /* Merge generation */
@@ -107,10 +112,11 @@ struct __wt_lsm_chunk {
int8_t evicted; /* 1/0: in-memory chunk was evicted */
uint8_t flushing; /* 1/0: chunk flush in progress */
-#define WT_LSM_CHUNK_BLOOM 0x01
-#define WT_LSM_CHUNK_MERGING 0x02
-#define WT_LSM_CHUNK_ONDISK 0x04
-#define WT_LSM_CHUNK_STABLE 0x08
+#define WT_LSM_CHUNK_BLOOM 0x01
+#define WT_LSM_CHUNK_HAS_TIMESTAMP 0x02
+#define WT_LSM_CHUNK_MERGING 0x04
+#define WT_LSM_CHUNK_ONDISK 0x08
+#define WT_LSM_CHUNK_STABLE 0x10
uint32_t flags;
};
diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h
index bf7d36e19ca..a6cb56dd852 100644
--- a/src/third_party/wiredtiger/src/include/misc.h
+++ b/src/third_party/wiredtiger/src/include/misc.h
@@ -293,6 +293,10 @@ typedef void wt_timestamp_t;
__wt_page_swap_func(session, held, want, flags)
#endif
+/* Called on unexpected code path: locate the failure. */
+#define __wt_illegal_value(session, msg) \
+ __wt_illegal_value_func(session, msg, __FILE__, __LINE__)
+
/* Random number generator state. */
union __wt_rand_state {
uint64_t v;
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 61ab343151c..e0513a82892 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -69,7 +69,6 @@ struct __wt_named_snapshot {
struct __wt_txn_state {
WT_CACHE_LINE_PAD_BEGIN
- WT_RWLOCK rwlock;
volatile uint64_t id;
volatile uint64_t pinned_id;
volatile uint64_t metadata_pinned;
@@ -105,6 +104,9 @@ struct __wt_txn_global {
/* Protects the active transaction states. */
WT_RWLOCK rwlock;
+ /* Protects logging, checkpoints and transaction visibility. */
+ WT_RWLOCK visibility_rwlock;
+
/* List of transactions sorted by commit timestamp. */
WT_RWLOCK commit_timestamp_rwlock;
TAILQ_HEAD(__wt_txn_cts_qh, __wt_txn) commit_timestamph;
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 30f29e0f5d0..8067b6128c5 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -11,6 +11,8 @@ static inline void __wt_txn_read_last(WT_SESSION_IMPL *session);
#ifdef HAVE_TIMESTAMPS
#if WT_TIMESTAMP_SIZE == 8
+#define WT_WITH_TIMESTAMP_READLOCK(session, l, e) e
+
/*
* __wt_timestamp_cmp --
* Compare two timestamps.
@@ -61,6 +63,12 @@ __wt_timestamp_set_zero(wt_timestamp_t *ts)
ts->val = 0;
}
#else
+#define WT_WITH_TIMESTAMP_READLOCK(s, l, e) do { \
+ __wt_readlock((s), (l)); \
+ e; \
+ __wt_readunlock((s), (l)); \
+} while (0)
+
/*
* __wt_timestamp_cmp --
* Compare two timestamps.
@@ -90,8 +98,7 @@ __wt_timestamp_iszero(const wt_timestamp_t *ts)
{
static const wt_timestamp_t zero_timestamp;
- return (memcmp(ts->ts,
- WT_TIMESTAMP_NULL(&zero_timestamp), WT_TIMESTAMP_SIZE) == 0);
+ return (memcmp(ts->ts, &zero_timestamp, WT_TIMESTAMP_SIZE) == 0);
}
/*
@@ -182,7 +189,17 @@ __wt_txn_modify(WT_SESSION_IMPL *session, WT_UPDATE *upd)
op->type = F_ISSET(session, WT_SESSION_LOGGING_INMEM) ?
WT_TXN_OP_INMEM : WT_TXN_OP_BASIC;
#ifdef HAVE_TIMESTAMPS
- if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT)) {
+ /*
+ * Mark the update with a timestamp, if we have one.
+ *
+ * Updates in the metadata never get timestamps (either now or at
+ * commit): metadata cannot be read at a point in time, only the most
+ * recently committed data matches files on disk.
+ */
+ if (WT_IS_METADATA(session->dhandle)) {
+ if (!F_ISSET(session, WT_SESSION_LOGGING_INMEM))
+ op->type = WT_TXN_OP_BASIC_TS;
+ } else if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT)) {
__wt_timestamp_set(&upd->timestamp, &txn->commit_timestamp);
if (!F_ISSET(session, WT_SESSION_LOGGING_INMEM))
op->type = WT_TXN_OP_BASIC_TS;
@@ -285,9 +302,9 @@ __txn_visible_all_id(WT_SESSION_IMPL *session, uint64_t id)
/*
* __wt_txn_visible_all --
- * Check if a given transaction is "globally visible". This is, if
- * all sessions in the system will see the transaction ID including the
- * ID that belongs to a running checkpoint.
+ * Check if a given transaction is "globally visible". That is, if all
+ * sessions in the system will see the transaction ID including the ID
+ * that belongs to a running checkpoint.
*/
static inline bool
__wt_txn_visible_all(
@@ -302,12 +319,18 @@ __wt_txn_visible_all(
int cmp;
/* Timestamp check. */
- if (!txn_global->has_pinned_timestamp || timestamp == NULL)
+ if (timestamp == NULL || __wt_timestamp_iszero(timestamp))
return (true);
- __wt_readlock(session, &txn_global->rwlock);
- cmp = __wt_timestamp_cmp(timestamp, &txn_global->pinned_timestamp);
- __wt_readunlock(session, &txn_global->rwlock);
+ /*
+ * If no oldest timestamp has been supplied, updates have to stay in
+ * cache until we are shutting down.
+ */
+ if (!txn_global->has_pinned_timestamp)
+ return (F_ISSET(S2C(session), WT_CONN_CLOSING));
+
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ cmp = __wt_timestamp_cmp(timestamp, &txn_global->pinned_timestamp));
/*
* We can discard updates with timestamps less than or equal to the
@@ -581,8 +604,7 @@ __wt_txn_id_alloc(WT_SESSION_IMPL *session, bool publish)
/*
* __wt_txn_id_check --
- * A transaction is going to do an update, start an auto commit
- * transaction if required and allocate a transaction ID.
+ * A transaction is going to do an update, allocate a transaction ID.
*/
static inline int
__wt_txn_id_check(WT_SESSION_IMPL *session)
@@ -606,7 +628,7 @@ __wt_txn_id_check(WT_SESSION_IMPL *session)
* more we can do.
*/
if (txn->id == WT_TXN_ABORTED)
- WT_RET_MSG(session, ENOMEM, "Out of transaction IDs");
+ WT_RET_MSG(session, WT_ERROR, "out of transaction IDs");
F_SET(txn, WT_TXN_HAS_ID);
return (0);
@@ -730,11 +752,11 @@ __wt_txn_am_oldest(WT_SESSION_IMPL *session)
}
/*
- * __wt_txn_are_any_active --
+ * __wt_txn_activity_check --
* Check whether there are any running transactions.
*/
static inline int
-__wt_txn_are_any_active(WT_SESSION_IMPL *session, bool *any_active)
+__wt_txn_activity_check(WT_SESSION_IMPL *session, bool *txn_active)
{
WT_TXN_GLOBAL *txn_global;
@@ -747,6 +769,8 @@ __wt_txn_are_any_active(WT_SESSION_IMPL *session, bool *any_active)
WT_RET(__wt_txn_update_oldest(session,
WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
- *any_active = (txn_global->oldest_id != txn_global->current);
+ *txn_active = (txn_global->oldest_id != txn_global->current ||
+ txn_global->metadata_pinned != txn_global->current);
+
return (0);
}
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 5d087447c5a..7825962d89f 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -504,6 +504,12 @@ struct __wt_cursor {
* (as it partially depends on the underlying file configuration), but
* is always a small number of bytes less than 4GB.
*
+ * The WT_CURSOR::modify method stores a change record in cache and
+ * writes a change record to the log, instead of the usual complete
+ * value. This can reduce cache and logging requirements, but may result
+ * in slower reads because the complete value must be assembled during
+ * retrieval.
+ *
* @param cursor the cursor handle
* @param entries an array of modification data structures
* @param nentries the number of modification data structures
@@ -1537,7 +1543,7 @@ struct __wt_session {
* @snippet ex_all.c Reset the session
*
* @param session the session handle
- * @ebusy_errors
+ * @errors
*/
int __F(reset)(WT_SESSION *session);
@@ -1998,8 +2004,10 @@ struct __wt_connection {
* checkpoint; setting this value above 0 configures periodic
* checkpoints., an integer between 0 and 100000; default \c 0.}
* @config{ ),,}
- * @config{compatibility = (, set compatibility version of database., a
- * set of related configuration options defined below.}
+ * @config{compatibility = (, set compatibility version of database.
+ * Changing the compatibility version requires that there are no active
+ * operations for the duration of the call., a set of related
+ * configuration options defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;release, compatibility release
* version string., a string; default empty.}
* @config{ ),,}
@@ -2143,8 +2151,9 @@ struct __wt_connection {
* "metadata"\, \c "mutex"\, \c "overflow"\, \c "read"\, \c
* "rebalance"\, \c "reconcile"\, \c "recovery"\, \c
* "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c "split"\,
- * \c "temporary"\, \c "thread_group"\, \c "transaction"\, \c "verify"\,
- * \c "version"\, \c "write"; default empty.}
+ * \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c
+ * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default
+ * empty.}
* @configend
* @errors
*/
@@ -2258,6 +2267,8 @@ struct __wt_connection {
*
* @snippet ex_all.c set oldest timestamp
*
+ * @snippet ex_all.c set stable timestamp
+ *
* @param connection the connection handle
* @configstart{WT_CONNECTION.set_timestamp, see dist/api_data.py}
* @config{commit_timestamp, reset the maximum commit timestamp tracked
@@ -2292,8 +2303,8 @@ struct __wt_connection {
* WT_CONNECTION::set_timestamp. Any updates to checkpoint durable
* tables that are more recent than the stable timestamp are removed.
*
- * This method requires that there are no active cursor operations
- * for the duration of the call.
+ * This method requires that there are no active operations for the
+ * duration of the call.
*
* Any updates made to logged tables will not be rolled back. Any
* updates made without an associated timestamp will not be rolled
@@ -2527,10 +2538,12 @@ struct __wt_connection {
* @config{ ),,}
* @config{checkpoint_sync, flush files to stable storage when closing or
* writing checkpoints., a boolean flag; default \c true.}
- * @config{compatibility = (, set compatibility version of database., a set of
- * related configuration options defined below.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;release, compatibility release version
- * string., a string; default empty.}
+ * @config{compatibility = (, set compatibility version of database. Changing
+ * the compatibility version requires that there are no active operations for
+ * the duration of the call., a set of related configuration options defined
+ * below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;release, compatibility release
+ * version string., a string; default empty.}
* @config{ ),,}
* @config{config_base, write the base configuration file if creating the
* database. If \c false in the config passed directly to ::wiredtiger_open\,
@@ -2766,8 +2779,8 @@ struct __wt_connection {
* "lookaside_activity"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c
* "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c
* "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c
- * "split"\, \c "temporary"\, \c "thread_group"\, \c "transaction"\, \c
- * "verify"\, \c "version"\, \c "write"; default empty.}
+ * "split"\, \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c
+ * "transaction"\, \c "verify"\, \c "version"\, \c "write"; default empty.}
* @config{write_through, Use \c FILE_FLAG_WRITE_THROUGH on Windows to write to
* files. Ignored on non-Windows systems. Options are given as a list\, such
* as <code>"write_through=[data]"</code>. Configuring \c write_through requires
@@ -3301,7 +3314,6 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp)
* Error returns
*******************************************/
/*!
- * @anchor error_returns
* @name Error returns
* Most functions and methods in WiredTiger return an integer code indicating
* whether the operation succeeded or failed. A return of zero indicates
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
index 6a1709b03f2..39656c17ee0 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
@@ -538,8 +538,7 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
chunk = lsm_tree->chunk[ngood - 1];
clsm->chunks[ngood - 1]->switch_txn =
chunk->switch_txn;
- if (__wt_txn_visible_all(
- session, chunk->switch_txn, NULL))
+ if (__wt_lsm_chunk_visible_all(session, chunk))
break;
}
} else {
@@ -937,10 +936,9 @@ retry: /*
goto retry;
err: __clsm_leave(clsm);
- API_END(session, ret);
if (ret == 0)
__clsm_deleted_decode(clsm, &cursor->value);
- return (ret);
+ API_END_RET(session, ret);
}
/*
@@ -1029,8 +1027,7 @@ __clsm_next_random(WT_CURSOR *cursor)
err: F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
}
__clsm_leave(clsm);
- API_END(session, ret);
- return (ret);
+ API_END_RET(session, ret);
}
/*
@@ -1116,10 +1113,9 @@ retry: /*
goto retry;
err: __clsm_leave(clsm);
- API_END(session, ret);
if (ret == 0)
__clsm_deleted_decode(clsm, &cursor->value);
- return (ret);
+ API_END_RET(session, ret);
}
/*
@@ -1275,10 +1271,9 @@ __clsm_search(WT_CURSOR *cursor)
ret = __clsm_lookup(clsm, &cursor->value);
err: __clsm_leave(clsm);
- API_END(session, ret);
if (ret == 0)
__clsm_deleted_decode(clsm, &cursor->value);
- return (ret);
+ API_END_RET(session, ret);
}
/*
@@ -1418,7 +1413,6 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp)
*exactp = cmp;
err: __clsm_leave(clsm);
- API_END(session, ret);
if (closest != NULL)
WT_TRET(closest->reset(closest));
@@ -1428,7 +1422,7 @@ err: __clsm_leave(clsm);
} else
clsm->current = NULL;
- return (ret);
+ API_END_RET(session, ret);
}
/*
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
index 24a0429a184..3949d88cec4 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
@@ -208,14 +208,20 @@ __wt_lsm_manager_start(WT_SESSION_IMPL *session)
conn = S2C(session);
manager = &conn->lsm_manager;
- if (F_ISSET(conn, WT_CONN_READONLY)) {
- manager->lsm_workers = 0;
- return (0);
- }
/*
- * We need at least a manager, a switch thread and a generic
- * worker.
+ * If readonly or the manager is running, or we've already failed,
+ * there's no work to do.
*/
+ if (F_ISSET(conn, WT_CONN_READONLY) ||
+ manager->lsm_workers != 0 ||
+ F_ISSET(manager, WT_LSM_MANAGER_SHUTDOWN))
+ return (0);
+
+ /* It's possible to race, see if we're the winner. */
+ if (!__wt_atomic_cas32(&manager->lsm_workers, 0, 1))
+ return (0);
+
+ /* We need at least a manager, a switch thread and a generic worker. */
WT_ASSERT(session, manager->lsm_workers_max > 2);
/*
@@ -245,6 +251,15 @@ err: for (i = 0;
i++)
WT_TRET((&worker_session->iface)->close(
&worker_session->iface, NULL));
+
+ /* Make the failure permanent, we won't try again. */
+ F_SET(manager, WT_LSM_MANAGER_SHUTDOWN);
+
+ /*
+ * Reset the workers count (otherwise, LSM destroy will hang
+ * waiting for threads to exit).
+ */
+ WT_PUBLISH(manager->lsm_workers, 0);
}
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
index 18e1f6d3115..e6eccf96467 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
@@ -35,6 +35,7 @@ __lsm_tree_discard_state(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
if ((chunk = lsm_tree->chunk[i]) == NULL)
continue;
+ __wt_spin_destroy(session, &chunk->timestamp_spinlock);
__wt_free(session, chunk->bloom_uri);
__wt_free(session, chunk->uri);
__wt_free(session, chunk);
@@ -44,6 +45,7 @@ __lsm_tree_discard_state(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
chunk = lsm_tree->old_chunks[i];
WT_ASSERT(session, chunk != NULL);
+ __wt_spin_destroy(session, &chunk->timestamp_spinlock);
__wt_free(session, chunk->bloom_uri);
__wt_free(session, chunk->uri);
__wt_free(session, chunk);
@@ -280,6 +282,8 @@ __wt_lsm_tree_setup_chunk(
WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
__wt_epoch(session, &chunk->create_time);
+ __wt_spin_init(session,
+ &chunk->timestamp_spinlock, "LSM chunk timestamp");
WT_RET(__wt_lsm_tree_chunk_name(
session, lsm_tree, chunk->id, &chunk->uri));
@@ -474,8 +478,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session,
F_ISSET(session, WT_SESSION_LOCKED_HANDLE_LIST_WRITE));
/* Start the LSM manager thread if it isn't running. */
- if (__wt_atomic_cas32(&conn->lsm_manager.lsm_workers, 0, 1))
- WT_RET(__wt_lsm_manager_start(session));
+ WT_RET(__wt_lsm_manager_start(session));
/* Make sure no one beat us to it. */
if ((ret = __lsm_tree_find(
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
index 2f21e8acdc3..816eafebe99 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
@@ -256,6 +256,63 @@ err:
}
/*
+ * __wt_lsm_chunk_visible_all --
+ * Set up a timestamp and check visibility for a chunk; can be called
+ * from multiple threads in parallel.
+ */
+bool
+__wt_lsm_chunk_visible_all(
+ WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk)
+{
+ /* Once a chunk has been flushed its contents must be visible. */
+ if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK | WT_LSM_CHUNK_STABLE))
+ return (true);
+
+ if (chunk->switch_txn == WT_TXN_NONE ||
+ !__wt_txn_visible_all(session, chunk->switch_txn, NULL))
+ return (false);
+
+#ifdef HAVE_TIMESTAMPS
+ {
+ WT_TXN_GLOBAL *txn_global;
+
+ txn_global = &S2C(session)->txn_global;
+
+ /*
+ * Once all transactions with updates in the chunk are visible, all
+ * timestamps associated with those updates are assigned, so set up a
+ * timestamp for visibility checking.
+ */
+ if (txn_global->has_commit_timestamp ||
+ txn_global->has_pinned_timestamp) {
+ if (!F_ISSET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP)) {
+ __wt_spin_lock(session, &chunk->timestamp_spinlock);
+ /* Set the timestamp if we won the race */
+ if (!F_ISSET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP)) {
+ __wt_readlock(session, &txn_global->rwlock);
+ __wt_timestamp_set(&chunk->switch_timestamp,
+ &txn_global->commit_timestamp);
+ __wt_readunlock(session, &txn_global->rwlock);
+ F_SET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP);
+ }
+ __wt_spin_unlock(session, &chunk->timestamp_spinlock);
+ }
+ if (!__wt_txn_visible_all(
+ session, chunk->switch_txn, &chunk->switch_timestamp))
+ return (false);
+ } else
+ /*
+ * If timestamps aren't in use when the chunk becomes visible,
+ * use the zero timestamp for visibility checks. Otherwise,
+ * there could be confusion if timestamps start being used.
+ */
+ F_SET(chunk, WT_LSM_CHUNK_HAS_TIMESTAMP);
+ }
+#endif
+ return (true);
+}
+
+/*
* __wt_lsm_checkpoint_chunk --
* Flush a single LSM chunk to disk.
*/
@@ -295,14 +352,12 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
/* Stop if a running transaction needs the chunk. */
WT_RET(__wt_txn_update_oldest(
session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
- if (chunk->switch_txn == WT_TXN_NONE ||
- !__wt_txn_visible_all(session, chunk->switch_txn, NULL)) {
+ if (!__wt_lsm_chunk_visible_all(session, chunk)) {
__wt_verbose(session, WT_VERB_LSM,
"LSM worker %s: running transaction, return",
chunk->uri);
return (0);
}
-
if (!__wt_atomic_cas8(&chunk->flushing, 0, 1))
return (0);
flush_set = true;
diff --git a/src/third_party/wiredtiger/src/os_common/os_abort.c b/src/third_party/wiredtiger/src/os_common/os_abort.c
index 905f3160acf..ebef001ce67 100644
--- a/src/third_party/wiredtiger/src/os_common/os_abort.c
+++ b/src/third_party/wiredtiger/src/os_common/os_abort.c
@@ -16,12 +16,18 @@ void
__wt_abort(WT_SESSION_IMPL *session)
WT_GCC_FUNC_ATTRIBUTE((noreturn))
{
- __wt_errx(session, "aborting WiredTiger library");
+#ifdef HAVE_ATTACH
+ u_int i;
-#ifdef HAVE_DIAGNOSTIC
- __wt_attach(session);
-#endif
+ __wt_errx(session, "process ID %" PRIdMAX
+ ": waiting for debugger...", (intmax_t)getpid());
+ /* Sleep forever, the debugger will interrupt us when it attaches. */
+ for (i = 0; i < WT_MILLION; ++i)
+ __wt_sleep(10, 0);
+#else
+ __wt_errx(session, "aborting WiredTiger library");
+#endif
abort();
/* NOTREACHED */
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index a3cb6a53a09..10c2c0dc937 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -58,8 +58,12 @@ typedef struct {
uint64_t orig_btree_checkpoint_gen;
uint64_t orig_txn_checkpoint_gen;
- /* Track the oldest transaction running when reconciliation starts. */
+ /*
+ * Track the oldest running transaction and the stable timestamp when
+ * reconciliation starts.
+ */
uint64_t last_running;
+ WT_DECL_TIMESTAMP(stable_timestamp)
/* Track the page's maximum transaction. */
uint64_t max_txn;
@@ -506,6 +510,13 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
WT_TRET(session->block_manager_cleanup(session));
WT_TRET(__rec_destroy_session(session));
+
+ /*
+ * We track removed overflow objects in case there's a reader
+ * in transit when they're removed. Any form of eviction locks
+ * out readers, we can discard them all.
+ */
+ __wt_ovfl_discard_remove(session, page);
}
WT_RET(ret);
@@ -881,6 +892,7 @@ __rec_init(WT_SESSION_IMPL *session,
WT_BTREE *btree;
WT_PAGE *page;
WT_RECONCILE *r;
+ WT_TXN_GLOBAL *txn_global;
btree = S2BT(session);
page = ref->page;
@@ -924,7 +936,13 @@ __rec_init(WT_SESSION_IMPL *session,
* transaction running when reconciliation starts is considered
* uncommitted.
*/
- WT_ORDERED_READ(r->last_running, S2C(session)->txn_global.last_running);
+ txn_global = &S2C(session)->txn_global;
+ WT_ORDERED_READ(r->last_running, txn_global->last_running);
+#ifdef HAVE_TIMESTAMPS
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ __wt_timestamp_set(
+ &r->stable_timestamp, &txn_global->stable_timestamp));
+#endif
/*
* Lookaside table eviction is configured when eviction gets aggressive,
@@ -1194,6 +1212,64 @@ __rec_update_move(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd, WT_SAVE_UPD *supd)
}
/*
+ * __rec_append_orig_value --
+ * Append the key's original on-page value to the end of its update list.
+ */
+static int
+__rec_append_orig_value(WT_SESSION_IMPL *session,
+ WT_PAGE *page, WT_UPDATE *upd_list, WT_CELL_UNPACK *unpack)
+{
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_UPDATE *append, *upd;
+ size_t size;
+
+ /* If at least one standard update is globally visible, we're done. */
+ for (upd = upd_list; upd != NULL; upd = upd->next)
+ if (WT_UPDATE_DATA_VALUE(upd) &&
+ __wt_txn_upd_visible_all(session, upd))
+ return (0);
+
+ /*
+ * We need the original on-page value for some reader: get a copy and
+ * append it to the end of the update list with a transaction ID that
+ * guarantees its visibility.
+ *
+ * If we don't have a value cell, it's an insert/append list key/value
+ * pair which simply doesn't exist for some reader; place a deleted
+ * record at the end of the update list.
+ */
+ append = NULL; /* -Wconditional-uninitialized */
+ size = 0; /* -Wconditional-uninitialized */
+ if (unpack == NULL || unpack->type == WT_CELL_DEL)
+ WT_RET(__wt_update_alloc(session,
+ NULL, &append, &size, WT_UPDATE_DELETED));
+ else {
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_page_cell_data_ref(session, page, unpack, tmp));
+ WT_ERR(__wt_update_alloc(
+ session, tmp, &append, &size, WT_UPDATE_STANDARD));
+ }
+
+ /*
+ * Give the entry no transaction ID to ensure global visibility, then
+ * append it to the update list.
+ *
+ * Note the change to the actual reader-accessible update list: from now
+ * on, the original on-page value appears at the end of the update list,
+ * even if this reconciliation subsequently fails.
+ */
+ append->txnid = WT_TXN_NONE;
+ for (upd = upd_list; upd->next != NULL; upd = upd->next)
+ ;
+ WT_PUBLISH(upd->next, append);
+ __wt_cache_page_inmem_incr(session, page, size);
+
+err: __wt_scr_free(session, &tmp);
+ return (ret);
+}
+
+/*
* __rec_txn_read --
* Return the update in a list that should be written (or NULL if none can
* be written).
@@ -1203,18 +1279,14 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
WT_INSERT *ins, void *ripcip, WT_CELL_UNPACK *vpack, WT_UPDATE **updp)
{
WT_BTREE *btree;
- WT_DECL_RET;
- WT_DECL_ITEM(tmp);
- WT_DECL_TIMESTAMP(min_timestamp)
WT_DECL_TIMESTAMP(max_timestamp)
WT_PAGE *page;
- WT_UPDATE *append, *upd, *upd_list;
- size_t size, update_mem;
- uint64_t max_txn, min_txn, txnid;
- bool append_origv, skipped;
+ WT_UPDATE *upd, *upd_list;
+ size_t update_mem;
+ uint64_t max_txn, txnid;
+ bool skipped;
*updp = NULL;
- append = NULL; /* -Wconditional-uninitialized */
btree = S2BT(session);
page = r->page;
@@ -1235,9 +1307,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
max_txn = WT_TXN_NONE;
#ifdef HAVE_TIMESTAMPS
__wt_timestamp_set_zero(&max_timestamp);
- __wt_timestamp_set_inf(&min_timestamp);
#endif
- min_txn = UINT64_MAX;
if (F_ISSET(r, WT_EVICTING)) {
/* Discard obsolete updates. */
@@ -1258,8 +1328,6 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
*/
if (WT_TXNID_LT(max_txn, txnid))
max_txn = txnid;
- if (WT_TXNID_LT(txnid, min_txn))
- min_txn = txnid;
/*
* Find the first update we can use.
@@ -1285,17 +1353,13 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
if (*updp == NULL)
*updp = upd;
+
#ifdef HAVE_TIMESTAMPS
/* Track min/max timestamps. */
if (__wt_timestamp_cmp(
- &max_timestamp, &upd->timestamp) < 0)
+ &upd->timestamp, &max_timestamp) > 0)
__wt_timestamp_set(
&max_timestamp, &upd->timestamp);
-
- if (__wt_timestamp_cmp(
- &min_timestamp, &upd->timestamp) > 0)
- __wt_timestamp_set(
- &min_timestamp, &upd->timestamp);
#endif
}
} else
@@ -1325,7 +1389,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
}
}
- /* Reconciliation should never see a reserved update. */
+ /* Reconciliation should never see an aborted or reserved update. */
WT_ASSERT(session, *updp == NULL ||
((*updp)->txnid != WT_TXN_ABORTED &&
(*updp)->type != WT_UPDATE_RESERVED));
@@ -1370,18 +1434,17 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
if (!skipped && (F_ISSET(btree, WT_BTREE_LOOKASIDE) ||
__wt_txn_visible_all(session,
max_txn, WT_TIMESTAMP_NULL(&max_timestamp)))) {
-#ifdef HAVE_DIAGNOSTIC
/*
* The checkpoint transaction is special. Make sure we never
* write (metadata) updates from a checkpoint in a concurrent
* session.
*/
- txnid = *updp == NULL ? WT_TXN_NONE : (*updp)->txnid;
- WT_ASSERT(session, txnid == WT_TXN_NONE ||
- txnid != S2C(session)->txn_global.checkpoint_state.id ||
+ WT_ASSERT(session, *updp == NULL ||
+ (*updp)->txnid !=
+ S2C(session)->txn_global.checkpoint_state.id ||
WT_SESSION_IS_CHECKPOINT(session));
-#endif
- return (0);
+
+ goto check_original_value;
}
/*
@@ -1400,7 +1463,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
*/
if (!F_ISSET(r, WT_EVICTING)) {
r->leave_dirty = true;
- return (0);
+ goto check_original_value;
}
/*
@@ -1441,7 +1504,20 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
if (skipped)
r->update_mem_uncommitted += update_mem;
- append_origv = false;
+#ifdef HAVE_TIMESTAMPS
+ /*
+ * Don't allow lookaside eviction with updates newer than the stable
+ * timestamp. Also don't recommend lookaside eviction in that case.
+ */
+ if (__wt_timestamp_cmp(&max_timestamp, &r->stable_timestamp) > 0) {
+ if (!F_ISSET(r, WT_EVICT_UPDATE_RESTORE))
+ return (EBUSY);
+
+ if (!skipped)
+ r->update_mem_uncommitted += update_mem;
+ }
+#endif
+
if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE)) {
/*
* The save/restore eviction path.
@@ -1456,58 +1532,6 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
/* The page can't be marked clean. */
r->leave_dirty = true;
- } else {
- /*
- * The lookaside table eviction path.
- *
- * If at least one update is globally visible, copy the update
- * list and ignore the current on-page value. If no update is
- * globally visible, readers require the page's original value.
- */
- if (!__wt_txn_visible_all(
- session, min_txn, WT_TIMESTAMP_NULL(&min_timestamp)))
- append_origv = true;
- }
-
- /*
- * We need the original on-page value for some reason: get a copy and
- * append it to the end of the update list with a transaction ID that
- * guarantees its visibility.
- */
- if (append_origv) {
- /*
- * If we don't have a value cell, it's an insert/append list
- * key/value pair which simply doesn't exist for some reader;
- * place a deleted record at the end of the update list.
- */
- size = 0; /* -Wconditional-uninitialized */
- if (vpack == NULL || vpack->type == WT_CELL_DEL)
- WT_RET(__wt_update_alloc(session,
- NULL, &append, &size, WT_UPDATE_DELETED));
- else {
- WT_RET(__wt_scr_alloc(session, 0, &tmp));
- if ((ret = __wt_page_cell_data_ref(
- session, page, vpack, tmp)) == 0)
- ret = __wt_update_alloc(session,
- tmp, &append, &size, WT_UPDATE_STANDARD);
- __wt_scr_free(session, &tmp);
- WT_RET(ret);
- }
-
- /*
- * Give the entry no transaction ID to ensure global visibility,
- * append it to the update list.
- *
- * Note the change to the actual reader-accessible update list:
- * from now on, the original on-page value appears at the end
- * of the update list, even if this reconciliation subsequently
- * fails.
- */
- append->txnid = WT_TXN_NONE;
- for (upd = upd_list; upd->next != NULL; upd = upd->next)
- ;
- WT_PUBLISH(upd->next, append);
- __wt_cache_page_inmem_incr(session, page, size);
}
/*
@@ -1521,7 +1545,23 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
* that transaction ID is globally visible, we know we no longer need
* the lookaside table records, allowing them to be discarded.
*/
- return (__rec_update_save(session, r, ins, ripcip, *updp));
+ WT_RET(__rec_update_save(session, r, ins, ripcip, *updp));
+
+check_original_value:
+ /*
+ * Returning an update means the original on-page value might be lost,
+ * and that's a problem if there's a reader that needs it. There are
+ * two cases: any lookaside table eviction (because the backing disk
+ * image is rewritten), or any reconciliation of a backing overflow
+ * record that will be physically removed once it's no longer needed.
+ */
+ if (*updp != NULL &&
+ (F_ISSET(r, WT_EVICT_LOOKASIDE) ||
+ (vpack != NULL &&
+ vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM)))
+ WT_RET(__rec_append_orig_value(session, page, *updp, vpack));
+
+ return (0);
}
/*
@@ -4708,7 +4748,7 @@ __rec_col_var(WT_SESSION_IMPL *session,
* file, otherwise we'll leak blocks on the checkpoint.
* That's safe because if the backing overflow value is
* still needed by any running transaction, we'll cache
- * a copy in the reconciliation tracking structures.
+ * a copy in the update list.
*
* Regardless, we avoid copying in overflow records: if
* there's a WT_INSERT entry that modifies a reference
@@ -4793,8 +4833,8 @@ record_loop: /*
* The on-page value will never be accessed,
* write a placeholder record.
*/
- data = "@";
- size = 1;
+ data = "ovfl-unused";
+ size = WT_STORE_SIZE(strlen("ovfl-unused"));
} else {
update_no_copy = false; /* Maybe data copy */
@@ -4928,7 +4968,8 @@ compare: /*
*/
if (ovfl_state == OVFL_UNUSED &&
vpack->raw != WT_CELL_VALUE_OVFL_RM)
- WT_ERR(__wt_ovfl_remove(session, page, upd, vpack));
+ WT_ERR(__wt_ovfl_remove(
+ session, page, vpack, !F_ISSET(r, WT_EVICTING)));
}
/* Walk any append list. */
@@ -5535,8 +5576,9 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
* The on-page value will never be accessed,
* write a placeholder record.
*/
- WT_ERR(__rec_cell_build_val(
- session, r, "@", 1, (uint64_t)0));
+ WT_ERR(__rec_cell_build_val(session, r,
+ "ovfl-unused", strlen("ovfl-unused"),
+ (uint64_t)0));
} else {
val->buf.data = val_cell;
val->buf.size = __wt_cell_total_len(vpack);
@@ -5554,8 +5596,8 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
*/
if (vpack != NULL &&
vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM)
- WT_ERR(__wt_ovfl_remove(
- session, page, upd, vpack));
+ WT_ERR(__wt_ovfl_remove(session,
+ page, vpack, !F_ISSET(r, WT_EVICTING)));
switch (upd->type) {
case WT_UPDATE_DELETED:
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index 52d11651191..1a63ed675b5 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -1848,9 +1848,9 @@ __open_session(WT_CONNECTION_IMPL *conn,
if (!session_ret->active)
break;
if (i == conn->session_size)
- WT_ERR_MSG(session, ENOMEM,
- "only configured to support %" PRIu32 " sessions"
- " (including %d additional internal sessions)",
+ WT_ERR_MSG(session, WT_ERROR,
+ "out of sessions, only configured to support %" PRIu32
+ " sessions (including %d additional internal sessions)",
conn->session_size, WT_EXTRA_INTERNAL_SESSIONS);
/*
diff --git a/src/third_party/wiredtiger/src/support/err.c b/src/third_party/wiredtiger/src/support/err.c
index 94ae27628c2..a6ab328864d 100644
--- a/src/third_party/wiredtiger/src/support/err.c
+++ b/src/third_party/wiredtiger/src/support/err.c
@@ -533,17 +533,20 @@ __wt_panic(WT_SESSION_IMPL *session)
}
/*
- * __wt_illegal_value --
+ * __wt_illegal_value_func --
* A standard error message when we detect an illegal value.
*/
int
-__wt_illegal_value(WT_SESSION_IMPL *session, const char *name)
+__wt_illegal_value_func(
+ WT_SESSION_IMPL *session, const char *tag, const char *file, int line)
WT_GCC_FUNC_ATTRIBUTE((cold))
WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- __wt_errx(session, "%s%s%s",
- name == NULL ? "" : name, name == NULL ? "" : ": ",
- "encountered an illegal file format or internal value");
+ __wt_errx(session, "%s%s%s: (%s, %d)",
+ tag == NULL ? "" : tag,
+ tag == NULL ? "" : ": ",
+ "encountered an illegal file format or internal value",
+ file, line);
return (__wt_panic(session));
}
diff --git a/src/third_party/wiredtiger/src/support/global.c b/src/third_party/wiredtiger/src/support/global.c
index 6525fe21809..e425b690a5b 100644
--- a/src/third_party/wiredtiger/src/support/global.c
+++ b/src/third_party/wiredtiger/src/support/global.c
@@ -106,25 +106,4 @@ __wt_breakpoint(void)
*/
__wt_yield();
}
-
-/*
- * __wt_attach --
- * A routine to wait for the debugging to attach.
- */
-void
-__wt_attach(WT_SESSION_IMPL *session)
-{
-#ifdef HAVE_ATTACH
- u_int i;
-
- __wt_errx(session, "process ID %" PRIdMAX
- ": waiting for debugger...", (intmax_t)getpid());
-
- /* Sleep forever, the debugger will interrupt us when it attaches. */
- for (i = 0; i < WT_MILLION; ++i)
- __wt_sleep(10, 0);
-#else
- WT_UNUSED(session);
-#endif
-}
#endif
diff --git a/src/third_party/wiredtiger/src/support/time.c b/src/third_party/wiredtiger/src/support/time.c
index 233bc871e06..240a77591a3 100644
--- a/src/third_party/wiredtiger/src/support/time.c
+++ b/src/third_party/wiredtiger/src/support/time.c
@@ -35,8 +35,7 @@ __time_check_monotonic(WT_SESSION_IMPL *session, struct timespec *tsp)
/*
* __wt_epoch --
- * Return the time since the Epoch, adjusted so it never appears to go
- * backwards.
+ * Return the time since the Epoch.
*/
void
__wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
@@ -45,9 +44,14 @@ __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
struct timespec tmp;
/*
- * Read into a local variable so that we're comparing the correct
- * value when we check for monotonic increasing time. There are
- * many places we read into an unlocked global variable.
+ * Read into a local variable, then check for monotonically increasing
+ * time, ensuring single threads never see time move backward. We don't
+ * prevent multiple threads from seeing time move backwards (even when
+ * reading time serially, the saved last-read time is per thread, not
+ * per timer, so multiple threads can race the time). Nor do we prevent
+ * multiple threads simultaneously reading the time from seeing random
+ * time or time moving backwards (assigning the time structure to the
+ * returned memory location implies multicycle writes to memory).
*/
__wt_epoch_raw(session, &tmp);
__time_check_monotonic(session, &tmp);
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 191f7e0ba0f..09efb2924bf 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -445,12 +445,11 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET(__wt_txn_parse_timestamp(
session, "read", &txn->read_timestamp, &cval));
- __wt_readlock(session, &txn_global->rwlock);
- __wt_timestamp_set(
- &oldest_timestamp, &txn_global->oldest_timestamp);
- __wt_timestamp_set(
- &stable_timestamp, &txn_global->stable_timestamp);
- __wt_readunlock(session, &txn_global->rwlock);
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ __wt_timestamp_set(
+ &oldest_timestamp, &txn_global->oldest_timestamp);
+ __wt_timestamp_set(
+ &stable_timestamp, &txn_global->stable_timestamp));
if (__wt_timestamp_cmp(
&txn->read_timestamp, &oldest_timestamp) < 0)
WT_RET_MSG(session, EINVAL,
@@ -568,18 +567,20 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
WT_TXN_OP *op;
+ u_int i;
+ bool did_update, locked;
#ifdef HAVE_TIMESTAMPS
- WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
wt_timestamp_t prev_commit_timestamp;
bool update_timestamp;
#endif
- u_int i;
- bool did_update;
txn = &session->txn;
conn = S2C(session);
+ txn_global = &conn->txn_global;
did_update = txn->mod_count != 0;
+ locked = false;
WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || !did_update);
@@ -665,6 +666,14 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
* This is particularly important for checkpoints.
*/
__wt_txn_release_snapshot(session);
+ /*
+ * We hold the visibility lock for reading from the time
+ * we write our log record until the time we release our
+ * transaction so that the LSN any checkpoint gets will
+ * always reflect visible data.
+ */
+ __wt_readlock(session, &txn_global->visibility_rwlock);
+ locked = true;
WT_ERR(__wt_txn_log_commit(session, cfg));
}
@@ -687,9 +696,12 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
#ifdef HAVE_TIMESTAMPS
if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
- op->type != WT_TXN_OP_BASIC_TS)
+ op->type != WT_TXN_OP_BASIC_TS) {
+ WT_ASSERT(session,
+ op->fileid != WT_METAFILE_ID);
__wt_timestamp_set(&op->u.upd->timestamp,
&txn->commit_timestamp);
+ }
#endif
break;
@@ -724,14 +736,15 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
#endif
__wt_txn_release(session);
+ if (locked)
+ __wt_readunlock(session, &txn_global->visibility_rwlock);
#ifdef HAVE_TIMESTAMPS
/* First check if we've already committed something in the future. */
if (update_timestamp) {
- __wt_readlock(session, &txn_global->rwlock);
- __wt_timestamp_set(
- &prev_commit_timestamp, &txn_global->commit_timestamp);
- __wt_readunlock(session, &txn_global->rwlock);
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ __wt_timestamp_set(
+ &prev_commit_timestamp, &txn_global->commit_timestamp));
update_timestamp = __wt_timestamp_cmp(
&txn->commit_timestamp, &prev_commit_timestamp) > 0;
}
@@ -760,6 +773,8 @@ err: /*
* !!!
* Nothing can fail after this point.
*/
+ if (locked)
+ __wt_readunlock(session, &txn_global->visibility_rwlock);
WT_TRET(__wt_txn_rollback(session, cfg));
return (ret);
}
@@ -930,6 +945,7 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET(__wt_spin_init(
session, &txn_global->id_lock, "transaction id lock"));
WT_RET(__wt_rwlock_init(session, &txn_global->rwlock));
+ WT_RET(__wt_rwlock_init(session, &txn_global->visibility_rwlock));
WT_RET(__wt_rwlock_init(session, &txn_global->commit_timestamp_rwlock));
TAILQ_INIT(&txn_global->commit_timestamph);
@@ -971,6 +987,7 @@ __wt_txn_global_destroy(WT_SESSION_IMPL *session)
__wt_rwlock_destroy(session, &txn_global->commit_timestamp_rwlock);
__wt_rwlock_destroy(session, &txn_global->read_timestamp_rwlock);
__wt_rwlock_destroy(session, &txn_global->nsnap_rwlock);
+ __wt_rwlock_destroy(session, &txn_global->visibility_rwlock);
__wt_free(session, txn_global->states);
}
@@ -981,10 +998,7 @@ __wt_txn_global_destroy(WT_SESSION_IMPL *session)
int
__wt_txn_global_shutdown(WT_SESSION_IMPL *session)
{
- WT_DECL_RET;
- WT_TXN_GLOBAL *txn_global;
-
- txn_global = &S2C(session)->txn_global;
+ bool txn_active;
/*
* We're shutting down. Make sure everything gets freed.
@@ -995,10 +1009,8 @@ __wt_txn_global_shutdown(WT_SESSION_IMPL *session)
* transaction ID will catch up with the current ID.
*/
for (;;) {
- WT_TRET(__wt_txn_update_oldest(session,
- WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
- if (txn_global->oldest_id == txn_global->current &&
- txn_global->metadata_pinned == txn_global->current)
+ WT_RET(__wt_txn_activity_check(session, &txn_active));
+ if (!txn_active)
break;
WT_STAT_CONN_INCR(session, txn_release_blocked);
@@ -1010,10 +1022,10 @@ __wt_txn_global_shutdown(WT_SESSION_IMPL *session)
* Now that all transactions have completed, no timestamps should be
* pinned.
*/
- __wt_timestamp_set_inf(&txn_global->pinned_timestamp);
+ __wt_timestamp_set_inf(&S2C(session)->txn_global.pinned_timestamp);
#endif
- return (ret);
+ return (0);
}
#if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE)
@@ -1031,7 +1043,9 @@ __wt_verbose_dump_txn(WT_SESSION_IMPL *session)
const char *iso_tag;
uint64_t id;
uint32_t i, session_cnt;
-
+#ifdef HAVE_TIMESTAMPS
+ char hex_timestamp[3][2 * WT_TIMESTAMP_SIZE + 1];
+#endif
conn = S2C(session);
txn_global = &conn->txn_global;
@@ -1042,10 +1056,35 @@ __wt_verbose_dump_txn(WT_SESSION_IMPL *session)
WT_RET(__wt_msg(session,
"last running ID: %" PRIu64, txn_global->last_running));
WT_RET(__wt_msg(session, "oldest ID: %" PRIu64, txn_global->oldest_id));
- WT_RET(__wt_msg(session,
- "oldest named snapshot ID: %" PRIu64, txn_global->nsnap_oldest_id));
- WT_RET(__wt_msg(session, "checkpoint running? %s",
+#ifdef HAVE_TIMESTAMPS
+ WT_RET(__wt_timestamp_to_hex_string(
+ session, hex_timestamp[0], &txn_global->commit_timestamp));
+ WT_RET(__wt_msg(session, "commit timestamp: %s", hex_timestamp[0]));
+ WT_RET(__wt_timestamp_to_hex_string(
+ session, hex_timestamp[0], &txn_global->oldest_timestamp));
+ WT_RET(__wt_msg(session, "oldest timestamp: %s", hex_timestamp[0]));
+ WT_RET(__wt_timestamp_to_hex_string(
+ session, hex_timestamp[0], &txn_global->pinned_timestamp));
+ WT_RET(__wt_msg(session, "pinned timestamp: %s", hex_timestamp[0]));
+ WT_RET(__wt_timestamp_to_hex_string(
+ session, hex_timestamp[0], &txn_global->stable_timestamp));
+ WT_RET(__wt_msg(session, "stable timestamp: %s", hex_timestamp[0]));
+ WT_RET(__wt_msg(session, "has_commit_timestamp: %s",
+ txn_global->has_commit_timestamp ? "yes" : "no"));
+ WT_RET(__wt_msg(session, "has_oldest_timestamp: %s",
+ txn_global->has_oldest_timestamp ? "yes" : "no"));
+ WT_RET(__wt_msg(session, "has_pinned_timestamp: %s",
+ txn_global->has_pinned_timestamp ? "yes" : "no"));
+ WT_RET(__wt_msg(session, "has_stable_timestamp: %s",
+ txn_global->has_stable_timestamp ? "yes" : "no"));
+ WT_RET(__wt_msg(session, "oldest_is_pinned: %s",
+ txn_global->oldest_is_pinned ? "yes" : "no"));
+ WT_RET(__wt_msg(session, "stable_is_pinned: %s",
+ txn_global->stable_is_pinned ? "yes" : "no"));
+#endif
+
+ WT_RET(__wt_msg(session, "checkpoint running: %s",
txn_global->checkpoint_running ? "yes" : "no"));
WT_RET(__wt_msg(session, "checkpoint generation: %" PRIu64,
__wt_gen(session, WT_GEN_CHECKPOINT)));
@@ -1054,9 +1093,11 @@ __wt_verbose_dump_txn(WT_SESSION_IMPL *session)
WT_RET(__wt_msg(session, "checkpoint txn ID: %" PRIu64,
txn_global->checkpoint_state.id));
+ WT_RET(__wt_msg(session,
+ "oldest named snapshot ID: %" PRIu64, txn_global->nsnap_oldest_id));
+
WT_ORDERED_READ(session_cnt, conn->session_cnt);
WT_RET(__wt_msg(session, "session count: %" PRIu32, session_cnt));
-
WT_RET(__wt_msg(session, "Transaction state of active sessions:"));
/*
@@ -1083,7 +1124,40 @@ __wt_verbose_dump_txn(WT_SESSION_IMPL *session)
iso_tag = "WT_ISO_SNAPSHOT";
break;
}
-
+#ifdef HAVE_TIMESTAMPS
+ WT_RET(__wt_timestamp_to_hex_string(
+ session, hex_timestamp[0], &txn->commit_timestamp));
+ WT_RET(__wt_timestamp_to_hex_string(
+ session, hex_timestamp[1], &txn->first_commit_timestamp));
+ WT_RET(__wt_timestamp_to_hex_string(
+ session, hex_timestamp[2], &txn->read_timestamp));
+ WT_RET(__wt_msg(session,
+ "ID: %8" PRIu64
+ ", mod count: %u"
+ ", pinned ID: %8" PRIu64
+ ", snap min: %" PRIu64
+ ", snap max: %" PRIu64
+ ", commit_timestamp: %s"
+ ", first_commit_timestamp: %s"
+ ", read_timestamp: %s"
+ ", metadata pinned ID: %" PRIu64
+ ", flags: 0x%08" PRIx32
+ ", name: %s"
+ ", isolation: %s",
+ id,
+ txn->mod_count,
+ s->pinned_id,
+ txn->snap_min,
+ txn->snap_max,
+ hex_timestamp[0],
+ hex_timestamp[1],
+ hex_timestamp[2],
+ s->metadata_pinned,
+ txn->flags,
+ conn->sessions[i].name == NULL ?
+ "EMPTY" : conn->sessions[i].name,
+ iso_tag));
+#else
WT_RET(__wt_msg(session,
"ID: %6" PRIu64
", mod count: %u"
@@ -1104,6 +1178,7 @@ __wt_verbose_dump_txn(WT_SESSION_IMPL *session)
conn->sessions[i].name == NULL ?
"EMPTY" : conn->sessions[i].name,
iso_tag));
+#endif
}
WT_RET(__wt_msg(session, "%s", WT_DIVIDER));
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 532c4819d29..9065966fe8f 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -571,43 +571,17 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, const char *cfg[])
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *txn_state;
- char timestamp_buf[2 * WT_TIMESTAMP_SIZE + 1], timestamp_config[100];
- const char *query_cfg[] = { WT_CONFIG_BASE(session,
- WT_CONNECTION_query_timestamp), "get=stable", NULL };
const char *txn_cfg[] = { WT_CONFIG_BASE(session,
WT_SESSION_begin_transaction), "isolation=snapshot", NULL, NULL };
+ bool use_timestamp;
conn = S2C(session);
txn = &session->txn;
txn_global = &conn->txn_global;
txn_state = WT_SESSION_TXN_STATE(session);
- /*
- * Someone giving us a specific timestamp overrides the general
- * use_timestamp.
- */
- WT_RET(__wt_config_gets(session, cfg, "read_timestamp", &cval));
- if (cval.len > 0) {
- WT_RET(__wt_snprintf(timestamp_config, sizeof(timestamp_config),
- "read_timestamp=%.*s", (int)cval.len, cval.str));
- txn_cfg[2] = timestamp_config;
- } else if (txn_global->has_stable_timestamp) {
- WT_RET(__wt_config_gets(session, cfg, "use_timestamp", &cval));
- /*
- * Get the stable timestamp currently set. Then set that as
- * the read timestamp for the transaction.
- */
- if (cval.val != 0) {
- if ((ret = __wt_txn_global_query_timestamp(session,
- timestamp_buf, query_cfg)) != 0 &&
- ret != WT_NOTFOUND)
- return (ret);
- WT_RET(__wt_snprintf(timestamp_config,
- sizeof(timestamp_config),
- "read_timestamp=%s", timestamp_buf));
- txn_cfg[2] = timestamp_config;
- }
- }
+ WT_RET(__wt_config_gets(session, cfg, "use_timestamp", &cval));
+ use_timestamp = (cval.val != 0);
/*
* Start a snapshot transaction for the checkpoint.
@@ -667,15 +641,33 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, const char *cfg[])
*/
txn_state->id = txn_state->pinned_id =
txn_state->metadata_pinned = WT_TXN_NONE;
- __wt_writeunlock(session, &txn_global->rwlock);
#ifdef HAVE_TIMESTAMPS
/*
- * Now that the checkpoint transaction is published, clear it from the
- * regular lists.
+ * Set the checkpoint transaction's timestamp, if requested.
+ *
+ * We rely on having the global transaction data locked so the oldest
+ * timestamp can't move past the stable timestamp.
*/
- __wt_txn_clear_commit_timestamp(session);
- __wt_txn_clear_read_timestamp(session);
+ WT_ASSERT(session, !F_ISSET(txn,
+ WT_TXN_HAS_TS_COMMIT | WT_TXN_HAS_TS_READ |
+ WT_TXN_PUBLIC_TS_COMMIT | WT_TXN_PUBLIC_TS_READ));
+
+ if (use_timestamp && txn_global->has_stable_timestamp) {
+ __wt_timestamp_set(
+ &txn->read_timestamp, &txn_global->stable_timestamp);
+ F_SET(txn, WT_TXN_HAS_TS_READ);
+ }
+#else
+ WT_UNUSED(use_timestamp);
+#endif
+
+ __wt_writeunlock(session, &txn_global->rwlock);
+
+#ifdef HAVE_TIMESTAMPS
+ if (F_ISSET(txn, WT_TXN_HAS_TS_READ))
+ __wt_verbose_timestamp(session, &txn->read_timestamp,
+ "Checkpoint requested at stable timestamp");
#endif
/*
diff --git a/src/third_party/wiredtiger/src/txn/txn_ext.c b/src/third_party/wiredtiger/src/txn/txn_ext.c
index 1fe4d6ddf47..103a1d38166 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ext.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ext.c
@@ -72,7 +72,8 @@ __wt_ext_transaction_notify(
if (txn->notify == notify)
return (0);
if (txn->notify != NULL)
- return (ENOMEM);
+ WT_RET_MSG(
+ session, WT_ERROR, "transaction notify already scheduled");
txn->notify = notify;
diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c
index 1fc74fb53a1..a03047b5392 100644
--- a/src/third_party/wiredtiger/src/txn/txn_log.c
+++ b/src/third_party/wiredtiger/src/txn/txn_log.c
@@ -357,12 +357,14 @@ __wt_txn_checkpoint_log(
WT_ITEM *ckpt_snapshot, empty;
WT_LSN *ckpt_lsn;
WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
uint8_t *end, *p;
size_t recsize;
uint32_t i, rectype;
const char *fmt;
conn = S2C(session);
+ txn_global = &conn->txn_global;
txn = &session->txn;
ckpt_lsn = &txn->ckpt_lsn;
@@ -408,6 +410,15 @@ __wt_txn_checkpoint_log(
}
/*
+ * We take and immediately release the visibility lock.
+ * Acquiring the write lock guarantees that any transaction
+ * that has written to the log has also made its transaction
+ * visible at this time.
+ */
+ __wt_writelock(session, &txn_global->visibility_rwlock);
+ __wt_writeunlock(session, &txn_global->visibility_rwlock);
+
+ /*
* We need to make sure that the log records in the checkpoint
* LSN are on disk. In particular to make sure that the
* current log file exists.
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 9c02322c526..e19bbc73bb3 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -37,10 +37,10 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
* updated while rolling back, accessing it without a lock would
* violate protocol.
*/
- txn_global = &S2C(session)->txn_global;
- __wt_readlock(session, &txn_global->rwlock);
- __wt_timestamp_set(&rollback_timestamp, &txn_global->stable_timestamp);
- __wt_readunlock(session, &txn_global->rwlock);
+ txn_global = &conn->txn_global;
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ __wt_timestamp_set(
+ &rollback_timestamp, &txn_global->stable_timestamp));
__wt_las_cursor(session, &cursor, &session_flags);
@@ -120,11 +120,11 @@ __txn_abort_newer_update(WT_SESSION_IMPL *session,
}
/*
- * __txn_abort_newer_row_skip --
+ * __txn_abort_newer_insert --
* Apply the update abort check to each entry in an insert skip list
*/
static void
-__txn_abort_newer_row_skip(WT_SESSION_IMPL *session,
+__txn_abort_newer_insert(WT_SESSION_IMPL *session,
WT_INSERT_HEAD *head, wt_timestamp_t *rollback_timestamp)
{
WT_INSERT *ins;
@@ -134,6 +134,50 @@ __txn_abort_newer_row_skip(WT_SESSION_IMPL *session,
}
/*
+ * __txn_abort_newer_col_var --
+ *	Walk a variable-length column-store leaf page and apply the update
+ *	abort check, against the rollback timestamp, to every insert list
+ *	found on it.
+ */
+static void
+__txn_abort_newer_col_var(
+    WT_SESSION_IMPL *session, WT_PAGE *page, wt_timestamp_t *rollback_timestamp)
+{
+	WT_COL *cip;
+	WT_INSERT_HEAD *head;
+	uint32_t slot;
+
+	/* Update lists attached to the original on-page data items. */
+	WT_COL_FOREACH(page, cip, slot) {
+		head = WT_COL_UPDATE(page, cip);
+		if (head != NULL)
+			__txn_abort_newer_insert(
+			    session, head, rollback_timestamp);
+	}
+
+	/* The page's append list. */
+	head = WT_COL_APPEND(page);
+	if (head != NULL)
+		__txn_abort_newer_insert(session, head, rollback_timestamp);
+}
+
+/*
+ * __txn_abort_newer_col_fix --
+ *	Apply the update abort check, against the rollback timestamp, to the
+ *	insert lists of a fixed-length column-store leaf page.
+ */
+static void
+__txn_abort_newer_col_fix(
+    WT_SESSION_IMPL *session, WT_PAGE *page, wt_timestamp_t *rollback_timestamp)
+{
+	WT_INSERT_HEAD *head;
+
+	/* The single update list for the original on-page data items. */
+	head = WT_COL_UPDATE_SINGLE(page);
+	if (head != NULL)
+		__txn_abort_newer_insert(session, head, rollback_timestamp);
+
+	/* The page's append list. */
+	head = WT_COL_APPEND(page);
+	if (head != NULL)
+		__txn_abort_newer_insert(session, head, rollback_timestamp);
+}
+
+
+/*
* __txn_abort_newer_row_leaf --
* Abort updates on a row leaf page with timestamps newer than the
* rollback timestamp.
@@ -152,8 +196,7 @@ __txn_abort_newer_row_leaf(
* page.
*/
if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL)
- __txn_abort_newer_row_skip(
- session, insert, rollback_timestamp);
+ __txn_abort_newer_insert(session, insert, rollback_timestamp);
/*
* Review updates that belong to keys that are on the disk image,
@@ -165,7 +208,7 @@ __txn_abort_newer_row_leaf(
session, upd, rollback_timestamp);
if ((insert = WT_ROW_INSERT(page, rip)) != NULL)
- __txn_abort_newer_row_skip(
+ __txn_abort_newer_insert(
session, insert, rollback_timestamp);
}
}
@@ -182,6 +225,13 @@ __txn_abort_newer_updates(
page = ref->page;
switch (page->type) {
+ case WT_PAGE_COL_FIX:
+ __txn_abort_newer_col_fix(session, page, rollback_timestamp);
+ break;
+ case WT_PAGE_COL_VAR:
+ __txn_abort_newer_col_var(session, page, rollback_timestamp);
+ break;
+ case WT_PAGE_COL_INT:
case WT_PAGE_ROW_INT:
/*
* There is nothing to do for internal pages, since we aren't
@@ -193,9 +243,7 @@ __txn_abort_newer_updates(
case WT_PAGE_ROW_LEAF:
__txn_abort_newer_row_leaf(session, page, rollback_timestamp);
break;
- default:
- WT_RET_MSG(session, EINVAL, "rollback_to_stable "
- "is only supported for row store btrees");
+ WT_ILLEGAL_VALUE(session);
}
return (0);
@@ -209,14 +257,11 @@ static int
__txn_rollback_to_stable_custom_skip(
WT_SESSION_IMPL *session, WT_REF *ref, void *context, bool *skipp)
{
- WT_UNUSED(session);
WT_UNUSED(context);
+ WT_UNUSED(session);
/* Review all pages that are in memory. */
- if (ref->state == WT_REF_MEM || ref->state == WT_REF_DELETED)
- *skipp = false;
- else
- *skipp = true;
+ *skipp = !(ref->state == WT_REF_MEM || ref->state == WT_REF_DELETED);
return (0);
}
@@ -284,8 +329,7 @@ __txn_rollback_to_stable_btree(
* Add the btree ID to the bitstring, so we can exclude any
* lookaside entries for this btree.
*/
- __bit_set(
- S2C(session)->stable_rollback_bitstring, btree->id);
+ __bit_set(S2C(session)->stable_rollback_bitstring, btree->id);
return (0);
}
@@ -297,19 +341,15 @@ __txn_rollback_to_stable_btree(
if (btree->root.page == NULL)
return (0);
- if (btree->type != BTREE_ROW)
- WT_RET_MSG(session, EINVAL, "rollback_to_stable "
- "is only supported for row store btrees");
-
/*
* Copy the stable timestamp, otherwise we'd need to lock it each time
* it's accessed. Even though the stable timestamp isn't supposed to be
* updated while rolling back, accessing it without a lock would
* violate protocol.
*/
- __wt_readlock(session, &txn_global->rwlock);
- __wt_timestamp_set(&rollback_timestamp, &txn_global->stable_timestamp);
- __wt_readunlock(session, &txn_global->rwlock);
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ __wt_timestamp_set(
+ &rollback_timestamp, &txn_global->stable_timestamp));
/*
* Ensure the eviction server is out of the file - we don't
@@ -333,15 +373,12 @@ static int
__txn_rollback_to_stable_check(WT_SESSION_IMPL *session)
{
WT_TXN_GLOBAL *txn_global;
- bool active_txns, stable_set;
+ bool txn_active;
txn_global = &S2C(session)->txn_global;
- __wt_readlock(session, &txn_global->rwlock);
- stable_set = !__wt_timestamp_iszero(&txn_global->stable_timestamp);
- __wt_readunlock(session, &txn_global->rwlock);
- if (!stable_set)
- WT_RET_MSG(session, EINVAL, "rollback_to_stable requires a "
- "stable timestamp");
+ if (!txn_global->has_stable_timestamp)
+ WT_RET_MSG(session, EINVAL,
+ "rollback_to_stable requires a stable timestamp");
/*
* Help the user - see if they have any active transactions. I'd
@@ -349,8 +386,8 @@ __txn_rollback_to_stable_check(WT_SESSION_IMPL *session)
* require peeking into all open sessions, which isn't really
* kosher.
*/
- WT_RET(__wt_txn_are_any_active(session, &active_txns));
- if (active_txns)
+ WT_RET(__wt_txn_activity_check(session, &txn_active));
+ if (txn_active)
WT_RET_MSG(session, EINVAL,
"rollback_to_stable illegal with active transactions");
@@ -369,9 +406,8 @@ __wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[])
#ifndef HAVE_TIMESTAMPS
WT_UNUSED(cfg);
- WT_RET_MSG(session, EINVAL, "rollback_to_stable "
- "requires a version of WiredTiger built with timestamp "
- "support");
+ WT_RET_MSG(session, ENOTSUP, "rollback_to_stable "
+ "requires a version of WiredTiger built with timestamp support");
#else
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index 9e4a1e200cc..275ef941490 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -10,6 +10,83 @@
#ifdef HAVE_TIMESTAMPS
/*
+ * __wt_timestamp_to_hex_string --
+ * Convert a timestamp to hex string representation.
+ */
+int
+__wt_timestamp_to_hex_string(
+ WT_SESSION_IMPL *session, char *hex_timestamp, const wt_timestamp_t *ts_src)
+{
+ wt_timestamp_t ts;
+
+ __wt_timestamp_set(&ts, ts_src);
+
+ if (__wt_timestamp_iszero(&ts)) {
+ hex_timestamp[0] = '0';
+ hex_timestamp[1] = '\0';
+ return (0);
+ }
+
+#if WT_TIMESTAMP_SIZE == 8
+ {
+ char *p, v;
+
+ for (p = hex_timestamp; ts.val != 0; ts.val >>= 4)
+ *p++ = (char)__wt_hex((u_char)(ts.val & 0x0f));
+ *p = '\0';
+
+ /* Reverse the string. */
+ for (--p; p > hex_timestamp;) {
+ v = *p;
+ *p-- = *hex_timestamp;
+ *hex_timestamp++ = v;
+ }
+ WT_UNUSED(session);
+ }
+#else
+ {
+ WT_ITEM hexts;
+ size_t len;
+ uint8_t *tsp;
+
+ /* Avoid memory allocation: set up an item guaranteed large enough. */
+ hexts.data = hexts.mem = hex_timestamp;
+ hexts.memsize = 2 * WT_TIMESTAMP_SIZE + 1;
+ /* Trim leading zeros. */
+ for (tsp = ts.ts, len = WT_TIMESTAMP_SIZE;
+ len > 0 && *tsp == 0;
+ ++tsp, --len)
+ ;
+ WT_RET(__wt_raw_to_hex(session, tsp, len, &hexts));
+ }
+#endif
+ return (0);
+}
+
+/*
+ * __wt_verbose_timestamp --
+ *	Output a verbose message along with the specified timestamp. Does
+ *	nothing unless timestamp verbose messages are configured, or if the
+ *	timestamp cannot be converted to a string.
+ */
+void
+__wt_verbose_timestamp(WT_SESSION_IMPL *session,
+    const wt_timestamp_t *ts, const char *msg)
+{
+#ifdef HAVE_VERBOSE
+	char timestamp_buf[2 * WT_TIMESTAMP_SIZE + 1];
+
+	/*
+	 * Callers in this file invoke this function while holding the global
+	 * timestamp write lock: check the verbose flag first so the hex
+	 * conversion is only paid for when the message will be output.
+	 */
+	if (!WT_VERBOSE_ISSET(session, WT_VERB_TIMESTAMP))
+		return;
+
+	if (__wt_timestamp_to_hex_string(session, timestamp_buf, ts) != 0)
+		return;
+
+	__wt_verbose(session,
+	    WT_VERB_TIMESTAMP, "Timestamp %s : %s", timestamp_buf, msg);
+#else
+	WT_UNUSED(session);
+	WT_UNUSED(ts);
+	WT_UNUSED(msg);
+#endif
+}
+
+
+/*
* __wt_txn_parse_timestamp --
* Decodes and sets a timestamp.
*/
@@ -25,7 +102,7 @@ __wt_txn_parse_timestamp(WT_SESSION_IMPL *session,
/* Protect against unexpectedly long hex strings. */
if (cval->len > 2 * WT_TIMESTAMP_SIZE)
WT_RET_MSG(session, EINVAL,
- "Failed to parse %s timestamp '%.*s': too long",
+ "%s timestamp too long '%.*s'",
name, (int)cval->len, cval->str);
#if WT_TIMESTAMP_SIZE == 8
@@ -119,10 +196,9 @@ __txn_global_query_timestamp(
if (WT_STRING_MATCH("all_committed", cval.str, cval.len)) {
if (!txn_global->has_commit_timestamp)
return (WT_NOTFOUND);
- __wt_readlock(session, &txn_global->rwlock);
- __wt_timestamp_set(&ts, &txn_global->commit_timestamp);
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ __wt_timestamp_set(&ts, &txn_global->commit_timestamp));
WT_ASSERT(session, !__wt_timestamp_iszero(&ts));
- __wt_readunlock(session, &txn_global->rwlock);
/* Compare with the oldest running transaction. */
__wt_readlock(session, &txn_global->commit_timestamp_rwlock);
@@ -157,9 +233,8 @@ __txn_global_query_timestamp(
} else if (WT_STRING_MATCH("stable", cval.str, cval.len)) {
if (!txn_global->has_stable_timestamp)
return (WT_NOTFOUND);
- __wt_readlock(session, &txn_global->rwlock);
- __wt_timestamp_set(&ts, &txn_global->stable_timestamp);
- __wt_readunlock(session, &txn_global->rwlock);
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ __wt_timestamp_set(&ts, &txn_global->stable_timestamp));
} else
WT_RET_MSG(session, EINVAL,
"unknown timestamp query %.*s", (int)cval.len, cval.str);
@@ -181,47 +256,7 @@ __wt_txn_global_query_timestamp(
wt_timestamp_t ts;
WT_RET(__txn_global_query_timestamp(session, &ts, cfg));
-
-#if WT_TIMESTAMP_SIZE == 8
- {
- char *p, v;
-
- for (p = hex_timestamp; ts.val != 0; ts.val >>= 4)
- *p++ = (char)__wt_hex((u_char)(ts.val & 0x0f));
- *p = '\0';
-
- /* Reverse the string. */
- for (--p; p > hex_timestamp;) {
- v = *p;
- *p-- = *hex_timestamp;
- *hex_timestamp++ = v;
- }
- }
-#else
- {
- WT_ITEM hexts;
- size_t len;
- uint8_t *tsp;
-
- /*
- * Keep clang-analyzer happy: it can't tell that ts will be set
- * whenever the call below succeeds.
- */
- __wt_timestamp_set_zero(&ts);
- WT_RET(__txn_global_query_timestamp(session, &ts, cfg));
-
- /* Avoid memory allocation: set up an item guaranteed large enough. */
- hexts.data = hexts.mem = hex_timestamp;
- hexts.memsize = 2 * WT_TIMESTAMP_SIZE + 1;
- /* Trim leading zeros. */
- for (tsp = ts.ts, len = WT_TIMESTAMP_SIZE;
- len > 0 && *tsp == 0;
- ++tsp, --len)
- ;
- WT_RET(__wt_raw_to_hex(session, tsp, len, &hexts));
- }
-#endif
- return (0);
+ return (__wt_timestamp_to_hex_string(session, hex_timestamp, &ts));
#else
WT_UNUSED(hex_timestamp);
WT_UNUSED(cfg);
@@ -253,9 +288,9 @@ __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session)
if (txn_global->oldest_is_pinned)
return (0);
- __wt_readlock(session, &txn_global->rwlock);
- __wt_timestamp_set(&oldest_timestamp, &txn_global->oldest_timestamp);
- __wt_readunlock(session, &txn_global->rwlock);
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ __wt_timestamp_set(
+ &oldest_timestamp, &txn_global->oldest_timestamp));
/* Scan to find the global pinned timestamp. */
if ((ret = __txn_global_query_timestamp(
@@ -276,6 +311,8 @@ __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session)
txn_global->oldest_is_pinned = __wt_timestamp_cmp(
&txn_global->pinned_timestamp,
&txn_global->oldest_timestamp) == 0;
+ __wt_verbose_timestamp(session,
+ &pinned_timestamp, "Updated pinned timestamp");
}
__wt_writeunlock(session, &txn_global->rwlock);
@@ -388,6 +425,8 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
if (has_commit) {
__wt_timestamp_set(&txn_global->commit_timestamp, &commit_ts);
txn_global->has_commit_timestamp = true;
+ __wt_verbose_timestamp(session, &commit_ts,
+ "Updated global commit timestamp");
}
if (has_oldest && (!txn_global->has_oldest_timestamp ||
@@ -396,6 +435,8 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
__wt_timestamp_set(&txn_global->oldest_timestamp, &oldest_ts);
txn_global->has_oldest_timestamp = true;
txn_global->oldest_is_pinned = false;
+ __wt_verbose_timestamp(session, &oldest_ts,
+ "Updated global oldest timestamp");
}
if (has_stable && (!txn_global->has_stable_timestamp ||
@@ -404,17 +445,18 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
__wt_timestamp_set(&txn_global->stable_timestamp, &stable_ts);
txn_global->has_stable_timestamp = true;
txn_global->stable_is_pinned = false;
+ __wt_verbose_timestamp(session, &stable_ts,
+ "Updated global stable timestamp");
}
__wt_writeunlock(session, &txn_global->rwlock);
if (has_oldest || has_stable)
WT_RET(__wt_txn_update_pinned_timestamp(session));
-
+ }
#else
WT_RET_MSG(session, EINVAL, "set_timestamp requires a "
"version of WiredTiger built with timestamp support");
#endif
- }
return (0);
}
diff --git a/src/third_party/wiredtiger/test/csuite/time_shift_test.sh b/src/third_party/wiredtiger/test/csuite/time_shift_test.sh
new file mode 100755
index 00000000000..ae06fd03f36
--- /dev/null
+++ b/src/third_party/wiredtiger/test/csuite/time_shift_test.sh
@@ -0,0 +1,116 @@
+#! /bin/sh
+
+set -e
+
+# The purpose of this test is to ensure we use the monotonic clock instead of
+# the realtime clock in our code. We have had instances where WT hangs when
+# the system clock shifts (for example, due to NTP servers). This test measures
+# the execution time of a test (test_rwlock), shifts the clock backwards by
+# that time period and re-executes the test. If the difference between the two
+# execution times is no more than 20% the test is considered passed. 20% is
+# selected based on the assumption that other factors of the environment will
+# influence the execution time by less than 20%.
+#
+# Usage: time_shift_test.sh <libpath> [cpuset]
+#	libpath : path to the libfaketime library (required)
+#	cpuset  : cpu set passed to taskset on Linux (default 0-1)
+
+
+# need to enable long tests to run test_rwlock
+export TESTUTIL_ENABLE_LONG_TESTS=1
+
+# We will run only when long tests are enabled.
+test "$TESTUTIL_ENABLE_LONG_TESTS" = "1" || exit 0
+
+EXIT_SUCCESS=0
+EXIT_FAILURE=1
+
+export DONT_FAKE_MONOTONIC=1
+RUN_OS=$(uname -s)
+
+# On Linux we run with cpu affinity to control the execution time; if we
+# don't control the execution time this test is not effective.
+CPU_SET=0-1
+echo "test read write lock for time shifting using libfaketime"
+
+
+# check for program arguments, if not present, print usage
+# (quoted so a path containing whitespace is handled as a single word)
+if [ -z "$1" ]
+then
+	echo "fail : this test needs libfaketime library with path"
+	echo "Usage :"
+	echo " " $0 " <libpath> [cpuset] "
+	echo " libpath : path to libfaketime library"
+	echo " cpuset : set of cpu's to be used for taskset on linux"
+	echo " : default is 0-1 "
+	exit $EXIT_FAILURE
+fi
+
+# check for the existence of dependent library
+if [ ! -r "$1" ]
+then
+	echo "fail : $1 , libfaketime library is not readable"
+	exit $EXIT_FAILURE
+fi
+
+# first run: measure the normal execution time
+SEC1=`date +%s`
+if [ "$RUN_OS" = "Darwin" ]
+then
+	./test_rwlock
+elif [ "$RUN_OS" = "Linux" ]
+then
+	if [ -z "$2" ]
+	then
+		echo "default taskset value is 0-1"
+	else
+		CPU_SET=$2
+	fi
+	taskset -c "$CPU_SET" ./test_rwlock
+else
+	echo "not able to decide running OS, so exiting"
+	exit $EXIT_FAILURE
+fi
+
+SEC2=`date +%s`
+DIFF1=$((SEC2 - SEC1))
+
+# The baseline is the divisor of the percentage calculation below: without
+# at least one second of measured time there is nothing to compare against
+# (and the division would fail).
+if [ "$DIFF1" -le 0 ]
+then
+	echo "fail : normal execution time is less than a second, cannot compare"
+	exit $EXIT_FAILURE
+fi
+
+# preload libfaketime
+if [ "$RUN_OS" = "Darwin" ]
+then
+	export DYLD_FORCE_FLAT_NAMESPACE=y
+	export DYLD_INSERT_LIBRARIES="$1"
+	./test_rwlock &
+else
+	LD_PRELOAD="$1" taskset -c "$CPU_SET" ./test_rwlock &
+fi
+
+# get pid of test run in background
+PID=$!
+
+# Never leave the fake time configuration behind: "set -e" terminates the
+# script at the wait below if the test fails.
+trap 'rm -f "$HOME/.faketimerc"' EXIT
+
+# plain seconds: a unit suffix is not accepted by every sleep implementation
+sleep 5
+echo "-$DIFF1""s" >| ~/.faketimerc
+
+wait $PID
+
+# kept echo statement here so as not to lose it in the cluster of test msgs.
+echo "after sleeping for 5 seconds set ~/.faketimerc value as -ve $DIFF1 seconds"
+rm -f ~/.faketimerc
+
+if [ "$RUN_OS" = "Darwin" ]
+then
+	export DYLD_FORCE_FLAT_NAMESPACE=
+	export DYLD_INSERT_LIBRARIES=
+fi
+SEC3=`date +%s`
+DIFF2=$((SEC3 - SEC2))
+
+PERC=$((((DIFF2 - DIFF1)*100)/DIFF1))
+echo "execution time difference : $PERC %, less than 20% is ok"
+echo "normal execution time : $DIFF1 seconds"
+echo "fake time reduction by : $DIFF1 seconds"
+echo "execution time with -ve time shift : $DIFF2 seconds"
+
+if [ "$PERC" -le 20 ]
+then
+	echo "pass : execution time is affected $PERC % by -ve time shift"
+	exit $EXIT_SUCCESS
+else
+	echo "fail : execution time is affected $PERC % by -ve time shift"
+	exit $EXIT_FAILURE
+fi
diff --git a/src/third_party/wiredtiger/test/csuite/wt3363_checkpoint_op_races/main.c b/src/third_party/wiredtiger/test/csuite/wt3363_checkpoint_op_races/main.c
index d007eb65382..9cb1ab0f4c6 100644
--- a/src/third_party/wiredtiger/test/csuite/wt3363_checkpoint_op_races/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt3363_checkpoint_op_races/main.c
@@ -81,7 +81,7 @@ main(int argc, char *argv[])
* This test should not run unless long tests flag is set. The test
* runs for 15 minutes.
*/
- if (!testutil_is_flag_set("WT3363_CHECKPOINT_OP_RACES"))
+ if (!testutil_is_flag_set("TESTUTIL_ENABLE_TIMING_TESTS"))
return (EXIT_SUCCESS);
opts = &_opts;
diff --git a/src/third_party/wiredtiger/test/format/backup.c b/src/third_party/wiredtiger/test/format/backup.c
index 47f3c54325f..0dc7402e181 100644
--- a/src/third_party/wiredtiger/test/format/backup.c
+++ b/src/third_party/wiredtiger/test/format/backup.c
@@ -36,6 +36,7 @@ static void
check_copy(void)
{
WT_CONNECTION *conn;
+ WT_DECL_RET;
WT_SESSION *session;
wts_open(g.home_backup, false, &conn);
@@ -44,9 +45,14 @@ check_copy(void)
conn->open_session(conn, NULL, NULL, &session),
"%s", g.home_backup);
- testutil_checkfmt(
- session->verify(session, g.uri, NULL),
- "%s: %s", g.home_backup, g.uri);
+ /*
+ * Verify can return EBUSY if the handle isn't available. Don't yield
+ * and retry, in the case of LSM, the handle may not be available for
+ * a long time.
+ */
+ ret = session->verify(session, g.uri, NULL);
+ testutil_assertfmt(ret == 0 || ret == EBUSY,
+ "WT_SESSION.verify: %s: %s", g.home_backup, g.uri);
testutil_checkfmt(conn->close(conn, NULL), "%s", g.home_backup);
}
diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c
index 6a58cad5403..031e3bb25af 100644
--- a/src/third_party/wiredtiger/test/format/wts.c
+++ b/src/third_party/wiredtiger/test/format/wts.c
@@ -584,10 +584,14 @@ wts_verify(const char *tag)
testutil_check(conn->set_timestamp(conn, config_buf));
}
- /* Session operations for LSM can return EBUSY. */
+ /*
+ * Verify can return EBUSY if the handle isn't available. Don't yield
+ * and retry, in the case of LSM, the handle may not be available for
+ * a long time.
+ */
ret = session->verify(session, g.uri, "strict");
- if (ret != 0 && !(ret == EBUSY && DATASOURCE("lsm")))
- testutil_die(ret, "session.verify: %s: %s", g.uri, tag);
+ testutil_assertfmt(
+ ret == 0 || ret == EBUSY, "session.verify: %s: %s", g.uri, tag);
if (g.logging != 0)
(void)g.wt_api->msg_printf(g.wt_api, session,
diff --git a/src/third_party/wiredtiger/test/recovery/Makefile.am b/src/third_party/wiredtiger/test/recovery/Makefile.am
index 3e7fce17d0e..298b9a995b8 100644
--- a/src/third_party/wiredtiger/test/recovery/Makefile.am
+++ b/src/third_party/wiredtiger/test/recovery/Makefile.am
@@ -2,12 +2,17 @@ AM_CPPFLAGS = -I$(top_builddir)
AM_CPPFLAGS +=-I$(top_srcdir)/src/include
AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
-noinst_PROGRAMS = random-abort truncated-log
+noinst_PROGRAMS = random-abort timestamp-abort truncated-log
random_abort_SOURCES = random-abort.c
random_abort_LDADD = $(top_builddir)/test/utility/libtest_util.la
random_abort_LDADD +=$(top_builddir)/libwiredtiger.la
random_abort_LDFLAGS = -static
+timestamp_abort_SOURCES = timestamp-abort.c
+timestamp_abort_LDADD = $(top_builddir)/test/utility/libtest_util.la
+timestamp_abort_LDADD +=$(top_builddir)/libwiredtiger.la
+timestamp_abort_LDFLAGS = -static
+
truncated_log_SOURCES = truncated-log.c
truncated_log_LDADD = $(top_builddir)/test/utility/libtest_util.la
truncated_log_LDADD +=$(top_builddir)/libwiredtiger.la
diff --git a/src/third_party/wiredtiger/test/recovery/smoke.sh b/src/third_party/wiredtiger/test/recovery/smoke.sh
index ba4d77c642b..6587c7c9f98 100755
--- a/src/third_party/wiredtiger/test/recovery/smoke.sh
+++ b/src/third_party/wiredtiger/test/recovery/smoke.sh
@@ -8,4 +8,8 @@ $TEST_WRAPPER ./random-abort -t 10 -T 5
$TEST_WRAPPER ./random-abort -m -t 10 -T 5
$TEST_WRAPPER ./random-abort -C -t 10 -T 5
$TEST_WRAPPER ./random-abort -C -m -t 10 -T 5
+$TEST_WRAPPER ./timestamp-abort -t 10 -T 5
+$TEST_WRAPPER ./timestamp-abort -m -t 10 -T 5
+$TEST_WRAPPER ./timestamp-abort -C -t 10 -T 5
+$TEST_WRAPPER ./timestamp-abort -C -m -t 10 -T 5
$TEST_WRAPPER ./truncated-log
diff --git a/src/third_party/wiredtiger/test/recovery/timestamp-abort.c b/src/third_party/wiredtiger/test/recovery/timestamp-abort.c
new file mode 100644
index 00000000000..7e912b1fe26
--- /dev/null
+++ b/src/third_party/wiredtiger/test/recovery/timestamp-abort.c
@@ -0,0 +1,722 @@
+/*-
+ * Public Domain 2014-2017 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "test_util.h"
+
+#include <sys/wait.h>
+#include <signal.h>
+
+static char home[1024]; /* Program working dir */
+
+/*
+ * Create three tables that we will write the same data to and verify that
+ * all the types of usage have the expected data in them after a crash and
+ * recovery. We want:
+ * 1. A table that is logged and is not involved in timestamps. This table
+ * simulates a user local table.
+ * 2. A table that is logged and involved in timestamps. This simulates
+ * the oplog.
+ * 3. A table that is not logged and involved in timestamps. This simulates
+ * a typical collection file.
+ *
+ * We also create a fourth table that is not logged and not involved directly
+ * in timestamps to store the stable timestamp. That way we can know what the
+ * latest stable timestamp is on checkpoint.
+ *
+ * We also create several files that are not WiredTiger tables. The checkpoint
+ * thread creates a file indicating that a checkpoint has completed. The parent
+ * process uses this to know when at least one checkpoint is done and it can
+ * start the timer to abort.
+ *
+ * Each worker thread creates its own records file that records the data it
+ * inserted and it records the timestamp that was used for that insertion.
+ */
+static const char * const uri_local = "table:local"; /* Logged, not involved in timestamps */
+static const char * const uri_oplog = "table:oplog"; /* Logged, involved in timestamps */
+static const char * const uri_collection = "table:collection"; /* Not logged, involved in timestamps */
+
+static const char * const stable_store = "table:stable"; /* Not logged, records each thread's stable timestamp */
+static const char * const ckpt_file = "checkpoint_done"; /* Created once the first checkpoint completes */
+static bool compat, inmem, use_ts; /* Run configuration, initialized in main */
+static uint64_t global_ts = 1; /* Timestamp counter shared by the threads */
+
+#define MAX_TH 12 /* Sizes the per-thread array in main */
+#define MAX_TIME 40 /* NOTE(review): presumably the longest run time in seconds -- confirm */
+#define MIN_TH 5 /* Initial thread count in main */
+#define MIN_TIME 10 /* Initial timeout in main */
+#define RECORDS_FILE "records-%" PRIu32 /* Per-thread records file name, formatted with the thread id */
+#define STABLE_PERIOD 100 /* A worker records its stable timestamp every this many keys */
+
+#define ENV_CONFIG_COMPAT ",compatibility=(release=\"2.9\")" /* Appended when compat is set */
+#define ENV_CONFIG_DEF \
+ "create,log=(archive=false,file_max=10M,enabled)" /* Used by run_workload when inmem is set */
+#define ENV_CONFIG_TXNSYNC \
+ "create,log=(archive=false,file_max=10M,enabled)," \
+ "transaction_sync=(enabled,method=none)" /* Used by run_workload when inmem is not set */
+#define ENV_CONFIG_REC "log=(archive=false,recover=on)" /* NOTE(review): presumably the configuration for the recovery open -- confirm */
+
+#define MAX_CKPT_INTERVAL 5 /* Maximum interval between checkpoints */
+#define MAX_VAL 1024 /* Size of the value buffers in thread_run */
+
+static void usage(void)
+    WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+/*
+ * usage --
+ *	Write the command-line synopsis to stderr and exit with a failure
+ *	status.
+ */
+static void
+usage(void)
+{
+	(void)fprintf(stderr,
+	    "usage: %s [-h dir] [-T threads] [-t time] [-Cmvz]\n", progname);
+	exit(EXIT_FAILURE);
+}
+
+typedef struct {
+ WT_CONNECTION *conn; /* Connection shared by all of the threads */
+ uint64_t start; /* First key a worker thread inserts */
+ uint32_t id; /* Thread number, 0 is the checkpoint thread */
+} WT_THREAD_DATA;
+
+/*
+ * thread_ckpt_run --
+ *	Runner function for the checkpoint thread: checkpoint at random
+ *	intervals, forever, and create a marker file once the first
+ *	checkpoint has completed.
+ */
+static WT_THREAD_RET
+thread_ckpt_run(void *arg)
+{
+	FILE *fp;
+	WT_RAND_STATE rnd;
+	WT_SESSION *session;
+	WT_THREAD_DATA *td;
+	uint64_t ts;
+	int i, ret;
+	bool first_ckpt;
+
+	__wt_random_init(&rnd);
+	td = (WT_THREAD_DATA *)arg;
+	first_ckpt = true;
+	ts = 0;
+
+	/*
+	 * Discard any marker file from a previous run, it must only exist
+	 * after this run has finished a checkpoint.
+	 */
+	(void)unlink(ckpt_file);
+
+	ret = td->conn->open_session(td->conn, NULL, NULL, &session);
+	if (ret != 0)
+		testutil_die(ret, "WT_CONNECTION:open_session");
+
+	i = 0;
+	for (;;) {
+		/* Wait a random number of seconds, possibly none. */
+		sleep(__wt_random(&rnd) % MAX_CKPT_INTERVAL);
+
+		/* Sample the shared timestamp for the progress message. */
+		if (use_ts)
+			ts = global_ts;
+
+		/*
+		 * Since this is the default, send in this string even if
+		 * running without timestamps.
+		 */
+		testutil_check(session->checkpoint(
+		    session, "use_timestamp=true"));
+		printf("Checkpoint %d complete. Minimum ts %" PRIu64 "\n",
+		    i, ts);
+		fflush(stdout);
+
+		/*
+		 * Create the checkpoint file so that the parent process knows
+		 * at least one checkpoint has finished and can start its
+		 * timer.
+		 */
+		if (first_ckpt) {
+			testutil_checksys((fp = fopen(ckpt_file, "w")) == NULL);
+			first_ckpt = false;
+			testutil_checksys(fclose(fp) != 0);
+		}
+		++i;
+	}
+	/* NOTREACHED */
+}
+
+
+/*
+ * thread_run --
+ *	Runner function for the worker threads. Starting at its own first
+ *	key, each worker inserts the same key into the collection and oplog
+ *	tables inside one transaction (committed at a timestamp when
+ *	timestamps are in use), then into the local table outside that
+ *	transaction. It appends a "timestamp key" line to its own records
+ *	file for every key so the inserts can be checked later. The loop
+ *	never exits.
+ */
+static WT_THREAD_RET
+thread_run(void *arg)
+{
+	FILE *fp;
+	WT_CURSOR *cur_coll, *cur_local, *cur_oplog, *cur_stable;
+	WT_ITEM data;
+	WT_RAND_STATE rnd;
+	WT_SESSION *session;
+	WT_THREAD_DATA *td;
+	uint64_t i, stable_ts;
+	int ret;
+	char cbuf[MAX_VAL], lbuf[MAX_VAL], obuf[MAX_VAL];
+	char kname[64], tscfg[64];
+
+	__wt_random_init(&rnd);
+	memset(cbuf, 0, sizeof(cbuf));
+	memset(lbuf, 0, sizeof(lbuf));
+	memset(obuf, 0, sizeof(obuf));
+	memset(kname, 0, sizeof(kname));
+
+	td = (WT_THREAD_DATA *)arg;
+	/*
+	 * Set up the separate file for checking.
+	 */
+	testutil_check(__wt_snprintf(cbuf, sizeof(cbuf), RECORDS_FILE, td->id));
+	(void)unlink(cbuf);
+	testutil_checksys((fp = fopen(cbuf, "w")) == NULL);
+	/*
+	 * Set to line buffering. But that is advisory only. We've seen
+	 * cases where the result files end up with partial lines.
+	 */
+	__wt_stream_set_line_buffer(fp);
+	if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0)
+		testutil_die(ret, "WT_CONNECTION:open_session");
+	/*
+	 * Open a cursor to each table.
+	 */
+	if ((ret = session->open_cursor(session,
+	    uri_collection, NULL, NULL, &cur_coll)) != 0)
+		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_collection);
+	if ((ret = session->open_cursor(session,
+	    uri_local, NULL, NULL, &cur_local)) != 0)
+		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_local);
+	if ((ret = session->open_cursor(session,
+	    uri_oplog, NULL, NULL, &cur_oplog)) != 0)
+		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_oplog);
+
+	if ((ret = session->open_cursor(
+	    session, stable_store, NULL, NULL, &cur_stable)) != 0)
+		testutil_die(ret, "WT_SESSION.open_cursor: %s", stable_store);
+
+	/*
+	 * Write our portion of the key space until we're killed.
+	 */
+	printf("Thread %" PRIu32 " starts at %" PRIu64 "\n", td->id, td->start);
+	for (i = td->start; ; ++i) {
+		/*
+		 * Every worker draws its timestamp from the same counter:
+		 * allocate it atomically so two threads can never get the
+		 * same one. The atomic add returns the incremented value,
+		 * subtract one to get the value this thread owns.
+		 */
+		if (use_ts)
+			stable_ts = __wt_atomic_add64(&global_ts, 1) - 1;
+		else
+			stable_ts = 0;
+		testutil_check(__wt_snprintf(
+		    kname, sizeof(kname), "%" PRIu64, i));
+
+		testutil_check(session->begin_transaction(session, NULL));
+		cur_coll->set_key(cur_coll, kname);
+		cur_local->set_key(cur_local, kname);
+		cur_oplog->set_key(cur_oplog, kname);
+		/*
+		 * Put an informative string into the value so that it
+		 * can be viewed well in a binary dump. The thread id is a
+		 * 32-bit value and must be formatted as one.
+		 */
+		testutil_check(__wt_snprintf(cbuf, sizeof(cbuf),
+		    "COLL: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64,
+		    td->id, stable_ts, i));
+		testutil_check(__wt_snprintf(lbuf, sizeof(lbuf),
+		    "LOCAL: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64,
+		    td->id, stable_ts, i));
+		testutil_check(__wt_snprintf(obuf, sizeof(obuf),
+		    "OPLOG: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64,
+		    td->id, stable_ts, i));
+		/* The value length is random, the text may be cut short. */
+		data.size = __wt_random(&rnd) % MAX_VAL;
+		data.data = cbuf;
+		cur_coll->set_value(cur_coll, &data);
+		if ((ret = cur_coll->insert(cur_coll)) != 0)
+			testutil_die(ret, "WT_CURSOR.insert");
+		data.size = __wt_random(&rnd) % MAX_VAL;
+		data.data = obuf;
+		cur_oplog->set_value(cur_oplog, &data);
+		if ((ret = cur_oplog->insert(cur_oplog)) != 0)
+			testutil_die(ret, "WT_CURSOR.insert");
+		if (use_ts) {
+			testutil_check(__wt_snprintf(tscfg, sizeof(tscfg),
+			    "commit_timestamp=%" PRIx64, stable_ts));
+			testutil_check(
+			    session->commit_transaction(session, tscfg));
+		} else
+			testutil_check(
+			    session->commit_transaction(session, NULL));
+		/*
+		 * Insert into the local table outside the timestamp txn.
+		 */
+		data.size = __wt_random(&rnd) % MAX_VAL;
+		data.data = lbuf;
+		cur_local->set_value(cur_local, &data);
+		if ((ret = cur_local->insert(cur_local)) != 0)
+			testutil_die(ret, "WT_CURSOR.insert");
+
+		/*
+		 * Every N records we will record our stable timestamp into the
+		 * stable table. That will define our threshold where we
+		 * expect to find records after recovery.
+		 */
+		if (i % STABLE_PERIOD == 0) {
+			if (use_ts) {
+				/*
+				 * Set both the oldest and stable timestamp
+				 * so that we don't need to maintain read
+				 * availability at older timestamps.
+				 */
+				testutil_check(__wt_snprintf(
+				    tscfg, sizeof(tscfg),
+				    "oldest_timestamp=%" PRIx64
+				    ",stable_timestamp=%" PRIx64,
+				    stable_ts, stable_ts));
+				testutil_check(
+				    td->conn->set_timestamp(td->conn, tscfg));
+			}
+			/*
+			 * The stable table is created with a 64-bit key
+			 * format: widen the 32-bit thread id explicitly, the
+			 * variadic call won't do it for us.
+			 */
+			cur_stable->set_key(cur_stable, (uint64_t)td->id);
+			cur_stable->set_value(cur_stable, stable_ts);
+			testutil_check(cur_stable->insert(cur_stable));
+		}
+		/*
+		 * Save the timestamp and key separately for checking later.
+		 */
+		if (fprintf(fp,
+		    "%" PRIu64 " %" PRIu64 "\n", stable_ts, i) < 0)
+			testutil_die(EIO, "fprintf");
+	}
+	/* NOTREACHED */
+}
+
+
+/*
+ * run_workload --
+ *	Child process: create the database and its tables, then start the
+ *	checkpoint thread and the worker threads, which add data until the
+ *	process is killed by the parent.
+ */
+static void run_workload(uint32_t)
+    WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void
+run_workload(uint32_t nth)
+{
+	WT_CONNECTION *conn;
+	WT_SESSION *session;
+	WT_THREAD_DATA *td;
+	wt_thread_t *thr;
+	uint32_t i;
+	int ret;
+	char envconf[512];
+
+	/* One extra slot in each array, slot 0 is the checkpoint thread. */
+	thr = dcalloc(nth+1, sizeof(*thr));
+	td = dcalloc(nth+1, sizeof(WT_THREAD_DATA));
+	if (chdir(home) != 0)
+		testutil_die(errno, "Child chdir: %s", home);
+
+	/* Base configuration, plus the compatibility setting if requested. */
+	testutil_check(__wt_snprintf(envconf, sizeof(envconf), "%s%s",
+	    inmem ? ENV_CONFIG_DEF : ENV_CONFIG_TXNSYNC,
+	    compat ? ENV_CONFIG_COMPAT : ""));
+
+	ret = wiredtiger_open(NULL, NULL, envconf, &conn);
+	if (ret != 0)
+		testutil_die(ret, "wiredtiger_open");
+	ret = conn->open_session(conn, NULL, NULL, &session);
+	if (ret != 0)
+		testutil_die(ret, "WT_CONNECTION:open_session");
+
+	/*
+	 * Create all the tables.
+	 */
+	ret = session->create(session, uri_collection,
+	    "key_format=S,value_format=u,log=(enabled=false)");
+	if (ret != 0)
+		testutil_die(ret, "WT_SESSION.create: %s", uri_collection);
+	ret = session->create(
+	    session, uri_local, "key_format=S,value_format=u");
+	if (ret != 0)
+		testutil_die(ret, "WT_SESSION.create: %s", uri_local);
+	ret = session->create(
+	    session, uri_oplog, "key_format=S,value_format=u");
+	if (ret != 0)
+		testutil_die(ret, "WT_SESSION.create: %s", uri_oplog);
+	/*
+	 * Don't log the stable timestamp table so that we know what timestamp
+	 * was stored at the checkpoint.
+	 */
+	ret = session->create(session, stable_store,
+	    "key_format=Q,value_format=Q,log=(enabled=false)");
+	if (ret != 0)
+		testutil_die(ret, "WT_SESSION.create: %s", stable_store);
+	ret = session->close(session, NULL);
+	if (ret != 0)
+		testutil_die(ret, "WT_SESSION:close");
+
+	/*
+	 * Thread 0 is the checkpoint thread.
+	 */
+	td[0].conn = conn;
+	td[0].id = 0;
+	printf("Create checkpoint thread\n");
+	testutil_check(__wt_thread_create(
+	    NULL, &thr[0], thread_ckpt_run, &td[0]));
+
+	/* Each worker gets an equal share of the 64-bit key space. */
+	for (i = 1; i <= nth; ++i) {
+		td[i].conn = conn;
+		td[i].start = (UINT64_MAX / nth) * (i - 1);
+		td[i].id = i;
+		testutil_check(__wt_thread_create(
+		    NULL, &thr[i], thread_run, &td[i]));
+	}
+	/*
+	 * The threads never exit, so the child will just wait here until
+	 * it is killed.
+	 */
+	printf("Create %" PRIu32 " writer threads\n", nth);
+	fflush(stdout);
+	for (i = 0; i <= nth; ++i)
+		testutil_check(__wt_thread_join(NULL, thr[i]));
+	/*
+	 * NOTREACHED
+	 */
+	free(thr);
+	free(td);
+	exit(EXIT_SUCCESS);
+}
+
+extern int __wt_optind;
+extern char *__wt_optarg;
+
+int
+main(int argc, char *argv[])
+{
+ struct stat sb;
+ FILE *fp;
+ WT_CONNECTION *conn;
+ WT_CURSOR *cur_coll, *cur_local, *cur_oplog, *cur_stable;
+ WT_RAND_STATE rnd;
+ WT_SESSION *session;
+ pid_t pid;
+ uint64_t absent_coll, absent_local, absent_oplog, count, key, last_key;
+ uint64_t first_miss, middle_coll, middle_local, middle_oplog;
+ uint64_t stable_fp, stable_val, val[MAX_TH+1];
+ uint32_t i, nth, timeout;
+ int ch, status, ret;
+ const char *working_dir;
+ char buf[128], fname[64], kname[64], statname[1024];
+ bool fatal, rand_th, rand_time, verify_only;
+
+ (void)testutil_set_progname(argv);
+
+ compat = inmem = false;
+ use_ts = true;
+ nth = MIN_TH;
+ rand_th = rand_time = true;
+ timeout = MIN_TIME;
+ verify_only = false;
+ working_dir = "WT_TEST.timestamp-abort";
+
+ while ((ch = __wt_getopt(progname, argc, argv, "Ch:mT:t:vz")) != EOF)
+ switch (ch) {
+ case 'C':
+ compat = true;
+ break;
+ case 'h':
+ working_dir = __wt_optarg;
+ break;
+ case 'm':
+ inmem = true;
+ break;
+ case 'T':
+ rand_th = false;
+ nth = (uint32_t)atoi(__wt_optarg);
+ break;
+ case 't':
+ rand_time = false;
+ timeout = (uint32_t)atoi(__wt_optarg);
+ break;
+ case 'v':
+ verify_only = true;
+ break;
+ case 'z':
+ use_ts = false;
+ break;
+ default:
+ usage();
+ }
+ argc -= __wt_optind;
+ argv += __wt_optind;
+ if (argc != 0)
+ usage();
+
+ testutil_work_dir_from_path(home, sizeof(home), working_dir);
+ /*
+ * If the user wants to verify they need to tell us how many threads
+ * there were so we can find the old record files.
+ */
+ if (verify_only && rand_th) {
+ fprintf(stderr,
+ "Verify option requires specifying number of threads\n");
+ exit (EXIT_FAILURE);
+ }
+ if (!verify_only) {
+ testutil_make_work_dir(home);
+
+ __wt_random_init_seed(NULL, &rnd);
+ if (rand_time) {
+ timeout = __wt_random(&rnd) % MAX_TIME;
+ if (timeout < MIN_TIME)
+ timeout = MIN_TIME;
+ }
+ if (rand_th) {
+ nth = __wt_random(&rnd) % MAX_TH;
+ if (nth < MIN_TH)
+ nth = MIN_TH;
+ }
+ printf("Parent: compatibility: %s, "
+ "in-mem log sync: %s, timestamp in use: %s\n",
+ compat ? "true" : "false",
+ inmem ? "true" : "false",
+ use_ts ? "true" : "false");
+ printf("Parent: Create %" PRIu32
+ " threads; sleep %" PRIu32 " seconds\n", nth, timeout);
+ /*
+ * Fork a child to insert as many items as it can. We will then
+ * randomly kill the child, run recovery and make sure all items
+ * we wrote exist after recovery runs.
+ */
+ testutil_checksys((pid = fork()) < 0);
+
+ if (pid == 0) { /* child */
+ run_workload(nth);
+ return (EXIT_SUCCESS);
+ }
+
+ /* parent */
+ /*
+ * Sleep for the configured amount of time before killing
+ * the child. Start the timeout from the time we notice that
+ * the file has been created. That allows the test to run
+ * correctly on really slow machines. Verify the process ID
+ * still exists so that, if the child aborts for some reason,
+ * we don't stay in this loop forever.
+ */
+ testutil_check(__wt_snprintf(
+ statname, sizeof(statname), "%s/%s", home, ckpt_file));
+ while (stat(statname, &sb) != 0 && kill(pid, 0) == 0)
+ sleep(1);
+ sleep(timeout);
+
+ /*
+ * !!! It should be plenty long enough to make sure more than
+ * one log file exists. If wanted, that check would be added
+ * here.
+ */
+ printf("Kill child\n");
+ testutil_checksys(kill(pid, SIGKILL) != 0);
+ testutil_checksys(waitpid(pid, &status, 0) == -1);
+ }
+ /*
+ * !!! If we wanted to take a copy of the directory before recovery,
+ * this is the place to do it.
+ */
+ if (chdir(home) != 0)
+ testutil_die(errno, "parent chdir: %s", home);
+ testutil_check(__wt_snprintf(buf, sizeof(buf),
+ "rm -rf ../%s.SAVE && mkdir ../%s.SAVE && cp -rp * ../%s.SAVE",
+ home, home, home));
+ (void)system(buf);
+ printf("Open database, run recovery and verify content\n");
+
+ /*
+ * Open the connection which forces recovery to be run.
+ */
+ if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn)) != 0)
+ testutil_die(ret, "wiredtiger_open");
+ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+ testutil_die(ret, "WT_CONNECTION:open_session");
+ /*
+ * Open a cursor on all the tables.
+ */
+ if ((ret = session->open_cursor(session,
+ uri_collection, NULL, NULL, &cur_coll)) != 0)
+ testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_collection);
+ if ((ret = session->open_cursor(session,
+ uri_local, NULL, NULL, &cur_local)) != 0)
+ testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_local);
+ if ((ret = session->open_cursor(session,
+ uri_oplog, NULL, NULL, &cur_oplog)) != 0)
+ testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_oplog);
+ if ((ret = session->open_cursor(session,
+ stable_store, NULL, NULL, &cur_stable)) != 0)
+ testutil_die(ret, "WT_SESSION.open_cursor: %s", stable_store);
+
+ /*
+ * Find the biggest stable timestamp value that was saved.
+ */
+ stable_val = 0;
+ memset(val, 0, sizeof(val));
+ while (cur_stable->next(cur_stable) == 0) {
+ cur_stable->get_key(cur_stable, &key);
+ cur_stable->get_value(cur_stable, &val[key]);
+ if (val[key] > stable_val)
+ stable_val = val[key];
+
+ if (use_ts)
+ printf("Stable: key %" PRIu64 " value %" PRIu64 "\n",
+ key, val[key]);
+ }
+ if (use_ts)
+ printf("Got stable_val %" PRIu64 "\n", stable_val);
+
+ count = 0;
+ absent_coll = absent_local = absent_oplog = 0;
+ fatal = false;
+ for (i = 1; i <= nth; ++i) {
+ first_miss = middle_coll = middle_local = middle_oplog = 0;
+ testutil_check(__wt_snprintf(
+ fname, sizeof(fname), RECORDS_FILE, i));
+ if ((fp = fopen(fname, "r")) == NULL)
+ testutil_die(errno, "fopen: %s", fname);
+
+ /*
+ * For every key in the saved file, verify that the key exists
+ * in the table after recovery. If we're doing in-memory
+ * log buffering we never expect a record missing in the middle,
+ * but records may be missing at the end. If we did
+ * write-no-sync, we expect every key to have been recovered.
+ */
+ for (last_key = UINT64_MAX;; ++count, last_key = key) {
+ ret = fscanf(fp, "%" SCNu64 "%" SCNu64 "\n",
+ &stable_fp, &key);
+ if (ret != EOF && ret != 2) {
+ /*
+ * If we find a partial line, consider it
+ * like an EOF.
+ */
+ if (ret == 1 || ret == 0)
+ break;
+ testutil_die(errno, "fscanf");
+ }
+ if (ret == EOF)
+ break;
+ /*
+ * If we're unlucky, the last line may be a partially
+ * written key at the end that can result in a false
+ * negative error for a missing record. Detect it.
+ */
+ if (last_key != UINT64_MAX && key != last_key + 1) {
+ printf("%s: Ignore partial record %" PRIu64
+ " last valid key %" PRIu64 "\n",
+ fname, key, last_key);
+ break;
+ }
+ testutil_check(__wt_snprintf(
+ kname, sizeof(kname), "%" PRIu64, key));
+ cur_coll->set_key(cur_coll, kname);
+ cur_local->set_key(cur_local, kname);
+ cur_oplog->set_key(cur_oplog, kname);
+ /*
+ * The collection table should always only have the
+ * data as of the checkpoint.
+ */
+ if ((ret = cur_coll->search(cur_coll)) != 0) {
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "search");
+ /*
+ * If we don't find a record, the stable
+ * timestamp written to our file better be
+ * larger than the saved one.
+ */
+ if (!inmem &&
+ stable_fp != 0 && stable_fp <= val[i]) {
+ printf("%s: COLLECTION no record with "
+ "key %" PRIu64 " record ts %" PRIu64
+ " <= stable ts %" PRIu64 "\n",
+ fname, key, stable_fp, val[i]);
+ absent_coll++;
+ }
+ if (middle_coll == 0)
+ first_miss = key;
+ middle_coll = key;
+ } else if (middle_coll != 0) {
+ /*
+ * We should never find an existing key after
+ * we have detected one missing.
+ */
+ printf("%s: COLLECTION after absent records %"
+ PRIu64 "-%" PRIu64 " key %" PRIu64
+ " exists\n",
+ fname, first_miss, middle_coll, key);
+ fatal = true;
+ }
+ /*
+ * The local table should always have all data.
+ */
+ if ((ret = cur_local->search(cur_local)) != 0) {
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "search");
+ if (!inmem)
+ printf("%s: LOCAL no record with key %"
+ PRIu64 "\n", fname, key);
+ absent_local++;
+ middle_local = key;
+ } else if (middle_local != 0) {
+ /*
+ * We should never find an existing key after
+ * we have detected one missing.
+ */
+ printf("%s: LOCAL after absent record at %"
+ PRIu64 " key %" PRIu64 " exists\n",
+ fname, middle_local, key);
+ fatal = true;
+ }
+ /*
+ * The oplog table should always have all data.
+ */
+ if ((ret = cur_oplog->search(cur_oplog)) != 0) {
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "search");
+ if (!inmem)
+ printf("%s: OPLOG no record with key %"
+ PRIu64 "\n", fname, key);
+ absent_oplog++;
+ middle_oplog = key;
+ } else if (middle_oplog != 0) {
+ /*
+ * We should never find an existing key after
+ * we have detected one missing.
+ */
+ printf("%s: OPLOG after absent record at %"
+ PRIu64 " key %" PRIu64 " exists\n",
+ fname, middle_oplog, key);
+ fatal = true;
+ }
+ }
+ testutil_checksys(fclose(fp) != 0);
+ }
+ if ((ret = conn->close(conn, NULL)) != 0)
+ testutil_die(ret, "WT_CONNECTION:close");
+ if (fatal)
+ return (EXIT_FAILURE);
+ if (!inmem && absent_coll) {
+ printf("COLLECTION: %" PRIu64
+ " record(s) absent from %" PRIu64 "\n",
+ absent_coll, count);
+ fatal = true;
+ }
+ if (!inmem && absent_local) {
+ printf("LOCAL: %" PRIu64 " record(s) absent from %" PRIu64 "\n",
+ absent_local, count);
+ fatal = true;
+ }
+ if (!inmem && absent_oplog) {
+ printf("OPLOG: %" PRIu64 " record(s) absent from %" PRIu64 "\n",
+ absent_oplog, count);
+ fatal = true;
+ }
+ if (fatal)
+ return (EXIT_FAILURE);
+ printf("%" PRIu64 " records verified\n", count);
+ return (EXIT_SUCCESS);
+}
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp01.py b/src/third_party/wiredtiger/test/suite/test_timestamp01.py
index a934753488d..c8938296908 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp01.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp01.py
@@ -52,7 +52,7 @@ class test_timestamp01(wttest.WiredTigerTestCase, suite_subprocess):
self.session.begin_transaction()
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda: self.session.commit_transaction(
- 'commit_timestamp=' + timestamp_str(1 << 100)),
+ 'commit_timestamp=' + timestamp_str(1 << 5000)),
'/too long/')
# One is okay, as is 2**64 - 1
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp03.py b/src/third_party/wiredtiger/test/suite/test_timestamp03.py
index 734961e9e98..728200e528a 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp03.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp03.py
@@ -62,7 +62,6 @@ class test_timestamp03(wttest.WiredTigerTestCase, suite_subprocess):
('use_ts_def', dict(ckptcfg='', val='none')),
('use_ts_false', dict(ckptcfg='use_timestamp=false', val='all')),
('use_ts_true', dict(ckptcfg='use_timestamp=true', val='none')),
- ('read_ts', dict(ckptcfg='read_timestamp', val='none')),
]
conncfg = [
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp04.py b/src/third_party/wiredtiger/test/suite/test_timestamp04.py
index 146326834db..3af0feed31b 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp04.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp04.py
@@ -30,8 +30,6 @@
# Timestamps: Test that rollback_to_stable obeys expected visibility rules
#
-import datetime
-import random
from suite_subprocess import suite_subprocess
import wiredtiger, wttest
from wtscenario import make_scenarios
@@ -50,9 +48,10 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
uri = 'table:' + tablename
scenarios = make_scenarios([
- #('col', dict(extra_config=',key_format=r')),
- #('lsm', dict(extra_config=',type=lsm')),
- ('row', dict(extra_config=',memory_page_max=32k,leaf_page_max=8k,internal_page_max=8k')),
+ ('col_fix', dict(empty=1, extra_config=',key_format=r, value_format=8t')),
+ ('col_var', dict(empty=0, extra_config=',key_format=r')),
+ #('lsm', dict(empty=0, extra_config=',type=lsm')),
+ ('row', dict(empty=0, extra_config='')),
])
# Rollback only works for non-durable tables
@@ -65,17 +64,21 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
session.begin_transaction(txn_config)
c = session.open_cursor(self.uri, None)
if missing == False:
- actual = dict((k, v) for k, v, pad in c if v != 0)
+ actual = dict((k, v) for k, v in c if v != 0)
#print expected
#print actual
self.assertEqual(actual, expected)
# Search for the expected items as well as iterating
for k, v in expected.iteritems():
if missing == False:
- self.assertEqual(c[k][0], v, "for key " + str(k))
+ self.assertEqual(c[k], v, "for key " + str(k))
else:
c.set_key(k)
- self.assertEqual(c.search(), wiredtiger.WT_NOTFOUND)
+ if self.empty:
+ # Fixed-length column-store rows always exist.
+ self.assertEqual(c.search(), 0)
+ else:
+ self.assertEqual(c.search(), wiredtiger.WT_NOTFOUND)
c.close()
if txn_config:
session.commit_transaction()
@@ -87,7 +90,8 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
# Configure small page sizes to ensure eviction comes through and we have a
# somewhat complex tree
self.session.create(self.uri,
- 'key_format=i,value_format=iS,memory_page_max=16k,leaf_page_max=8k' + self.extra_config)
+ 'key_format=i,value_format=i,memory_page_max=32k,leaf_page_max=8k,internal_page_max=8k'
+ + self.extra_config)
c = self.session.open_cursor(self.uri)
# Insert keys each with timestamp=key, in some order
@@ -96,7 +100,7 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
for k in keys:
self.session.begin_transaction()
- c[k] = (1, 'the quick brown fox')
+ c[k] = 1
self.session.commit_transaction('commit_timestamp=' + timestamp_str(k))
# Setup an oldest timestamp to ensure state remains in cache.
if k == 1:
@@ -119,7 +123,7 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
# Update the values again in preparation for rolling back more
for k in keys:
self.session.begin_transaction()
- c[k] = (2, 'jumped over the lazy dog')
+ c[k] = 2
self.session.commit_transaction('commit_timestamp=' + timestamp_str(k + key_range))
# Now we should have: keys 1-100 with value 2
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp05.py b/src/third_party/wiredtiger/test/suite/test_timestamp05.py
new file mode 100644
index 00000000000..d7131cb2004
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp05.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2017 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_timestamp05.py
+# Timestamps: make sure they don't end up in metadata
+#
+
+from helper import copy_wiredtiger_home
+import random
+from suite_subprocess import suite_subprocess
+import wiredtiger, wttest
+from wtscenario import make_scenarios
+
+def timestamp_str(t):
+ return '%x' % t
+
+def timestamp_ret_str(t):
+ s = timestamp_str(t)
+ if len(s) % 2 == 1:
+ s = '0' + s
+ return s
+
+class test_timestamp05(wttest.WiredTigerTestCase, suite_subprocess):
+ uri = 'table:ts05'
+
+ def test_create(self):
+ if not wiredtiger.timestamp_build():
+ self.skipTest('requires a timestamp build')
+
+ s = self.session
+ conn = self.conn
+
+ # Start timestamps at 50
+ conn.set_timestamp('oldest_timestamp=50,stable_timestamp=50')
+
+ # Commit at 100
+ s.begin_transaction()
+ s.create(self.uri, 'key_format=i,value_format=S')
+ s.commit_transaction('commit_timestamp=' + timestamp_str(100))
+
+ # Make sure the tree is dirty
+ c = s.open_cursor(self.uri)
+ c[200] = 'new value'
+
+ # Checkpoint at 50
+ s.checkpoint('use_timestamp=true')
+
+ def test_bulk(self):
+ if not wiredtiger.timestamp_build():
+ self.skipTest('requires a timestamp build')
+
+ s = self.session
+ conn = self.conn
+
+ s.create(self.uri, 'key_format=i,value_format=S')
+ c = s.open_cursor(self.uri, None, 'bulk')
+
+ # Bulk-load keys 1..100 in ascending key order
+ nkeys = 100
+ keys = range(1, nkeys+1)
+
+ for k in keys:
+ c[k] = 'some value'
+
+ # Start timestamps at 50
+ conn.set_timestamp('oldest_timestamp=50,stable_timestamp=50')
+
+ # Commit at 100
+ s.begin_transaction()
+ c.close()
+ s.commit_transaction('commit_timestamp=' + timestamp_str(100))
+
+ # Make sure the tree is dirty
+ c = s.open_cursor(self.uri)
+ c[200] = 'new value'
+
+ # Checkpoint at 50
+ s.checkpoint('use_timestamp=true')
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp07.py b/src/third_party/wiredtiger/test/suite/test_timestamp07.py
new file mode 100644
index 00000000000..c1f70e0cb1a
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp07.py
@@ -0,0 +1,284 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2017 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_timestamp07.py
+# Timestamps: checkpoints and eviction
+#
+
+from helper import copy_wiredtiger_home
+import random
+from suite_subprocess import suite_subprocess
+import wiredtiger, wttest
+from wiredtiger import stat
+from wtscenario import make_scenarios
+
+def timestamp_str(t):
+ return '%x' % t
+
+class test_timestamp07(wttest.WiredTigerTestCase, suite_subprocess):
+ tablename = 'ts07_ts_nologged'
+ tablename2 = 'ts07_nots_logged'
+ tablename3 = 'ts07_ts_logged'
+
+ types = [
+ ('file', dict(uri='file:', use_cg=False, use_index=False)),
+ ('table-cg', dict(uri='table:', use_cg=True, use_index=False)),
+ ]
+
+ conncfg = [
+ ('nolog', dict(conn_config='create,cache_size=1M,statistics=(fast)', using_log=False)),
+ ('log', dict(conn_config='create,log=(enabled),cache_size=1M,statistics=(fast)', using_log=True)),
+ ]
+
+ nkeys = [
+ ('100keys', dict(nkeys=100,evicts=False)),
+ ('500keys', dict(nkeys=500,evicts=True)),
+# ('1000keys', dict(nkeys=1000,evicts=True)),
+ ]
+
+ scenarios = make_scenarios(types, conncfg, nkeys)
+
+ modified_evicted = 0
+
+ # Binary values.
+ value = u'\u0001\u0002abcd\u0007\u0004'
+ value2 = u'\u0001\u0002dcba\u0007\u0004'
+ value3 = u'\u0001\u0002cdef\u0007\u0004'
+
+ # Check that a cursor (optionally started in a new transaction), sees the
+ # expected values.
+ def check(self, session, txn_config, expected):
+ if txn_config:
+ #print "Check: txn_config:"
+ #print txn_config
+ session.begin_transaction(txn_config)
+ c = session.open_cursor(self.uri + self.tablename, None)
+ actual = dict((k, v) for k, v in c if v != 0)
+ self.maxDiff = None
+ #print "Expected:"
+ #print expected
+ #print "Actual:"
+ #print actual
+ self.assertEqual(actual, expected)
+ # Search for the expected items as well as iterating
+ for k, v in expected.iteritems():
+ self.assertEqual(c[k], v, "for key " + str(k))
+ c.close()
+ if txn_config:
+ session.commit_transaction()
+ #
+ # Take a backup of the database and verify that the value we want to
+ # check exists in the tables the expected number of times.
+ #
+ def backup_check(self, check_value, valcnt, valcnt2, valcnt3):
+ newdir = "BACKUP"
+ copy_wiredtiger_home('.', newdir, True)
+
+ conn = self.setUpConnectionOpen(newdir)
+ session = self.setUpSessionOpen(conn)
+ c = session.open_cursor(self.uri + self.tablename, None)
+ c2 = session.open_cursor(self.uri + self.tablename2, None)
+ c3 = session.open_cursor(self.uri + self.tablename3, None)
+ # Count how many times check_value is present in the first table
+ count = 0
+ for k, v in c:
+ if check_value in str(v):
+ # print "check_value found in key " + str(k)
+ count += 1
+ c.close()
+ # Count how many times check_value is present in the
+ # non-timestamp table.
+ count2 = 0
+ for k, v in c2:
+ if check_value in str(v):
+ # print "check_value found in key " + str(k)
+ count2 += 1
+ c2.close()
+ # Count how many times check_value is present in the
+ # logged timestamp table.
+ count3 = 0
+ for k, v in c3:
+ if check_value in str(v):
+ count3 += 1
+ c3.close()
+ conn.close()
+ # print "CHECK BACKUP: Count " + str(count) + " Count2 " + str(count2) + " Count3 " + str(count3)
+ # print "CHECK BACKUP: Expect value2 count " + str(valcnt)
+ # print "CHECK BACKUP: 2nd table Expect value2 count " + str(valcnt2)
+ # print "CHECK BACKUP: 3rd table Expect value2 count " + str(valcnt3)
+ self.assertEqual(count, valcnt)
+ self.assertEqual(count2, valcnt2)
+ self.assertEqual(count3, valcnt3)
+
+ # Check whether eviction happened since the last call; nothing is returned.
+ def check_eviction(self):
+ # Get a statistics cursor and look at the number of dirty pages
+ # evicted. Keep track of the last read value so we can determine
+ # if the value changed since the last call to this function.
+ stat_cursor = self.session.open_cursor('statistics:', None, None)
+ evict_dirty = stat_cursor[stat.conn.cache_eviction_dirty][2]
+
+ # did_eviction is True if the new value is more, False otherwise.
+ #print "Old: " + str(self.modified_evicted)
+ # print "New: " + str(evict_dirty)
+ did_eviction = self.modified_evicted < evict_dirty
+ stat_cursor.close()
+ self.modified_evicted = evict_dirty
+ # print "Evict ret: " + str(ret)
+
+ # XXX we can't guarantee that eviction will always happen, but make
+ # sure it doesn't happen if not expected.
+ self.assertTrue(not did_eviction or self.evicts)
+
+ # Take a checkpoint, then check a backup copy for the expected value counts.
+ def ckpt_backup(self, check_value, valcnt, valcnt2, valcnt3):
+
+ # Take a checkpoint. Make a copy of the database. Open the
+ # copy and verify whether or not the expected data is in there.
+ ckptcfg = 'use_timestamp=true'
+ self.session.checkpoint(ckptcfg)
+ self.backup_check(check_value, valcnt, valcnt2, valcnt3)
+
+ def test_timestamp07(self):
+ if not wiredtiger.timestamp_build():
+ self.skipTest('requires a timestamp build')
+
+ uri = self.uri + self.tablename
+ uri2 = self.uri + self.tablename2
+ uri3 = self.uri + self.tablename3
+ #
+ # Open three tables:
+ # 1. Table is not logged and uses timestamps.
+ # 2. Table is logged and does not use timestamps.
+ # 3. Table is logged and uses timestamps.
+ #
+ self.session.create(uri, 'key_format=i,value_format=S,log=(enabled=false)')
+ c = self.session.open_cursor(uri)
+ self.session.create(uri2, 'key_format=i,value_format=S')
+ c2 = self.session.open_cursor(uri2)
+ self.session.create(uri3, 'key_format=i,value_format=S')
+ c3 = self.session.open_cursor(uri3)
+
+ # Insert keys 1..nkeys each with timestamp=key, in some order.
+ orig_keys = range(1, self.nkeys+1)
+ keys = orig_keys[:]
+ random.shuffle(keys)
+
+ for k in keys:
+ c2[k] = self.value
+ self.session.begin_transaction()
+ c[k] = self.value
+ c3[k] = self.value
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(k))
+
+ self.check_eviction()
+ # Now check that we see the expected state when reading at each
+ # timestamp.
+ for i, t in enumerate(orig_keys):
+ self.check(self.session, 'read_timestamp=' + timestamp_str(t),
+ dict((k, self.value) for k in orig_keys[:i+1]))
+
+ # Bump the oldest timestamp, we're not going back...
+ self.assertEqual(self.conn.query_timestamp(), timestamp_str(self.nkeys))
+ self.oldts = timestamp_str(self.nkeys)
+ self.conn.set_timestamp('oldest_timestamp=' + self.oldts)
+ self.conn.set_timestamp('stable_timestamp=' + self.oldts)
+ # print "Oldest " + self.oldts
+
+ # Update them and retry.
+ random.shuffle(keys)
+ count = 0
+ for k in keys:
+ # Make sure a timestamp cursor is the last one to update. This
+ # tests the scenario for a bug we found where recovery replayed
+ # the last record written into the log.
+ #
+ # print "Key " + str(k) + " to value2"
+ c2[k] = self.value2
+ self.session.begin_transaction()
+ c[k] = self.value2
+ c3[k] = self.value2
+ ts = timestamp_str(k + self.nkeys)
+ self.session.commit_transaction('commit_timestamp=' + ts)
+ # print "Commit key " + str(k) + " ts " + ts
+ count += 1
+
+ self.check_eviction()
+
+ # print "Updated " + str(count) + " keys to value2"
+
+ # Take a checkpoint using the given configuration. Then verify
+ # whether value2 appears in a copy of that data or not.
+ valcnt2 = valcnt3 = self.nkeys
+ valcnt = 0
+ self.ckpt_backup(self.value2, valcnt, valcnt2, valcnt3)
+ # Update the stable timestamp to the latest, but not the oldest
+ # timestamp and make sure we can see the data. Once the stable
+ # timestamp is moved we should see all keys with value2.
+ self.conn.set_timestamp('stable_timestamp=' + \
+ timestamp_str(self.nkeys*2))
+ self.ckpt_backup(self.value2, self.nkeys, self.nkeys, self.nkeys)
+
+ # If we're not using the log we're done.
+ if not self.using_log:
+ return
+
+ # Update the key and retry. This time take a backup and recover.
+ random.shuffle(keys)
+ count = 0
+ for k in keys:
+ # Make sure a timestamp cursor is the last one to update. This
+ # tests the scenario for a bug we found where recovery replayed
+ # the last record written into the log.
+ #
+ # print "Key " + str(k) + " to value3"
+ c2[k] = self.value3
+ self.session.begin_transaction()
+ c[k] = self.value3
+ c3[k] = self.value3
+ ts = timestamp_str(k + self.nkeys*2)
+ self.session.commit_transaction('commit_timestamp=' + ts)
+ # print "Commit key " + str(k) + " ts " + ts
+ count += 1
+
+ self.check_eviction()
+ # print "Updated " + str(count) + " keys to value3"
+
+ # Flush the log but don't checkpoint
+ self.session.log_flush('sync=on')
+
+ # Take a backup and then verify whether value3 appears in a copy
+ # of that data or not. Both tables that are logged should see
+ # all the data regardless of timestamps. The table that is not
+ # logged should not see any of it.
+ valcnt = 0
+ valcnt2 = valcnt3 = self.nkeys
+ self.backup_check(self.value3, valcnt, valcnt2, valcnt3)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/utility/test_util.h b/src/third_party/wiredtiger/test/utility/test_util.h
index e53018ad4ea..7500df8d5e5 100644
--- a/src/third_party/wiredtiger/test/utility/test_util.h
+++ b/src/third_party/wiredtiger/test/utility/test_util.h
@@ -117,6 +117,18 @@ typedef struct {
} while (0)
/*
+ * testutil_checksys --
+ * Complain and quit if a function call fails, reporting errno. The error
+ * test must be specified, not just the call, because system calls fail in a
+ * variety of ways.
+ */
+#define testutil_checksys(call) do { \
+ if (call) \
+ testutil_die( \
+ errno, "%s/%d: %s", __func__, __LINE__, #call); \
+} while (0)
+
+/*
* testutil_checkfmt --
* Complain and quit if a function call fails, with additional arguments.
*/