summary | refs | log | tree | commit | diff
path: root/src/third_party
diff options
context:
space:
mode:
author: Luke Chen <luke.chen@mongodb.com> 2018-03-29 09:36:51 +1100
committer: Luke Chen <luke.chen@mongodb.com> 2018-03-29 09:36:51 +1100
commit: 55b2be7e2d7c5262670b4375e34dd49b95dc63ef (patch)
tree: 0d085a9e469df598faefef57924205d7cff9aa51 /src/third_party
parent: f511a790ffe197de7cedc9c6c7c16aca8054b11b (diff)
download: mongo-55b2be7e2d7c5262670b4375e34dd49b95dc63ef.tar.gz
Import wiredtiger: dc58dd84dba0bdd358f8cc09b17f90c7659ac429 from branch mongodb-3.6
ref: 4d5794b937..dc58dd84db
for: 3.6.4
WT-3869 Bi-weekly WT codebase lint
WT-3913 Enhance cursor operations to account for prepare state
WT-3950 Add some rollback_to_stable statistics
WT-3958 Add query API to get most recent checkpoint's stable timestamp
WT-3969 Enhance format tester to account for prepare state
WT-3972 Allow more than 64K cursors to be open on a data source simultaneously
WT-3975 Arg format mismatch after rwlock changes
WT-3977 Print out actual checkpoint stable timestamp in timestamp_abort
WT-3979 Fix warnings generated with newer Doxygen releases
WT-3980 Failure returning a modified update without a backing "real" update
WT-3982 Fix transaction visibility bugs related to lookaside usage.
WT-3985 Pre-allocated log files accumulate on Windows
WT-3987 Avoid reading lookaside pages in truncate fast path
WT-3990 Fix Coverity warnings mostly in test programs
Diffstat (limited to 'src/third_party')
-rw-r--r-- src/third_party/wiredtiger/dist/api_data.py 3
-rwxr-xr-x src/third_party/wiredtiger/dist/s_prototypes 3
-rw-r--r-- src/third_party/wiredtiger/dist/s_string.ok 3
-rw-r--r-- src/third_party/wiredtiger/dist/stat_data.py 3
-rw-r--r-- src/third_party/wiredtiger/ext/test/fail_fs/fail_fs.c 10
-rw-r--r-- src/third_party/wiredtiger/import.data 2
-rw-r--r-- src/third_party/wiredtiger/src/block/block_open.c 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curnext.c 70
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curprev.c 71
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_cursor.c 175
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_delete.c 242
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_io.c 3
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_page.c 7
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_random.c 4
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_read.c 20
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_split.c 3
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_walk.c 3
-rw-r--r-- src/third_party/wiredtiger/src/cache/cache_las.c 25
-rw-r--r-- src/third_party/wiredtiger/src/config/config_def.c 4
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_cache_pool.c 7
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_log.c 5
-rw-r--r-- src/third_party/wiredtiger/src/docs/Doxyfile 64
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_page.c 78
-rw-r--r-- src/third_party/wiredtiger/src/include/api.h 3
-rw-r--r-- src/third_party/wiredtiger/src/include/btmem.h 4
-rw-r--r-- src/third_party/wiredtiger/src/include/btree.i 32
-rw-r--r-- src/third_party/wiredtiger/src/include/cursor.h 21
-rw-r--r-- src/third_party/wiredtiger/src/include/cursor.i 14
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h 26
-rw-r--r-- src/third_party/wiredtiger/src/include/extern_posix.h 4
-rw-r--r-- src/third_party/wiredtiger/src/include/extern_win.h 8
-rw-r--r-- src/third_party/wiredtiger/src/include/misc.h 5
-rw-r--r-- src/third_party/wiredtiger/src/include/mutex.h 5
-rw-r--r-- src/third_party/wiredtiger/src/include/stat.h 3
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.h 1
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.i 96
-rw-r--r-- src/third_party/wiredtiger/src/include/wiredtiger.in 71
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_cursor.c 2
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_tree.c 13
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_work_unit.c 7
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_fs.c 2
-rw-r--r-- src/third_party/wiredtiger/src/os_win/os_dir.c 23
-rw-r--r-- src/third_party/wiredtiger/src/os_win/os_utf8.c 4
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c 17
-rw-r--r-- src/third_party/wiredtiger/src/support/mtx_rw.c 41
-rw-r--r-- src/third_party/wiredtiger/src/support/stat.c 12
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn.c 3
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_ckpt.c 17
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c 3
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_timestamp.c 5
-rw-r--r-- src/third_party/wiredtiger/src/utilities/util_load.c 10
-rw-r--r-- src/third_party/wiredtiger/test/bloom/test_bloom.c 22
-rw-r--r-- src/third_party/wiredtiger/test/csuite/rwlock/main.c 4
-rw-r--r-- src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c 17
-rw-r--r-- src/third_party/wiredtiger/test/format/config.c 8
-rw-r--r-- src/third_party/wiredtiger/test/format/ops.c 411
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_bug019.py 82
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_cursor13.py 2
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_cursor14.py 60
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_prepare04.py 122
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_timestamp04.py 22
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_timestamp10.py 4
62 files changed, 1263 insertions, 750 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 17fe0d97735..f5e0b4a67a3 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -1371,7 +1371,8 @@ methods = {
\c oldest_timestamp and the read timestamps of all active readers, and
\c stable returns the most recent \c stable_timestamp set with
WT_CONNECTION::set_timestamp. See @ref transaction_timestamps''',
- choices=['all_committed','oldest','pinned','recovery','stable']),
+ choices=['all_committed','last_checkpoint',
+ 'oldest','pinned','recovery','stable']),
]),
'WT_CONNECTION.set_timestamp' : Method([
diff --git a/src/third_party/wiredtiger/dist/s_prototypes b/src/third_party/wiredtiger/dist/s_prototypes
index 20e08eb4c54..75863cf8f87 100755
--- a/src/third_party/wiredtiger/dist/s_prototypes
+++ b/src/third_party/wiredtiger/dist/s_prototypes
@@ -42,7 +42,8 @@ proto()
-e x \
-e '}' \
-e '# Add the warn_unused_result attribute to any external' \
- -e '# functions that return an int.' \
+ -e '# functions that return a boolean or an int.' \
+ -e '/^extern bool /s/$/ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result))/' \
-e '/^extern int /s/$/ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result))/' \
-e 's/$/;/' \
-e p < $1
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index aa68e584376..7330f560eb6 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -138,6 +138,7 @@ FULLFSYNC
Facebook
FindClose
FindFirstFile
+FindNextFileW
Fixup
Fk
FlushFileBuffers
@@ -523,7 +524,7 @@ ccr
cd
centric
cfg
-cfkos
+cfko
change's
changelog
chdir
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index a630ebe3fa9..1441187812e 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -518,6 +518,9 @@ connection_stats = [
TxnStat('txn_read_queue_inserts', 'read timestamp queue inserts total'),
TxnStat('txn_read_queue_len', 'read timestamp queue length'),
TxnStat('txn_rollback', 'transactions rolled back'),
+ TxnStat('txn_rollback_las_removed', 'rollback to stable updates removed from lookaside'),
+ TxnStat('txn_rollback_to_stable', 'rollback to stable calls'),
+ TxnStat('txn_rollback_upd_aborted', 'rollback to stable updates aborted'),
TxnStat('txn_set_ts', 'set timestamp calls'),
TxnStat('txn_set_ts_commit', 'set timestamp commit calls'),
TxnStat('txn_set_ts_commit_upd', 'set timestamp commit updates'),
diff --git a/src/third_party/wiredtiger/ext/test/fail_fs/fail_fs.c b/src/third_party/wiredtiger/ext/test/fail_fs/fail_fs.c
index bde1bfc48bf..bdb4669a637 100644
--- a/src/third_party/wiredtiger/ext/test/fail_fs/fail_fs.c
+++ b/src/third_party/wiredtiger/ext/test/fail_fs/fail_fs.c
@@ -452,16 +452,14 @@ fail_fs_directory_list(WT_FILE_SYSTEM *file_system,
* matter if the list is a bit longer than necessary.
*/
if (count >= allocated) {
- p = realloc(
- entries, (allocated + 10) * sizeof(*entries));
- if (p == NULL) {
+ allocated += 10;
+ if ((p = realloc(
+ entries, allocated * sizeof(*entries))) == NULL) {
ret = ENOMEM;
goto err;
}
entries = p;
- memset(entries + allocated * sizeof(*entries),
- 0, 10 * sizeof(*entries));
- allocated += 10;
+ memset(entries + count, 0, 10 * sizeof(*entries));
}
entries[count++] = strdup(name);
}
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 17424bdbfda..5b74711461d 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "4d5794b93795d52af97dc150d81b637442b89c5d",
+ "commit": "dc58dd84dba0bdd358f8cc09b17f90c7659ac429",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-3.6"
diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c
index 7c7e0e5c525..32e13acfa83 100644
--- a/src/third_party/wiredtiger/src/block/block_open.c
+++ b/src/third_party/wiredtiger/src/block/block_open.c
@@ -256,7 +256,7 @@ __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block)
conn = S2C(session);
__wt_verbose(session, WT_VERB_BLOCK,
- "close: %s", block->name == NULL ? "" : block->name );
+ "close: %s", block->name == NULL ? "" : block->name);
__wt_spin_lock(session, &conn->block_lock);
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index 6737af9996b..63d2cda4714 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -54,12 +54,17 @@ __cursor_fix_append_next(WT_CURSOR_BTREE *cbt, bool newpage)
* insert is aborted, we simply return zero (empty), regardless of
* whether we are at the end of the data.
*/
- if (cbt->recno < WT_INSERT_RECNO(cbt->ins) ||
- (upd = __wt_txn_read(session, cbt->ins->upd)) == NULL) {
+ if (cbt->recno < WT_INSERT_RECNO(cbt->ins)) {
cbt->v = 0;
cbt->iface.value.data = &cbt->v;
- } else
- cbt->iface.value.data = upd->data;
+ } else {
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd == NULL) {
+ cbt->v = 0;
+ cbt->iface.value.data = &cbt->v;
+ } else
+ cbt->iface.value.data = upd->data;
+ }
cbt->iface.value.size = 1;
return (0);
}
@@ -79,6 +84,7 @@ __cursor_fix_next(WT_CURSOR_BTREE *cbt, bool newpage)
session = (WT_SESSION_IMPL *)cbt->iface.session;
btree = S2BT(session);
page = cbt->ref->page;
+ upd = NULL;
/* Initialize for each new page. */
if (newpage) {
@@ -101,7 +107,8 @@ new_page:
cbt->ins_head, cbt->ins_stack, cbt->next_stack, cbt->recno);
if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins))
cbt->ins = NULL;
- upd = cbt->ins == NULL ? NULL : __wt_txn_read(session, cbt->ins->upd);
+ if (cbt->ins != NULL)
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
if (upd == NULL) {
cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt);
cbt->iface.value.data = &cbt->v;
@@ -134,7 +141,8 @@ new_page: if (cbt->ins == NULL)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
- if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL)
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd == NULL)
continue;
if (upd->type == WT_UPDATE_TOMBSTONE) {
if (upd->txnid != WT_TXN_NONE &&
@@ -193,8 +201,9 @@ new_page: /* Find the matching WT_COL slot. */
/* Check any insert list for a matching record. */
cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
- upd = cbt->ins == NULL ?
- NULL : __wt_txn_read(session, cbt->ins->upd);
+ upd = NULL;
+ if (cbt->ins != NULL)
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
if (upd != NULL) {
if (upd->type == WT_UPDATE_TOMBSTONE) {
if (upd->txnid != WT_TXN_NONE &&
@@ -311,7 +320,8 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage)
cbt->ins = WT_SKIP_NEXT(cbt->ins);
new_insert: if ((ins = cbt->ins) != NULL) {
- if ((upd = __wt_txn_read(session, ins->upd)) == NULL)
+ WT_RET(__wt_txn_read(session, ins->upd, &upd));
+ if (upd == NULL)
continue;
if (upd->type == WT_UPDATE_TOMBSTONE) {
if (upd->txnid != WT_TXN_NONE &&
@@ -344,7 +354,7 @@ new_insert: if ((ins = cbt->ins) != NULL) {
cbt->slot = cbt->row_iteration_slot / 2 - 1;
rip = &page->pg_row[cbt->slot];
- upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip));
+ WT_RET(__wt_txn_read(session, WT_ROW_UPDATE(page, rip), &upd));
if (upd != NULL && upd->type == WT_UPDATE_TOMBSTONE) {
if (upd->txnid != WT_TXN_NONE &&
__wt_txn_upd_visible_all(session, upd))
@@ -571,8 +581,9 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
WT_DECL_RET;
WT_PAGE *page;
WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
uint32_t flags;
- bool newpage;
+ bool newpage, valid;
cursor = &cbt->iface;
session = (WT_SESSION_IMPL *)cbt->iface.session;
@@ -582,6 +593,26 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ /*
+ * In case of retrying a next operation due to a prepare conflict,
+ * cursor would have been already positioned at an update structure
+ * which resulted in conflict. So, now when retrying we should examine
+ * the same update again instead of starting from the next one in the
+ * update chain.
+ */
+ F_CLR(cbt, WT_CBT_RETRY_PREV);
+ if (F_ISSET(cbt, WT_CBT_RETRY_NEXT)) {
+ WT_RET(__wt_cursor_valid(cbt, &upd, &valid));
+ F_CLR(cbt, WT_CBT_RETRY_NEXT);
+ if (valid) {
+ /*
+ * If the update, which returned prepared conflict is
+ * visible, return the value.
+ */
+ return (__cursor_kv_return(session, cbt, upd));
+ }
+ }
+
WT_RET(__cursor_func_init(cbt, false));
/*
@@ -663,15 +694,24 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);
}
-
#ifdef HAVE_DIAGNOSTIC
if (ret == 0)
WT_ERR(__wt_cursor_key_order_check(session, cbt, true));
#endif
- if (ret == 0)
+err: switch (ret) {
+ case 0:
F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
-
-err: if (ret != 0)
+ break;
+ case WT_PREPARE_CONFLICT:
+ /*
+ * If prepare conflict occurs, cursor should not be reset,
+ * as current cursor position will be reused in case of a
+ * retry from user.
+ */
+ F_SET(cbt, WT_CBT_RETRY_NEXT);
+ break;
+ default:
WT_TRET(__cursor_reset(cbt));
+ }
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index 068a9915ab9..3356baeb24a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -199,13 +199,18 @@ __cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage)
* created records written by reconciliation are deleted and so can be
* never seen by a read.
*/
- if (cbt->ins == NULL ||
- cbt->recno > WT_INSERT_RECNO(cbt->ins) ||
- (upd = __wt_txn_read(session, cbt->ins->upd)) == NULL) {
+ if (cbt->ins == NULL || cbt->recno > WT_INSERT_RECNO(cbt->ins)) {
cbt->v = 0;
cbt->iface.value.data = &cbt->v;
- } else
- cbt->iface.value.data = upd->data;
+ } else {
+ upd = NULL;
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd == NULL) {
+ cbt->v = 0;
+ cbt->iface.value.data = &cbt->v;
+ } else
+ cbt->iface.value.data = upd->data;
+ }
cbt->iface.value.size = 1;
return (0);
}
@@ -247,7 +252,9 @@ new_page:
cbt->ins_head, cbt->ins_stack, cbt->next_stack, cbt->recno);
if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins))
cbt->ins = NULL;
- upd = cbt->ins == NULL ? NULL : __wt_txn_read(session, cbt->ins->upd);
+ upd = NULL;
+ if (cbt->ins != NULL)
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
if (upd == NULL) {
cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt);
cbt->iface.value.data = &cbt->v;
@@ -280,7 +287,8 @@ new_page: if (cbt->ins == NULL)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
- if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL)
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd == NULL)
continue;
if (upd->type == WT_UPDATE_TOMBSTONE) {
if (upd->txnid != WT_TXN_NONE &&
@@ -340,8 +348,9 @@ new_page: if (cbt->recno < cbt->ref->ref_recno)
/* Check any insert list for a matching record. */
cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
- upd = cbt->ins == NULL ?
- NULL : __wt_txn_read(session, cbt->ins->upd);
+ upd = NULL;
+ if (cbt->ins != NULL)
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
if (upd != NULL) {
if (upd->type == WT_UPDATE_TOMBSTONE) {
if (upd->txnid != WT_TXN_NONE &&
@@ -468,7 +477,8 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage)
WT_RET(__cursor_skip_prev(cbt));
new_insert: if ((ins = cbt->ins) != NULL) {
- if ((upd = __wt_txn_read(session, ins->upd)) == NULL)
+ WT_RET(__wt_txn_read(session, ins->upd, &upd));
+ if (upd == NULL)
continue;
if (upd->type == WT_UPDATE_TOMBSTONE) {
if (upd->txnid != WT_TXN_NONE &&
@@ -503,7 +513,7 @@ new_insert: if ((ins = cbt->ins) != NULL) {
cbt->slot = cbt->row_iteration_slot / 2 - 1;
rip = &page->pg_row[cbt->slot];
- upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip));
+ WT_RET(__wt_txn_read(session, WT_ROW_UPDATE(page, rip), &upd));
if (upd != NULL && upd->type == WT_UPDATE_TOMBSTONE) {
if (upd->txnid != WT_TXN_NONE &&
__wt_txn_upd_visible_all(session, upd))
@@ -526,8 +536,9 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
WT_DECL_RET;
WT_PAGE *page;
WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
uint32_t flags;
- bool newpage;
+ bool newpage, valid;
cursor = &cbt->iface;
session = (WT_SESSION_IMPL *)cbt->iface.session;
@@ -537,6 +548,26 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ /*
+ * In case of retrying a prev operation due to a prepare conflict,
+ * cursor would have been already positioned at an update structure
+ * which resulted in conflict. So, now when retrying we should examine
+ * the same update again instead of starting from the next one in the
+ * update chain.
+ */
+ F_CLR(cbt, WT_CBT_RETRY_NEXT);
+ if (F_ISSET(cbt, WT_CBT_RETRY_PREV)) {
+ WT_RET(__wt_cursor_valid(cbt, &upd, &valid));
+ F_CLR(cbt, WT_CBT_RETRY_PREV);
+ if (valid) {
+ /*
+ * If the update, which returned prepared conflict is
+ * visible, return the value.
+ */
+ return (__cursor_kv_return(session, cbt, upd));
+ }
+ }
+
WT_RET(__cursor_func_init(cbt, false));
/*
@@ -622,10 +653,20 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
if (ret == 0)
WT_ERR(__wt_cursor_key_order_check(session, cbt, false));
#endif
- if (ret == 0)
+err: switch (ret) {
+ case 0:
F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
-
-err: if (ret != 0)
+ break;
+ case WT_PREPARE_CONFLICT:
+ /*
+ * If prepare conflict occurs, cursor should not be reset,
+ * as current cursor position will be reused in case of a
+ * retry from user.
+ */
+ F_SET(cbt, WT_CBT_RETRY_PREV);
+ break;
+ default:
WT_TRET(__cursor_reset(cbt));
+ }
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 29725e22b2c..9a30ee2c1a4 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -205,8 +205,8 @@ __cursor_fix_implicit(WT_BTREE *btree, WT_CURSOR_BTREE *cbt)
* __wt_cursor_valid --
* Return if the cursor references an valid key/value pair.
*/
-bool
-__wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
+int
+__wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *valid)
{
WT_BTREE *btree;
WT_CELL *cell;
@@ -215,11 +215,12 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
WT_SESSION_IMPL *session;
WT_UPDATE *upd;
+ if (updp != NULL)
+ *updp = NULL;
+ *valid = false;
btree = cbt->btree;
page = cbt->ref->page;
session = (WT_SESSION_IMPL *)cbt->iface.session;
- if (updp != NULL)
- *updp = NULL;
/*
* We may be pointing to an insert object, and we may have a page with
@@ -265,13 +266,16 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
* First, check for an insert object with a visible update (a visible
* update that's been deleted is not a valid key/value pair).
*/
- if (cbt->ins != NULL &&
- (upd = __wt_txn_read(session, cbt->ins->upd)) != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE)
- return (false);
- if (updp != NULL)
- *updp = upd;
- return (true);
+ if (cbt->ins != NULL) {
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd != NULL) {
+ if (upd->type == WT_UPDATE_TOMBSTONE)
+ return (0);
+ if (updp != NULL)
+ *updp = upd;
+ *valid = true;
+ return (0);
+ }
}
/*
@@ -290,7 +294,7 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
* keys, check for retrieval past the end of the page.
*/
if (cbt->recno >= cbt->ref->ref_recno + page->entries)
- return (false);
+ return (0);
/*
* An update would have appeared as an "insert" object; no
@@ -300,7 +304,7 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
case BTREE_COL_VAR:
/* The search function doesn't check for empty pages. */
if (page->entries == 0)
- return (false);
+ return (0);
WT_ASSERT(session, cbt->slot < page->entries);
/*
@@ -309,7 +313,7 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
* returned on-page object must be checked for a match.
*/
if (cbt->ins != NULL && !F_ISSET(cbt, WT_CBT_VAR_ONPAGE_MATCH))
- return (false);
+ return (0);
/*
* Although updates would have appeared as an "insert" objects,
@@ -320,12 +324,12 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
cip = &page->pg_var[cbt->slot];
if ((cell = WT_COL_PTR(page, cip)) == NULL ||
__wt_cell_type(cell) == WT_CELL_DEL)
- return (false);
+ return (0);
break;
case BTREE_ROW:
/* The search function doesn't check for empty pages. */
if (page->entries == 0)
- return (false);
+ return (0);
WT_ASSERT(session, cbt->slot < page->entries);
/*
@@ -333,34 +337,23 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
* key as an on-page object, we're done.
*/
if (cbt->ins != NULL)
- return (false);
+ return (0);
/* Check for an update. */
if (page->modify != NULL &&
- page->modify->mod_row_update != NULL &&
- (upd = __wt_txn_read(session,
- page->modify->mod_row_update[cbt->slot])) != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE)
- return (false);
- if (updp != NULL)
- *updp = upd;
+ page->modify->mod_row_update != NULL) {
+ WT_RET(__wt_txn_read(session,
+ page->modify->mod_row_update[cbt->slot], &upd));
+ if (upd != NULL) {
+ if (upd->type == WT_UPDATE_TOMBSTONE)
+ return (0);
+ if (updp != NULL)
+ *updp = upd;
+ }
}
break;
}
- return (true);
-}
-
-/*
- * __cursor_kv_return --
- * Return a page referenced key/value pair to the application.
- */
-static inline int
-__cursor_kv_return(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
-{
- WT_RET(__wt_key_return(session, cbt));
- WT_RET(__wt_value_return(session, cbt, upd));
-
+ *valid = true;
return (0);
}
@@ -512,7 +505,10 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
WT_ERR(btree->type == BTREE_ROW ?
__cursor_row_search(session, cbt, cbt->ref, false) :
__cursor_col_search(session, cbt, cbt->ref));
- valid = cbt->compare == 0 && __wt_cursor_valid(cbt, &upd);
+
+ /* Return, if prepare conflict encountered. */
+ if (cbt->compare == 0)
+ WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
}
if (!valid) {
WT_ERR(__cursor_func_init(cbt, true));
@@ -520,7 +516,10 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
WT_ERR(btree->type == BTREE_ROW ?
__cursor_row_search(session, cbt, NULL, false) :
__cursor_col_search(session, cbt, NULL));
- valid = cbt->compare == 0 && __wt_cursor_valid(cbt, &upd);
+
+ /* Return, if prepare conflict encountered. */
+ if (cbt->compare == 0)
+ WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
}
if (valid)
@@ -618,14 +617,14 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
* Ignore those cases, it makes things too complicated.
*/
if (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1)
- valid = __wt_cursor_valid(cbt, &upd);
+ WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
}
if (!valid) {
WT_ERR(__cursor_func_init(cbt, true));
WT_ERR(btree->type == BTREE_ROW ?
__cursor_row_search(session, cbt, NULL, true) :
__cursor_col_search(session, cbt, NULL));
- valid = __wt_cursor_valid(cbt, &upd);
+ WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
}
/*
@@ -656,26 +655,43 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
exact = 0;
F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
- } else if ((ret = __wt_btcur_next(cbt, false)) != WT_NOTFOUND)
- exact = 1;
- else {
+ } else {
/*
- * The cursor next call may have overwritten our caller's key,
- * restore it to its original value.
+ * We didn't find an exact match: try after the search key,
+ * then before. We have to loop here because at low isolation
+ * levels, new records could appear as we are stepping through
+ * the tree.
*/
- __cursor_state_restore(cursor, &state);
+ while ((ret = __wt_btcur_next(cbt, false)) != WT_NOTFOUND) {
+ WT_ERR(ret);
+ if (btree->type == BTREE_ROW)
+ WT_ERR(__wt_compare(session, btree->collator,
+ &cursor->key, &state.key, &exact));
+ else
+ exact = cbt->recno < state.recno ? -1 :
+ cbt->recno == state.recno ? 0 : 1;
+ if (exact >= 0)
+ goto done;
+ }
- WT_ERR(__cursor_func_init(cbt, true));
- WT_ERR(btree->type == BTREE_ROW ?
- __cursor_row_search(session, cbt, NULL, true) :
- __cursor_col_search(session, cbt, NULL));
- if (__wt_cursor_valid(cbt, &upd)) {
- exact = cbt->compare;
- ret = __cursor_kv_return(session, cbt, upd);
- } else if ((ret = __wt_btcur_prev(cbt, false)) != WT_NOTFOUND)
- exact = -1;
+ /*
+ * We walked to the end of the tree without finding a match.
+ * Walk backwards instead.
+ */
+ while ((ret = __wt_btcur_prev(cbt, false)) != WT_NOTFOUND) {
+ WT_ERR(ret);
+ if (btree->type == BTREE_ROW)
+ WT_ERR(__wt_compare(session, btree->collator,
+ &cursor->key, &state.key, &exact));
+ else
+ exact = cbt->recno < state.recno ? -1 :
+ cbt->recno == state.recno ? 0 : 1;
+ if (exact <= 0)
+ goto done;
+ }
}
+done:
err: if (ret == 0 && exactp != NULL)
*exactp = exact;
@@ -703,7 +719,7 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- bool append_key;
+ bool append_key, valid;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -784,8 +800,11 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
* key/value pair.
*/
if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) &&
- cbt->compare == 0 && __wt_cursor_valid(cbt, NULL))
- WT_ERR(WT_DUPLICATE_KEY);
+ cbt->compare == 0) {
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if (valid)
+ WT_ERR(WT_DUPLICATE_KEY);
+ }
ret = __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD);
} else {
@@ -805,10 +824,14 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
* column-store implicitly fills the gap with empty records.
* Fail in that case, the record exists.
*/
- if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) &&
- ((cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) ||
- (cbt->compare != 0 && __cursor_fix_implicit(btree, cbt))))
- WT_ERR(WT_DUPLICATE_KEY);
+ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
+ if (cbt->compare == 0) {
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if (valid)
+ WT_ERR(WT_DUPLICATE_KEY);
+ } else if (__cursor_fix_implicit(btree, cbt))
+ WT_ERR(WT_DUPLICATE_KEY);
+ }
WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD));
@@ -932,7 +955,7 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- bool iterating;
+ bool iterating, valid;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -1028,7 +1051,10 @@ retry: if (positioned == POSITIONED)
/* Check whether an update would conflict. */
WT_ERR(__curfile_update_check(cbt));
- if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL))
+ if (cbt->compare != 0)
+ WT_ERR(WT_NOTFOUND);
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if (!valid)
WT_ERR(WT_NOTFOUND);
ret = __cursor_row_modify(session, cbt, WT_UPDATE_TOMBSTONE);
@@ -1043,7 +1069,10 @@ retry: if (positioned == POSITIONED)
WT_ERR(__curfile_update_check(cbt));
/* Remove the record if it exists. */
- if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) {
+ valid = false;
+ if (cbt->compare == 0)
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if (cbt->compare != 0 || !valid) {
if (!__cursor_fix_implicit(btree, cbt))
WT_ERR(WT_NOTFOUND);
/*
@@ -1143,6 +1172,7 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ bool valid;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -1207,7 +1237,10 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
*/
if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
WT_ERR(__curfile_update_check(cbt));
- if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL))
+ if (cbt->compare != 0)
+ WT_ERR(WT_NOTFOUND);
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if (!valid)
WT_ERR(WT_NOTFOUND);
}
ret = __cursor_row_modify_v(session, cbt, value, modify_type);
@@ -1224,8 +1257,10 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
*/
if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
WT_ERR(__curfile_update_check(cbt));
- if ((cbt->compare != 0 ||
- !__wt_cursor_valid(cbt, NULL)) &&
+ valid = false;
+ if (cbt->compare == 0)
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if ((cbt->compare != 0 || !valid) &&
!__cursor_fix_implicit(btree, cbt))
WT_ERR(WT_NOTFOUND);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c
index e9ac0bca66a..cb50bfbcf61 100644
--- a/src/third_party/wiredtiger/src/btree/bt_delete.c
+++ b/src/third_party/wiredtiger/src/btree/bt_delete.c
@@ -65,17 +65,18 @@
int
__wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
{
+ WT_ADDR *ref_addr;
WT_DECL_RET;
- WT_PAGE *parent;
uint32_t previous_state;
*skipp = false;
/* If we have a clean page in memory, attempt to evict it. */
- if (ref->state == WT_REF_MEM &&
- __wt_atomic_casv32(&ref->state, WT_REF_MEM, WT_REF_LOCKED)) {
+ previous_state = ref->state;
+ if ((previous_state == WT_REF_MEM || previous_state == WT_REF_LIMBO) &&
+ __wt_atomic_casv32(&ref->state, previous_state, WT_REF_LOCKED)) {
if (__wt_page_is_modified(ref->page)) {
- ref->state = WT_REF_MEM;
+ ref->state = previous_state;
return (0);
}
@@ -93,7 +94,6 @@ __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
previous_state = ref->state;
switch (previous_state) {
case WT_REF_DISK:
- case WT_REF_LIMBO:
case WT_REF_LOOKASIDE:
break;
default:
@@ -101,21 +101,9 @@ __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
}
if (!__wt_atomic_casv32(&ref->state, previous_state, WT_REF_LOCKED))
return (0);
- switch (previous_state) {
- case WT_REF_DISK:
- break;
- case WT_REF_LIMBO:
- case WT_REF_LOOKASIDE:
- if (__wt_las_page_skip_locked(session, ref))
- break;
- /* FALLTHROUGH */
- default:
- ref->state = previous_state;
- return (0);
- }
/*
- * If this WT_REF was previously part of a fast-delete operation, there
+ * If this WT_REF was previously part of a truncate operation, there
* may be existing page-delete information. The structure is only read
* while the state is locked, free the previous version.
*
@@ -129,21 +117,24 @@ __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
}
/*
- * We cannot fast-delete pages that have overflow key/value items as
- * the overflow blocks have to be discarded. The way we figure that
- * out is to check the page's cell type, cells for leaf pages without
- * overflow items are special.
+ * We cannot truncate pages that have overflow key/value items as the
+ * overflow blocks have to be discarded. The way we figure that out is
+ * to check the page's cell type, cells for leaf pages without overflow
+ * items are special.
*
* To look at an on-page cell, we need to look at the parent page, and
* that's dangerous, our parent page could change without warning if
- * the parent page were to split, deepening the tree. It's safe: the
- * page's reference will always point to some valid page, and if we find
- * any problems we simply fail the fast-delete optimization.
+ * the parent page were to split, deepening the tree. We can look at
+ * the parent page itself because the page can't change underneath us.
+ * However, if the parent page splits, our reference address can change;
+ * we don't care what version of it we read, as long as we don't read
+ * it twice.
*/
- parent = ref->home;
- if (__wt_off_page(parent, ref->addr) ?
- ((WT_ADDR *)ref->addr)->type != WT_ADDR_LEAF_NO :
- __wt_cell_type_raw(ref->addr) != WT_CELL_ADDR_LEAF_NO)
+ WT_ORDERED_READ(ref_addr, ref->addr);
+ if (ref_addr != NULL &&
+ (__wt_off_page(ref->home, ref_addr) ?
+ ref_addr->type != WT_ADDR_LEAF_NO :
+ __wt_cell_type_raw((WT_CELL *)ref_addr) != WT_CELL_ADDR_LEAF_NO))
goto err;
/*
@@ -181,8 +172,10 @@ err: __wt_free(session, ref->page_del);
int
__wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref)
{
- WT_UPDATE **upd;
+ WT_UPDATE **updp;
uint64_t sleep_count, yield_count;
+ uint32_t current_state;
+ bool locked;
/*
* If the page is still "deleted", it's as we left it, reset the state
@@ -190,17 +183,17 @@ __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref)
* instantiated or being instantiated. Loop because it's possible for
* the page to return to the deleted state if instantiation fails.
*/
- for (sleep_count = yield_count = 0;;) {
- switch (ref->state) {
+ for (locked = false, sleep_count = yield_count = 0;;) {
+ switch (current_state = ref->state) {
case WT_REF_DELETED:
/*
* If the page is still "deleted", it's as we left it,
* reset the state.
*/
- if (!__wt_atomic_casv32(&ref->state,
+ if (__wt_atomic_casv32(&ref->state,
WT_REF_DELETED, ref->page_del->previous_state))
- break;
- goto done;
+ goto done;
+ break;
case WT_REF_LOCKED:
/*
* A possible state, the page is being instantiated.
@@ -208,22 +201,10 @@ __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref)
break;
case WT_REF_MEM:
case WT_REF_SPLIT:
- /*
- * We can't use the normal read path to get a copy of
- * the page because the session may have closed the
- * cursor, we no longer have the reference to the tree
- * required for a hazard pointer. We're safe because
- * with unresolved transactions, the page isn't going
- * anywhere.
- *
- * The page is in an in-memory state, which means it
- * was instantiated at some point. Walk the list of
- * update structures and abort them.
- */
- for (upd =
- ref->page_del->update_list; *upd != NULL; ++upd)
- (*upd)->txnid = WT_TXN_ABORTED;
- goto done;
+ if (__wt_atomic_casv32(
+ &ref->state, current_state, WT_REF_LOCKED))
+ locked = true;
+ break;
case WT_REF_DISK:
case WT_REF_LIMBO:
case WT_REF_LOOKASIDE:
@@ -232,16 +213,38 @@ __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref)
return (__wt_illegal_value(session,
"illegal WT_REF.state rolling back deleted page"));
}
+
+ if (locked)
+ break;
+
/*
* We wait for the change in page state, yield before retrying,
- * and if we've yielded enough times, start sleeping so we don't
- * burn CPU to no purpose.
+ * and if we've yielded enough times, start sleeping so we
+ * don't burn CPU to no purpose.
*/
__wt_ref_state_yield_sleep(&yield_count, &sleep_count);
- WT_STAT_CONN_INCRV(session, page_del_rollback_blocked,
- sleep_count);
+ WT_STAT_CONN_INCRV(session,
+ page_del_rollback_blocked, sleep_count);
}
+ /*
+ * We can't use the normal read path to get a copy of the page
+ * because the session may have closed the cursor, we no longer
+ * have the reference to the tree required for a hazard
+ * pointer. We're safe because with unresolved transactions,
+ * the page isn't going anywhere.
+ *
+ * The page is in an in-memory state, which means it
+ * was instantiated at some point. Walk any list of
+ * update structures and abort them.
+ */
+ WT_ASSERT(session, locked);
+ if ((updp = ref->page_del->update_list) != NULL)
+ for (; *updp != NULL; ++updp)
+ (*updp)->txnid = WT_TXN_ABORTED;
+
+ ref->state = current_state;
+
done: /*
* Now mark the truncate aborted: this must come last because after
* this point there is nothing preventing the page from being evicted.
@@ -261,12 +264,12 @@ __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
bool skip;
/*
- * Deleted pages come from two sources: either it's a fast-delete as
+ * Deleted pages come from two sources: either it's a truncate as
* described above, or the page has been emptied by other operations
* and eviction deleted it.
*
* In both cases, the WT_REF state will be WT_REF_DELETED. In the case
- * of a fast-delete page, there will be a WT_PAGE_DELETED structure with
+ * of a truncated page, there will be a WT_PAGE_DELETED structure with
* the transaction ID of the transaction that deleted the page, and the
* page is visible if that transaction ID is visible. In the case of an
* empty page, there will be no WT_PAGE_DELETED structure and the delete
@@ -308,6 +311,31 @@ __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
}
/*
+ * __tombstone_update_alloc --
+ * Allocate and initialize a page-deleted tombstone update structure.
+ */
+static int
+__tombstone_update_alloc(WT_SESSION_IMPL *session,
+ WT_PAGE_DELETED *page_del, WT_UPDATE **updp, size_t *sizep)
+{
+ WT_UPDATE *upd;
+
+ WT_RET(
+ __wt_update_alloc(session, NULL, &upd, sizep, WT_UPDATE_TOMBSTONE));
+
+ /*
+ * Cleared memory matches the lowest possible transaction ID and
+ * timestamp, do nothing.
+ */
+ if (page_del != NULL) {
+ upd->txnid = page_del->txnid;
+ __wt_timestamp_set(&upd->timestamp, &page_del->timestamp);
+ }
+ *updp = upd;
+ return (0);
+}
+
+/*
* __wt_delete_page_instantiate --
* Instantiate an entirely deleted row-store leaf page.
*/
@@ -316,11 +344,14 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
{
WT_BTREE *btree;
WT_DECL_RET;
+ WT_INSERT *ins;
+ WT_INSERT_HEAD *insert;
WT_PAGE *page;
WT_PAGE_DELETED *page_del;
+ WT_ROW *rip;
WT_UPDATE **upd_array, *upd;
size_t size;
- uint32_t i;
+ uint32_t count, i;
btree = S2BT(session);
page = ref->page;
@@ -355,52 +386,75 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
* running inside a checkpoint, and now we're being forced to read that
* page.
*
- * In the first case, we have a page reference structure, in the second,
- * we don't.
- *
- * Allocate the per-reference update array; in the case of instantiating
- * a page, deleted by a running transaction that might eventually abort,
- * we need a list of the update structures so we can do that abort. The
- * hard case is if a page splits: the update structures might be moved
- * to different pages, and we still have to find them all for an abort.
+ * Expect a page-deleted structure if there's a running transaction that
+ * needs to be resolved, otherwise, there may not be one (and, if the
+ * transaction has resolved, we can ignore the page-deleted structure).
*/
- page_del = ref->page_del;
- if (page_del != NULL)
- WT_RET(__wt_calloc_def(
- session, page->entries + 1, &page_del->update_list));
+ page_del =
+ __wt_btree_truncate_active(session, ref) ? ref->page_del : NULL;
/*
- * Allocate the per-page update array if one doesn't already exist.
- * Because deletes may be instantiated after lookaside table updates,
- * the update array may already exist.
+ * Allocate the per-page update array if one doesn't already exist. (It
+ * might already exist because deletes are instantiated after lookaside
+ * table updates.)
*/
- if (page->modify->mod_row_update == NULL)
- WT_ERR(__wt_calloc_def(
+ if (page->entries != 0 && page->modify->mod_row_update == NULL)
+ WT_RET(__wt_calloc_def(
session, page->entries, &page->modify->mod_row_update));
/*
- * Fill in the per-reference update array with references to update
- * structures, fill in the per-page update array with references to
- * deleted items.
+ * Allocate the per-reference update array; in the case of instantiating
+ * a page deleted in a running transaction, we need a list of the update
+ * structures for the eventual commit or abort.
*/
- upd_array = page->modify->mod_row_update;
- for (i = 0, size = 0; i < page->entries; ++i) {
- WT_ERR(__wt_calloc_one(session, &upd));
- upd->type = WT_UPDATE_TOMBSTONE;
-
- if (page_del == NULL)
- upd->txnid = WT_TXN_NONE; /* Globally visible */
- else {
- upd->txnid = page_del->txnid;
- __wt_timestamp_set(
- &upd->timestamp, &page_del->timestamp);
- page_del->update_list[i] = upd;
+ if (page_del != NULL) {
+ count = 0;
+ if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL)
+ WT_SKIP_FOREACH(ins, insert)
+ ++count;
+ WT_ROW_FOREACH(page, rip, i) {
+ ++count;
+ if ((insert = WT_ROW_INSERT(page, rip)) != NULL)
+ WT_SKIP_FOREACH(ins, insert)
+ ++count;
}
+ WT_RET(__wt_calloc_def(
+ session, count + 1, &page_del->update_list));
+ }
- upd->next = upd_array[i];
- upd_array[i] = upd;
-
- size += sizeof(WT_UPDATE *) + WT_UPDATE_MEMSIZE(upd);
+ /* Walk the page entries, giving each one a tombstone. */
+ size = 0;
+ count = 0;
+ upd_array = page->modify->mod_row_update;
+ if ((insert = WT_ROW_INSERT_SMALLEST(page)) != NULL)
+ WT_SKIP_FOREACH(ins, insert) {
+ WT_ERR(__tombstone_update_alloc(
+ session, page_del, &upd, &size));
+ upd->next = ins->upd;
+ ins->upd = upd;
+
+ if (page_del != NULL)
+ page_del->update_list[count++] = upd;
+ }
+ WT_ROW_FOREACH(page, rip, i) {
+ WT_ERR(__tombstone_update_alloc(
+ session, page_del, &upd, &size));
+ upd->next = upd_array[WT_ROW_SLOT(page, rip)];
+ upd_array[WT_ROW_SLOT(page, rip)] = upd;
+
+ if (page_del != NULL)
+ page_del->update_list[count++] = upd;
+
+ if ((insert = WT_ROW_INSERT(page, rip)) != NULL)
+ WT_SKIP_FOREACH(ins, insert) {
+ WT_ERR(__tombstone_update_alloc(
+ session, page_del, &upd, &size));
+ upd->next = ins->upd;
+ ins->upd = upd;
+
+ if (page_del != NULL)
+ page_del->update_list[count++] = upd;
+ }
}
__wt_cache_page_inmem_incr(session, page, size);
diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c
index 4c108114438..1379553c211 100644
--- a/src/third_party/wiredtiger/src/btree/bt_io.c
+++ b/src/third_party/wiredtiger/src/btree/bt_io.c
@@ -355,8 +355,7 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
* Checksum the data if the buffer isn't compressed or checksums are
* configured.
*/
- data_checksum = true; /* -Werror=maybe-uninitialized */
- WT_NOT_READ(data_checksum);
+ WT_NOT_READ(data_checksum, true);
switch (btree->checksum) {
case CKSUM_ON:
data_checksum = true;
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index d191fec8502..612540956b7 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -71,6 +71,7 @@ __wt_page_alloc(WT_SESSION_IMPL *session,
break;
case WT_PAGE_COL_INT:
case WT_PAGE_ROW_INT:
+ WT_ASSERT(session, alloc_entries != 0);
/*
* Internal pages have an array of references to objects so they
* can split. Allocate the array of references and optionally,
@@ -102,11 +103,13 @@ err: if ((pindex = WT_INTL_INDEX_GET_SAFE(page)) != NULL) {
}
break;
case WT_PAGE_COL_VAR:
- page->pg_var = (WT_COL *)((uint8_t *)page + sizeof(WT_PAGE));
+ page->pg_var = alloc_entries == 0 ?
+ NULL : (WT_COL *)((uint8_t *)page + sizeof(WT_PAGE));
page->entries = alloc_entries;
break;
case WT_PAGE_ROW_LEAF:
- page->pg_row = (WT_ROW *)((uint8_t *)page + sizeof(WT_PAGE));
+ page->pg_row = alloc_entries == 0 ?
+ NULL : (WT_ROW *)((uint8_t *)page + sizeof(WT_PAGE));
page->entries = alloc_entries;
break;
WT_ILLEGAL_VALUE(session);
diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c
index 03b5039b00b..8eb120f06ec 100644
--- a/src/third_party/wiredtiger/src/btree/bt_random.c
+++ b/src/third_party/wiredtiger/src/btree/bt_random.c
@@ -302,6 +302,7 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
WT_UPDATE *upd;
wt_off_t size;
uint64_t n, skip;
+ bool valid;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -421,7 +422,8 @@ random_page_entry:
* the next entry, if that doesn't work, move to the previous entry.
*/
WT_ERR(__wt_row_random_leaf(session, cbt));
- if (__wt_cursor_valid(cbt, &upd)) {
+ WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
+ if (valid) {
WT_ERR(__wt_key_return(session, cbt));
WT_ERR(__wt_value_return(session, cbt, upd));
} else {
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index 4ac0cb2da9b..450fd6cf563 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -370,7 +370,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
WT_BTREE *btree;
WT_DECL_RET;
WT_ITEM tmp;
- WT_PAGE *page;
+ WT_PAGE *notused;
size_t addr_size;
uint64_t time_start, time_stop;
uint32_t page_flags, final_state, new_state, previous_state;
@@ -378,7 +378,6 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
bool timer;
btree = S2BT(session);
- page = NULL;
time_start = time_stop = 0;
/*
@@ -427,11 +426,8 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
if (addr == NULL) {
WT_ASSERT(session, previous_state != WT_REF_DISK);
- WT_ERR(__wt_btree_new_leaf_page(session, &page));
- ref->page = page;
- if (previous_state == WT_REF_LOOKASIDE)
- goto skip_read;
- goto done;
+ WT_ERR(__wt_btree_new_leaf_page(session, &ref->page));
+ goto skip_read;
}
/*
@@ -464,7 +460,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
WT_DATA_IN_ITEM(&tmp) ? WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED;
if (LF_ISSET(WT_READ_IGNORE_CACHE_SIZE))
FLD_SET(page_flags, WT_PAGE_EVICT_NO_PROGRESS);
- WT_ERR(__wt_page_inmem(session, ref, tmp.data, page_flags, &page));
+ WT_ERR(__wt_page_inmem(session, ref, tmp.data, page_flags, &notused));
tmp.mem = NULL;
/*
@@ -481,7 +477,7 @@ skip_read:
switch (previous_state) {
case WT_REF_DELETED:
/*
- * A fast-deleted page may also have lookaside information. The
+ * A truncated page may also have lookaside information. The
* delete happened after page eviction (writing the lookaside
* information), first update based on the lookaside table and
* then apply the delete.
@@ -491,6 +487,7 @@ skip_read:
ref->page_las->eviction_to_lookaside = false;
}
+ /* Move all records to a deleted state. */
WT_ERR(__wt_delete_page_instantiate(session, ref));
break;
case WT_REF_LOOKASIDE:
@@ -523,7 +520,7 @@ skip_read:
WT_IGNORE_RET(__wt_las_remove_block(
session, btree->id, ref->page_las->las_pageid));
-done: WT_PUBLISH(ref->state, final_state);
+ WT_PUBLISH(ref->state, final_state);
return (ret);
err: /*
@@ -719,8 +716,7 @@ read: /*
ret = __wt_page_release_evict(session, ref);
/* If forced eviction fails, stall. */
if (ret == EBUSY) {
- ret = 0;
- WT_NOT_READ(ret);
+ WT_NOT_READ(ret, 0);
WT_STAT_CONN_INCR(session,
page_forcible_evict_blocked);
stalled = true;
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 36bbe48b407..3596f5a72b7 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -719,8 +719,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
alloc_refp - alloc_index->index == (ptrdiff_t)result_entries);
/* Start making real changes to the tree, errors are fatal. */
- complete = WT_ERR_PANIC;
- WT_NOT_READ(complete);
+ WT_NOT_READ(complete, WT_ERR_PANIC);
/* Encourage a race */
__page_split_timing_stress(session,
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index 22921d7d378..535e804d6a8 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -517,8 +517,7 @@ restart: /*
* in-cache pages, or if we see a deleted page.
*/
if (ret == WT_NOTFOUND) {
- ret = 0;
- WT_NOT_READ(ret);
+ WT_NOT_READ(ret, 0);
break;
}
diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c
index 569a0247e7b..7ccc325523e 100644
--- a/src/third_party/wiredtiger/src/cache/cache_las.c
+++ b/src/third_party/wiredtiger/src/cache/cache_las.c
@@ -993,7 +993,8 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
#else
wt_timestamp_t *val_ts;
#endif
- uint64_t cnt, decrement_cnt, las_counter, las_pageid, txnid;
+ uint64_t cnt, decrement_cnt, las_counter, las_pageid, saved_pageid;
+ uint64_t las_txnid;
uint32_t las_id, session_flags;
uint8_t upd_type;
int notused;
@@ -1007,6 +1008,7 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
local_txn = locked = false;
WT_RET(__wt_scr_alloc(session, 0, &saved_key));
+ saved_pageid = 0;
/*
* Allocate a cursor and wrap all the updates in a transaction.
@@ -1059,6 +1061,20 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
/* Walk the file. */
while ((ret = cursor->next(cursor)) == 0) {
+ WT_ERR(cursor->get_key(cursor,
+ &las_pageid, &las_id, &las_counter, &las_key));
+
+ /*
+ * If we have switched to a different page, clear the saved key.
+ * Otherwise, sweep could incorrectly remove records after
+ * seeing a birthmark for a key in one block if the same key is
+ * at the beginning of the next block. See WT-3982 for details.
+ */
+ if (las_pageid != saved_pageid) {
+ saved_key->size = 0;
+ saved_pageid = las_pageid;
+ }
+
/*
* Stop if the cache is stuck: we are ignoring the cache size
* while scanning the lookaside table, so we're making things
@@ -1076,9 +1092,6 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
else if (saved_key->size == 0)
break;
- WT_ERR(cursor->get_key(cursor,
- &las_pageid, &las_id, &las_counter, &las_key));
-
/*
* If the entry belongs to a dropped tree, discard it.
*
@@ -1102,7 +1115,7 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
* now no longer needed.
*/
WT_ERR(cursor->get_value(cursor,
- &txnid, &las_timestamp, &upd_type, &las_value));
+ &las_txnid, &las_timestamp, &upd_type, &las_value));
#ifdef HAVE_TIMESTAMPS
WT_ASSERT(session, las_timestamp.size == WT_TIMESTAMP_SIZE);
memcpy(&timestamp, las_timestamp.data, las_timestamp.size);
@@ -1116,7 +1129,7 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
* If it is visible then perform additional checks to see
* whether it has aged out of a live file.
*/
- if (!__wt_txn_visible_all(session, txnid, val_ts)) {
+ if (!__wt_txn_visible_all(session, las_txnid, val_ts)) {
saved_key->size = 0;
continue;
}
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index f473cfe3e8d..ffcb2139330 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -46,8 +46,8 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_open_session[] = {
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_query_timestamp[] = {
{ "get", "string",
- NULL, "choices=[\"all_committed\",\"oldest\",\"pinned\","
- "\"recovery\",\"stable\"]",
+ NULL, "choices=[\"all_committed\",\"last_checkpoint\",\"oldest\""
+ ",\"pinned\",\"recovery\",\"stable\"]",
NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
index 720df3c465d..f1043ee7546 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
@@ -303,8 +303,8 @@ __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session)
conn = S2C(session);
cache = conn->cache;
- cp_locked = found = false;
- WT_NOT_READ(cp_locked);
+ WT_NOT_READ(cp_locked, false);
+ found = false;
cp = __wt_process.cache_pool;
if (!F_ISSET(conn, WT_CONN_CACHE_POOL))
@@ -338,8 +338,7 @@ __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session)
* operation.
*/
__wt_spin_unlock(session, &cp->cache_pool_lock);
- cp_locked = false;
- WT_NOT_READ(cp_locked);
+ WT_NOT_READ(cp_locked, false);
FLD_CLR(cache->pool_flags, WT_CACHE_POOL_RUN);
__wt_cond_signal(session, cp->cache_pool_cond);
diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c
index 6e27d0f98d6..fed45dbf4c4 100644
--- a/src/third_party/wiredtiger/src/conn/conn_log.c
+++ b/src/third_party/wiredtiger/src/conn/conn_log.c
@@ -598,8 +598,7 @@ __log_file_server(void *arg)
continue;
WT_ERR(__wt_fsync(session, log->log_fh, true));
__wt_spin_lock(session, &log->log_sync_lock);
- locked = true;
- WT_NOT_READ(locked);
+ WT_NOT_READ(locked, true);
/*
* The sync LSN could have advanced while we
* were writing to disk.
@@ -950,7 +949,7 @@ __log_server(void *arg)
if (ret == EACCES &&
retry < WT_RETRY_MAX) {
retry++;
- ret = 0;
+ WT_NOT_READ(ret, 0);
} else {
/*
* Return the error if there is
diff --git a/src/third_party/wiredtiger/src/docs/Doxyfile b/src/third_party/wiredtiger/src/docs/Doxyfile
index 178655bf6ed..60b6c4690b0 100644
--- a/src/third_party/wiredtiger/src/docs/Doxyfile
+++ b/src/third_party/wiredtiger/src/docs/Doxyfile
@@ -206,39 +206,39 @@ TAB_SIZE = 8
# You can put \n's in the value part of an alias to insert newlines.
ALIASES = \
- "config{3}= @row{<tt>\1</tt>,\2,\3}" \
- "configempty{2}=@param config\n Configuration string, see @ref config_strings. No values currently permitted." \
- "configend= </table>" \
- "configstart{2}=@param config\n Configuration string, see @ref config_strings. Permitted values:\n <table>@hrow{Name,Effect,Values}" \
+ config{3}=" @row{<tt>\1</tt>,\2,\3}" \
+ configempty{2}="@param config configuration string, see @ref config_strings. No values currently permitted." \
+ configend=" </table>" \
+ configstart{2}="@param config configuration string, see @ref config_strings. Permitted values:\n <table>@hrow{Name,Effect,Values}" \
"ebusy_errors=@returns zero on success, EBUSY if the object is not available for exclusive access, and a non-zero error code on failure. See @ref error_handling \"Error handling\" for details." \
- "errors=@returns zero on success and a non-zero error code on failure. See @ref error_handling \"Error handling\" for details." \
- "exclusive=This method requires exclusive access to the specified data source(s). If any cursors are open with the specified name(s) or a data source is otherwise in use, the call will fail and return \c EBUSY.\n\n" \
- "ex_ref{1}=@ref \1 \"\1\"" \
- "hrow{1}=<tr><th>\1</th></tr>" \
- "hrow{2}=<tr><th>\1</th><th>\2</th></tr>" \
- "hrow{3}=<tr><th>\1</th><th>\2</th><th>\3</th></tr>" \
- "hrow{4}=<tr><th>\1</th><th>\2</th><th>\3</th><th>\4</th></tr>" \
- "hrow{5}=<tr><th>\1</th><th>\2</th><th>\3</th><th>\4</th><th>\5</th></tr>" \
- "hrow{6}=<tr><th>\1</th><th>\2</th><th>\3</th><th>\4</th><th>\5</th><th>\6</th></tr>" \
- "hrow{7}=<tr><th>\1</th><th>\2</th><th>\3</th><th>\4</th><th>\5</th><th>\6</th><th>\7</th></tr>" \
- "hrow{8}=<tr><th>\1</th><th>\2</th><th>\3</th><th>\4</th><th>\5</th><th>\6</th><th>\7</th><th>\8</th></tr>" \
- "hrow{9}=<tr><th>\1</th><th>\2</th><th>\3</th><th>\4</th><th>\5</th><th>\6</th><th>\7</th><th>\8</th><th>\9</th></tr>" \
- "notyet{1}=Note: <b>"\1"</b> not yet supported in WiredTiger.\n@todo fix when \1 supported\n\n" \
- "plantuml_end=PlantUML template end -->" \
- "plantuml_start{1}=\image html \1\n\image latex \1\n<!-- PlantUML template begins" \
- "requires_notransaction=This method must not be called on a session with an active transaction.\n\n" \
- "requires_transaction=This method must be called on a session with an active transaction.\n\n" \
- "ref_single=@ref" \
- "row{1}=<tr><td>\1</td></tr>" \
- "row{2}=<tr><td>\1</td><td>\2</td></tr>" \
- "row{3}=<tr><td>\1</td><td>\2</td><td>\3</td></tr>" \
- "row{4}=<tr><td>\1</td><td>\2</td><td>\3</td><td>\4</td></tr>" \
- "row{5}=<tr><td>\1</td><td>\2</td><td>\3</td><td>\4</td><td>\5</td></tr>" \
- "row{6}=<tr><td>\1</td><td>\2</td><td>\3</td><td>\4</td><td>\5</td><td>\6</td></tr>" \
- "row{7}=<tr><td>\1</td><td>\2</td><td>\3</td><td>\4</td><td>\5</td><td>\6</td><td>\7</td></tr>" \
- "row{8}=<tr><td>\1</td><td>\2</td><td>\3</td><td>\4</td><td>\5</td><td>\6</td><td>\7</td><td>\8</td></tr>" \
- "row{9}=<tr><td>\1</td><td>\2</td><td>\3</td><td>\4</td><td>\5</td><td>\6</td><td>\7</td><td>\8</td><td>\9</td></tr>" \
- "subpage_single=@subpage" \
+ errors="@returns zero on success and a non-zero error code on failure. See @ref error_handling \"Error handling\" for details." \
+ exclusive="This method requires exclusive access to the specified data source(s). If any cursors are open with the specified name(s) or a data source is otherwise in use, the call will fail and return \c EBUSY.\n\n" \
+ ex_ref{1}="@ref \1 \"\1\"" \
+ hrow{1}="<tr><th>\1</th></tr>" \
+ hrow{2}="<tr><th>\1</th><th>\2</th></tr>" \
+ hrow{3}="<tr><th>\1</th><th>\2</th><th>\3</th></tr>" \
+ hrow{4}="<tr><th>\1</th><th>\2</th><th>\3</th><th>\4</th></tr>" \
+ hrow{5}="<tr><th>\1</th><th>\2</th><th>\3</th><th>\4</th><th>\5</th></tr>" \
+ hrow{6}="<tr><th>\1</th><th>\2</th><th>\3</th><th>\4</th><th>\5</th><th>\6</th></tr>" \
+ hrow{7}="<tr><th>\1</th><th>\2</th><th>\3</th><th>\4</th><th>\5</th><th>\6</th><th>\7</th></tr>" \
+ hrow{8}="<tr><th>\1</th><th>\2</th><th>\3</th><th>\4</th><th>\5</th><th>\6</th><th>\7</th><th>\8</th></tr>" \
+ hrow{9}="<tr><th>\1</th><th>\2</th><th>\3</th><th>\4</th><th>\5</th><th>\6</th><th>\7</th><th>\8</th><th>\9</th></tr>" \
+ notyet{1}="Note: <b>"\1"</b> not yet supported in WiredTiger.\n@todo fix when \1 supported\n\n" \
+ plantuml_end="PlantUML template end -->" \
+ plantuml_start{1}="\image html \1\n\image latex \1\n<!-- PlantUML template begins" \
+ requires_notransaction="This method must not be called on a session with an active transaction.\n\n" \
+ requires_transaction="This method must be called on a session with an active transaction.\n\n" \
+ ref_single="@ref" \
+ row{1}="<tr><td>\1</td></tr>" \
+ row{2}="<tr><td>\1</td><td>\2</td></tr>" \
+ row{3}="<tr><td>\1</td><td>\2</td><td>\3</td></tr>" \
+ row{4}="<tr><td>\1</td><td>\2</td><td>\3</td><td>\4</td></tr>" \
+ row{5}="<tr><td>\1</td><td>\2</td><td>\3</td><td>\4</td><td>\5</td></tr>" \
+ row{6}="<tr><td>\1</td><td>\2</td><td>\3</td><td>\4</td><td>\5</td><td>\6</td></tr>" \
+ row{7}="<tr><td>\1</td><td>\2</td><td>\3</td><td>\4</td><td>\5</td><td>\6</td><td>\7</td></tr>" \
+ row{8}="<tr><td>\1</td><td>\2</td><td>\3</td><td>\4</td><td>\5</td><td>\6</td><td>\7</td><td>\8</td></tr>" \
+ row{9}="<tr><td>\1</td><td>\2</td><td>\3</td><td>\4</td><td>\5</td><td>\6</td><td>\7</td><td>\8</td><td>\9</td></tr>" \
+ subpage_single="@subpage" \
# This tag can be used to specify a number of word-keyword mappings (TCL only).
# A mapping has the form "name=value". For example adding
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 90b71659015..5c478654585 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -124,22 +124,36 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
bool clean_page, inmem_split, tree_dead;
conn = S2C(session);
+ page = ref->page;
+
+ __wt_verbose(session, WT_VERB_EVICT,
+ "page %p (%s)", (void *)page, __wt_page_type_string(page->type));
/* Enter the eviction generation. */
__wt_session_gen_enter(session, WT_GEN_EVICT);
- page = ref->page;
- tree_dead = F_ISSET(session->dhandle, WT_DHANDLE_DEAD);
+ /*
+ * Get exclusive access to the page if our caller doesn't have the tree
+ * locked down.
+ */
+ if (!closing) {
+ WT_ERR(__evict_exclusive(session, ref));
- __wt_verbose(session, WT_VERB_EVICT,
- "page %p (%s)", (void *)page, __wt_page_type_string(page->type));
+ /*
+ * Now the page is locked, remove it from the LRU eviction
+ * queue. We have to do this before freeing the page memory or
+ * otherwise touching the reference because eviction paths
+ * assume a non-NULL reference on the queue is pointing at
+ * valid memory.
+ */
+ __wt_evict_list_clear_page(session, ref);
+ }
/*
- * Get exclusive access to the page and review it for conditions that
- * would block our eviction of the page. If the check fails (for
- * example, we find a page with active children), we're done. We have
- * to make this check for clean pages, too: while unlikely eviction
- * would choose an internal page with children, it's not disallowed.
+ * Review the page for conditions that would block its eviction. If the
+ * check fails (for example, we find a page with active children), quit.
+ * Make this check for clean pages, too: while unlikely eviction would
+ * choose an internal page with children, it's not disallowed.
*/
WT_ERR(__evict_review(session, ref, closing, &inmem_split));
@@ -178,6 +192,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
}
/* Update the reference and discard the page. */
+ tree_dead = F_ISSET(session->dhandle, WT_DHANDLE_DEAD);
if (__wt_ref_is_root(ref))
__wt_ref_out(session, ref);
else if ((clean_page && !F_ISSET(conn, WT_CONN_IN_MEMORY)) || tree_dead)
@@ -275,12 +290,11 @@ __evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
WT_DECL_RET;
/*
- * Discard the page and update the reference structure; if the page has
- * an address, it's a disk page; if it has no address, it's a deleted
- * page re-instantiated (for example, by searching) and never written.
- *
- * If evicting a WT_REF_LIMBO reference, we get to here and transition
- * back to WT_REF_LOOKASIDE.
+ * Discard the page and update the reference structure. If evicting a
+ * WT_REF_LIMBO page, transition back to WT_REF_LOOKASIDE. Otherwise,
+ * a page with a disk address is an on-disk page, and a page without
+ * a disk address is a re-instantiated deleted page (for example, by
+ * searching), that was never subsequently written.
*/
__wt_ref_out(session, ref);
if (!closing && ref->page_las != NULL &&
@@ -417,7 +431,18 @@ __evict_child_check(WT_SESSION_IMPL *session, WT_REF *parent)
WT_INTL_FOREACH_BEGIN(session, parent->page, child) {
switch (child->state) {
case WT_REF_DISK: /* On-disk */
- case WT_REF_DELETED: /* On-disk, deleted */
+ break;
+ case WT_REF_DELETED: /* Deleted */
+ /*
+ * If the page was part of a truncate, transaction
+ * rollback might switch this page into its previous
+ * state at any time, so the delete must be resolved.
+ * We don't have to lock the page, as no thread of
+ * control can be running below our locked internal
+ * page.
+ */
+ if (__wt_btree_truncate_active(session, child))
+ return (EBUSY);
break;
default:
return (EBUSY);
@@ -446,31 +471,12 @@ __evict_review(
*inmem_splitp = false;
conn = S2C(session);
+ page = ref->page;
flags = WT_REC_EVICT;
if (!WT_SESSION_IS_CHECKPOINT(session))
LF_SET(WT_REC_VISIBLE_ALL);
/*
- * Get exclusive access to the page if our caller doesn't have the tree
- * locked down.
- */
- if (!closing) {
- WT_RET(__evict_exclusive(session, ref));
-
- /*
- * Now the page is locked, remove it from the LRU eviction
- * queue. We have to do this before freeing the page memory or
- * otherwise touching the reference because eviction paths
- * assume a non-NULL reference on the queue is pointing at
- * valid memory.
- */
- __wt_evict_list_clear_page(session, ref);
- }
-
- /* Now that we have exclusive access, review the page. */
- page = ref->page;
-
- /*
* Fail if an internal has active children, the children must be evicted
* first. The test is necessary but shouldn't fire much: the eviction
* code is biased for leaf pages, an internal page shouldn't be selected
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h
index afefbe8ad5c..ca2176fcf0e 100644
--- a/src/third_party/wiredtiger/src/include/api.h
+++ b/src/third_party/wiredtiger/src/include/api.h
@@ -68,6 +68,7 @@
if ((ret) != 0 && \
(ret) != WT_NOTFOUND && \
(ret) != WT_DUPLICATE_KEY && \
+ (ret) != WT_PREPARE_CONFLICT && \
F_ISSET(&(s)->txn, WT_TXN_RUNNING)) \
F_SET(&(s)->txn, WT_TXN_ERROR); \
/* \
@@ -237,6 +238,8 @@
JOINABLE_CURSOR_CALL_CHECK(cur)
#define CURSOR_UPDATE_API_END(s, ret) \
+ if ((ret) == WT_PREPARE_CONFLICT) \
+ (ret) = WT_ROLLBACK; \
TXN_API_END(s, ret)
#define ASYNCOP_API_CALL(conn, s, n) \
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 7ba73d1b94f..893f51aa022 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -773,7 +773,7 @@ struct __wt_page {
/*
* WT_PAGE_DELETED --
- * Related information for fast-delete, on-disk pages.
+ * Related information for truncated pages.
*/
struct __wt_page_deleted {
volatile uint64_t txnid; /* Transaction ID */
@@ -997,7 +997,7 @@ struct __wt_update {
finalized prepare */
#define WT_UPDATE_STATE_LOCKED 1 /* locked */
#define WT_UPDATE_STATE_PREPARED 2 /* prepared */
- uint8_t state; /* state (one byte : conserve memory) */
+ volatile uint8_t state;
/* If the update includes a complete value. */
#define WT_UPDATE_DATA_VALUE(upd) \
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 808c8f7ee7f..de28eb7232f 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -1151,6 +1151,23 @@ __wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref)
}
/*
+ * __wt_btree_truncate_active --
+ * Return true if a truncate of the referenced page is still active.
+ */
+static inline bool
+__wt_btree_truncate_active(WT_SESSION_IMPL *session, WT_REF *ref)
+{
+ WT_PAGE_DELETED *page_del;
+
+ if ((page_del = ref->page_del) == NULL)
+ return (false);
+ if (page_del->txnid == WT_TXN_ABORTED)
+ return (false);
+ return (!__wt_txn_visible_all(session,
+ page_del->txnid, WT_TIMESTAMP_NULL(&page_del->timestamp)));
+}
+
+/*
* __wt_btree_can_evict_dirty --
* Check whether eviction of dirty pages or splits are permitted in the
* current tree.
@@ -1336,7 +1353,11 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
page = ref->page;
mod = page->modify;
- /* Pages that have never been modified can always be evicted. */
+ /* A truncated page can't be evicted until the truncate completes. */
+ if (__wt_btree_truncate_active(session, ref))
+ return (false);
+
+ /* Otherwise, never-modified pages can always be evicted. */
if (mod == NULL)
return (true);
@@ -1350,12 +1371,6 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
F_ISSET_ATOMIC(ref->home, WT_PAGE_OVERFLOW_KEYS))
return (false);
- /* A truncated page can't be evicted until the truncate completes. */
- if (ref->page_del != NULL && ref->page_del->txnid != WT_TXN_ABORTED &&
- !__wt_txn_visible_all(session,
- ref->page_del->txnid, WT_TIMESTAMP_NULL(&ref->page_del->timestamp)))
- return (false);
-
/*
* Check for in-memory splits before other eviction tests. If the page
* should split in-memory, return success immediately and skip more
@@ -1458,7 +1473,8 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
(LF_ISSET(WT_READ_NO_SPLIT) || (!inmem_split &&
F_ISSET(session, WT_SESSION_NO_RECONCILE)))) {
if (!WT_SESSION_IS_CHECKPOINT(session))
- (void)__wt_page_evict_urgent(session, ref);
+ WT_IGNORE_RET(
+ __wt_page_evict_urgent(session, ref));
} else {
WT_RET_BUSY_OK(__wt_page_release_evict(session, ref));
return (0);
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h
index 70f9318f6d7..ec5c6689c3f 100644
--- a/src/third_party/wiredtiger/src/include/cursor.h
+++ b/src/third_party/wiredtiger/src/include/cursor.h
@@ -217,20 +217,23 @@ struct __wt_cursor_btree {
#endif
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CBT_ACTIVE 0x01u /* Active in the tree */
-#define WT_CBT_ITERATE_APPEND 0x02u /* Col-store: iterating append list */
-#define WT_CBT_ITERATE_NEXT 0x04u /* Next iteration configuration */
-#define WT_CBT_ITERATE_PREV 0x08u /* Prev iteration configuration */
-#define WT_CBT_NO_TXN 0x10u /* Non-txn cursor (e.g. a checkpoint) */
-#define WT_CBT_SEARCH_SMALLEST 0x20u /* Row-store: small-key insert list */
-#define WT_CBT_VAR_ONPAGE_MATCH 0x40u /* Var-store: on-page recno match */
+#define WT_CBT_ACTIVE 0x001u /* Active in the tree */
+#define WT_CBT_ITERATE_APPEND 0x002u /* Col-store: iterating append list */
+#define WT_CBT_ITERATE_NEXT 0x004u /* Next iteration configuration */
+#define WT_CBT_ITERATE_PREV 0x008u /* Prev iteration configuration */
+#define WT_CBT_NO_TXN 0x010u /* Non-txn cursor (e.g. a checkpoint) */
+#define WT_CBT_RETRY_NEXT 0x020u /* Next, resulted in prepare conflict */
+#define WT_CBT_RETRY_PREV 0x040u /* Prev, resulted in prepare conflict */
+#define WT_CBT_SEARCH_SMALLEST 0x080u /* Row-store: small-key insert list */
+#define WT_CBT_VAR_ONPAGE_MATCH 0x100u /* Var-store: on-page recno match */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
#define WT_CBT_POSITION_MASK /* Flags associated with position */ \
(WT_CBT_ITERATE_APPEND | WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV | \
- WT_CBT_SEARCH_SMALLEST | WT_CBT_VAR_ONPAGE_MATCH)
+ WT_CBT_RETRY_NEXT | WT_CBT_RETRY_PREV | WT_CBT_SEARCH_SMALLEST | \
+ WT_CBT_VAR_ONPAGE_MATCH)
- uint8_t flags;
+ uint32_t flags;
};
struct __wt_cursor_bulk {
diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i
index a4e986c4325..d338c47dfae 100644
--- a/src/third_party/wiredtiger/src/include/cursor.i
+++ b/src/third_party/wiredtiger/src/include/cursor.i
@@ -311,6 +311,20 @@ __wt_cursor_dhandle_decr_use(WT_SESSION_IMPL *session)
}
/*
+ * __cursor_kv_return --
+ * Return a page referenced key/value pair to the application.
+ */
+static inline int
+__cursor_kv_return(
+ WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
+{
+ WT_RET(__wt_key_return(session, cbt));
+ WT_RET(__wt_value_return(session, cbt, upd));
+
+ return (0);
+}
+
+/*
* __cursor_func_init --
* Cursor call setup.
*/
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 8b69f9ef244..d884401feb2 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -68,7 +68,7 @@ extern int __wt_block_ext_prealloc(WT_SESSION_IMPL *session, u_int max) WT_GCC_F
extern int __wt_block_ext_discard(WT_SESSION_IMPL *session, u_int max) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_salvage_start(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_salvage_end(WT_SESSION_IMPL *session, WT_BLOCK *block) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_block_offset_invalid(WT_BLOCK *block, wt_off_t offset, uint32_t size);
+extern bool __wt_block_offset_invalid(WT_BLOCK *block, wt_off_t offset, uint32_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_salvage_next(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr, size_t *addr_sizep, bool *eofp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_salvage_valid(WT_SESSION_IMPL *session, WT_BLOCK *block, uint8_t *addr, size_t addr_size, bool valid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_block_verify_start(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -100,7 +100,7 @@ extern void __wt_cursor_key_order_reset(WT_CURSOR_BTREE *cbt);
extern void __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt);
extern int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp);
+extern int __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *valid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -128,7 +128,7 @@ extern int __wt_debug_tree(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *re
extern int __wt_debug_page(WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all);
+extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_ref_out(WT_SESSION_IMPL *session, WT_REF *ref);
extern void __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep);
@@ -141,7 +141,7 @@ extern int __wt_btree_discard(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBU
extern void __wt_root_ref_init(WT_REF *root_ref, WT_PAGE *root, bool is_recno);
extern int __wt_btree_tree_open(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_btree_immediately_durable(WT_SESSION_IMPL *session);
+extern bool __wt_btree_immediately_durable(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_btree_huffman_close(WT_SESSION_IMPL *session);
extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -200,15 +200,15 @@ extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_
extern WT_UPDATE *__wt_update_obsolete_check(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd);
extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert, bool restore) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_las_nonempty(WT_SESSION_IMPL *session);
+extern bool __wt_las_nonempty(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_las_stats_update(WT_SESSION_IMPL *session);
extern int __wt_las_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_cursor_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_las_cursor(WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags);
extern int __wt_las_cursor_close(WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref);
-extern bool __wt_las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref);
+extern bool __wt_las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_PAGE *page, WT_MULTI *multi, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_remove_block(WT_SESSION_IMPL *session, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -369,7 +369,7 @@ extern int __wt_curtable_open(WT_SESSION_IMPL *session, const char *uri, WT_CURS
extern int __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref);
extern void __wt_evict_server_wake(WT_SESSION_IMPL *session);
-extern bool __wt_evict_thread_chk(WT_SESSION_IMPL *session);
+extern bool __wt_evict_thread_chk(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_evict_thread_stop(WT_SESSION_IMPL *session, WT_THREAD *thread) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_evict_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -377,7 +377,7 @@ extern int __wt_evict_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBU
extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session);
extern int __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, bool readonly, double pct_full) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref);
+extern bool __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v);
extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session);
extern int __wt_verbose_dump_cache(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -495,7 +495,7 @@ extern int __wt_lsm_tree_worker(WT_SESSION_IMPL *session, const char *uri, int (
extern int __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool force, WT_LSM_CHUNK **chunkp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_work_switch(WT_SESSION_IMPL *session, WT_LSM_WORK_UNIT **entryp, bool *ran) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_work_bloom(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_lsm_chunk_visible_all(WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk);
+extern bool __wt_lsm_chunk_visible_all(WT_SESSION_IMPL *session, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_free_chunks(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -557,7 +557,7 @@ extern void __wt_free_int(WT_SESSION_IMPL *session, const void *p_arg) WT_GCC_FU
extern int __wt_errno(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen);
extern int __wt_ext_map_windows_error(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint32_t windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_handle_is_open(WT_SESSION_IMPL *session, const char *name);
+extern bool __wt_handle_is_open(WT_SESSION_IMPL *session, const char *name) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_close_connection_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -742,11 +742,11 @@ extern void __wt_readunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l);
extern int __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l);
extern void __wt_writeunlock(WT_SESSION_IMPL *session, WT_RWLOCK *l);
-extern bool __wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *l);
+extern bool __wt_rwlock_islocked(WT_SESSION_IMPL *session, WT_RWLOCK *l) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern uint32_t __wt_nlpo2_round(uint32_t v);
extern uint32_t __wt_nlpo2(uint32_t v);
extern uint32_t __wt_log2_int(uint32_t n);
-extern bool __wt_ispo2(uint32_t v);
+extern bool __wt_ispo2(uint32_t v) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2);
extern void __wt_random_init(WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_random_init_seed(WT_SESSION_IMPL *session, WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
diff --git a/src/third_party/wiredtiger/src/include/extern_posix.h b/src/third_party/wiredtiger/src/include/extern_posix.h
index bc71b77d0f6..8b92d99d4f1 100644
--- a/src/third_party/wiredtiger/src/include/extern_posix.h
+++ b/src/third_party/wiredtiger/src/include/extern_posix.h
@@ -19,9 +19,9 @@ extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond);
extern void __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp);
extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_absolute_path(const char *path);
+extern bool __wt_absolute_path(const char *path) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern const char *__wt_path_separator(void);
-extern bool __wt_has_priv(void);
+extern bool __wt_has_priv(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_stream_set_line_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_stream_set_no_buffer(FILE *fp) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
diff --git a/src/third_party/wiredtiger/src/include/extern_win.h b/src/third_party/wiredtiger/src/include/extern_win.h
index bdd54b7954a..50808750c56 100644
--- a/src/third_party/wiredtiger/src/include/extern_win.h
+++ b/src/third_party/wiredtiger/src/include/extern_win.h
@@ -17,9 +17,9 @@ extern void __wt_cond_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond);
extern void __wt_cond_destroy(WT_SESSION_IMPL *session, WT_CONDVAR **condp);
extern int __wt_once(void (*init_routine)(void)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_get_vm_pagesize(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_absolute_path(const char *path);
+extern bool __wt_absolute_path(const char *path) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern const char *__wt_path_separator(void);
-extern bool __wt_has_priv(void);
+extern bool __wt_has_priv(void) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_stream_set_line_buffer(FILE *fp);
extern void __wt_stream_set_no_buffer(FILE *fp);
extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds);
@@ -30,8 +30,8 @@ extern void __wt_thread_id(uintmax_t *id);
extern int __wt_thread_str(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern uintmax_t __wt_process_id(void);
extern void __wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp);
-extern int __wt_to_utf16_string(WT_SESSION_IMPL *session, const char*utf8, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_to_utf8_string(WT_SESSION_IMPL *session, const wchar_t*wide, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_to_utf16_string(WT_SESSION_IMPL *session, const char *utf8, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_to_utf8_string(WT_SESSION_IMPL *session, const wchar_t *wide, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern DWORD __wt_getlasterror(void);
extern int __wt_map_windows_error(DWORD windows_error) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern const char *__wt_formatmessage(WT_SESSION_IMPL *session, DWORD windows_error);
diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h
index a3a81c21569..c4d7def85c0 100644
--- a/src/third_party/wiredtiger/src/include/misc.h
+++ b/src/third_party/wiredtiger/src/include/misc.h
@@ -11,7 +11,10 @@
* and unused function return values.
*/
#define WT_UNUSED(var) (void)(var)
-#define WT_NOT_READ(var) (void)(var)
+#define WT_NOT_READ(v, val) do { \
+ (v) = (val); \
+ (void)(v); \
+} while (0);
#define WT_IGNORE_RET(call) do { \
int __ignored_ret; \
__ignored_ret = (call); \
diff --git a/src/third_party/wiredtiger/src/include/mutex.h b/src/third_party/wiredtiger/src/include/mutex.h
index 2a3fc7448f8..ba32d166f03 100644
--- a/src/third_party/wiredtiger/src/include/mutex.h
+++ b/src/third_party/wiredtiger/src/include/mutex.h
@@ -44,9 +44,8 @@ struct __wt_rwlock { /* Read/write lock */
uint8_t current; /* Current ticket */
uint8_t next; /* Next available ticket */
uint8_t reader; /* Read queue ticket */
- uint8_t __notused; /* Padding */
- uint16_t readers_active;/* Count of active readers */
- uint16_t readers_queued;/* Count of queued readers */
+ uint8_t readers_queued; /* Count of queued readers */
+ uint32_t readers_active;/* Count of active readers */
} s;
} u;
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 7ef63cb0eaf..01a982b8602 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -627,6 +627,9 @@ struct __wt_connection_stats {
int64_t txn_read_queue_head;
int64_t txn_read_queue_inserts;
int64_t txn_read_queue_len;
+ int64_t txn_rollback_to_stable;
+ int64_t txn_rollback_upd_aborted;
+ int64_t txn_rollback_las_removed;
int64_t txn_set_ts;
int64_t txn_set_ts_commit;
int64_t txn_set_ts_commit_upd;
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index dd7f5d4a8bc..19e0be2d695 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -102,6 +102,7 @@ struct __wt_txn_global {
volatile uint64_t oldest_id;
WT_DECL_TIMESTAMP(commit_timestamp)
+ WT_DECL_TIMESTAMP(last_ckpt_timestamp)
WT_DECL_TIMESTAMP(oldest_timestamp)
WT_DECL_TIMESTAMP(pinned_timestamp)
WT_DECL_TIMESTAMP(recovery_timestamp)
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 5fcf8ee11c9..9061157ff5a 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -9,6 +9,11 @@
static inline int __wt_txn_id_check(WT_SESSION_IMPL *session);
static inline void __wt_txn_read_last(WT_SESSION_IMPL *session);
+typedef enum {
+ WT_VISIBLE_FALSE=0, /* Not a visible update */
+ WT_VISIBLE_PREPARE=1, /* Prepared update */
+ WT_VISIBLE_TRUE=2 /* A visible update */
+} WT_VISIBLE_TYPE;
#ifdef HAVE_TIMESTAMPS
/*
* __wt_txn_timestamp_flags --
@@ -291,7 +296,7 @@ __wt_txn_modify(WT_SESSION_IMPL *session, WT_UPDATE *upd)
/*
* __wt_txn_modify_page_delete --
- * Remember a page fast-deleted by the current transaction.
+ * Remember a page truncated by the current transaction.
*/
static inline int
__wt_txn_modify_page_delete(WT_SESSION_IMPL *session, WT_REF *ref)
@@ -539,33 +544,74 @@ __wt_txn_visible(
}
/*
+ * __wt_txn_upd_visible_type --
+ * Return the given update's visibility type for the current transaction.
+ */
+static inline WT_VISIBLE_TYPE
+__wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd)
+{
+ uint8_t upd_state;
+ bool upd_visible;
+
+ for (;;__wt_yield()) {
+ /* Commit is in progress, yield and try again. */
+ if ((upd_state = upd->state) == WT_UPDATE_STATE_LOCKED)
+ continue;
+
+ upd_visible = __wt_txn_visible(
+ session, upd->txnid, WT_TIMESTAMP_NULL(&upd->timestamp));
+
+ /*
+ * The visibility check is only valid if the update does not
+ * change state. If the state does change, recheck visibility.
+ */
+ if (upd->state == upd_state)
+ break;
+ }
+
+ if (!upd_visible)
+ return (WT_VISIBLE_FALSE);
+
+ if (upd_state == WT_UPDATE_STATE_PREPARED)
+ return (F_ISSET(&session->txn, WT_TXN_IGNORE_PREPARE) ?
+ WT_VISIBLE_FALSE : WT_VISIBLE_PREPARE);
+
+ return (WT_VISIBLE_TRUE);
+}
+
+/*
* __wt_txn_upd_visible --
* Can the current transaction see the given update.
*/
static inline bool
__wt_txn_upd_visible(WT_SESSION_IMPL *session, WT_UPDATE *upd)
{
- return (__wt_txn_visible(session,
- upd->txnid, WT_TIMESTAMP_NULL(&upd->timestamp)));
+ return (__wt_txn_upd_visible_type(session, upd) == WT_VISIBLE_TRUE);
}
/*
* __wt_txn_read --
* Get the first visible update in a list (or NULL if none are visible).
*/
-static inline WT_UPDATE *
-__wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd)
+static inline int
+__wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_UPDATE **updp)
{
static WT_UPDATE tombstone = {
.txnid = WT_TXN_NONE, .type = WT_UPDATE_TOMBSTONE
};
+ WT_VISIBLE_TYPE upd_visible;
bool skipped_birthmark;
+ *updp = NULL;
for (skipped_birthmark = false; upd != NULL; upd = upd->next) {
/* Skip reserved place-holders, they're never visible. */
- if (upd->type != WT_UPDATE_RESERVE &&
- __wt_txn_upd_visible(session, upd))
- break;
+ if (upd->type != WT_UPDATE_RESERVE) {
+ upd_visible = __wt_txn_upd_visible_type(session, upd);
+ if (upd_visible == WT_VISIBLE_TRUE)
+ break;
+ if (upd_visible == WT_VISIBLE_PREPARE)
+ return (WT_PREPARE_CONFLICT);
+ }
/* An invisible birthmark is equivalent to a tombstone. */
if (upd->type == WT_UPDATE_BIRTHMARK)
skipped_birthmark = true;
@@ -574,7 +620,8 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd)
if (upd == NULL && skipped_birthmark)
upd = &tombstone;
- return (upd == NULL || upd->type == WT_UPDATE_BIRTHMARK ? NULL : upd);
+ *updp = (upd == NULL || upd->type == WT_UPDATE_BIRTHMARK ? NULL : upd);
+ return (0);
}
/*
@@ -786,21 +833,32 @@ static inline int
__wt_txn_update_check(WT_SESSION_IMPL *session, WT_UPDATE *upd)
{
WT_TXN *txn;
+ bool ignore_prepare_set;
txn = &session->txn;
- if (txn->isolation == WT_ISO_SNAPSHOT)
- while (upd != NULL && !__wt_txn_upd_visible(session, upd)) {
- if (upd->txnid != WT_TXN_ABORTED) {
- WT_STAT_CONN_INCR(
- session, txn_update_conflict);
- WT_STAT_DATA_INCR(
- session, txn_update_conflict);
- return (__wt_txn_rollback_required(session,
+ if (txn->isolation != WT_ISO_SNAPSHOT)
+ return (0);
+
+ /*
+ * Clear the ignore prepare setting of txn, as it is not supposed to
+ * affect the visibility for update operations.
+ */
+ ignore_prepare_set = F_ISSET(txn, WT_TXN_IGNORE_PREPARE);
+ F_CLR(txn, WT_TXN_IGNORE_PREPARE);
+ for (;upd != NULL && !__wt_txn_upd_visible(session, upd);
+ upd = upd->next) {
+ if (upd->txnid != WT_TXN_ABORTED) {
+ if (ignore_prepare_set)
+ F_SET(txn, WT_TXN_IGNORE_PREPARE);
+ WT_STAT_CONN_INCR(session, txn_update_conflict);
+ WT_STAT_DATA_INCR(session, txn_update_conflict);
+ return (__wt_txn_rollback_required(session,
"conflict between concurrent operations"));
- }
- upd = upd->next;
}
+ }
+ if (ignore_prepare_set)
+ F_SET(txn, WT_TXN_IGNORE_PREPARE);
return (0);
}
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index a75c22497ce..1f2a438b8e9 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -2431,8 +2431,9 @@ struct __wt_connection {
* timestamps of all active readers\, and \c stable returns the most
* recent \c stable_timestamp set with WT_CONNECTION::set_timestamp.
* See @ref transaction_timestamps., a string\, chosen from the
- * following options: \c "all_committed"\, \c "oldest"\, \c "pinned"\,
- * \c "recovery"\, \c "stable"; default \c all_committed.}
+ * following options: \c "all_committed"\, \c "last_checkpoint"\, \c
+ * "oldest"\, \c "pinned"\, \c "recovery"\, \c "stable"; default \c
+ * all_committed.}
* @configend
* @errors
* If there is no matching timestamp (e.g., if this method is called
@@ -5562,81 +5563,87 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1312
/*! transaction: read timestamp queue length */
#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1313
+/*! transaction: rollback to stable calls */
+#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE 1314
+/*! transaction: rollback to stable updates aborted */
+#define WT_STAT_CONN_TXN_ROLLBACK_UPD_ABORTED 1315
+/*! transaction: rollback to stable updates removed from lookaside */
+#define WT_STAT_CONN_TXN_ROLLBACK_LAS_REMOVED 1316
/*! transaction: set timestamp calls */
-#define WT_STAT_CONN_TXN_SET_TS 1314
+#define WT_STAT_CONN_TXN_SET_TS 1317
/*! transaction: set timestamp commit calls */
-#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1315
+#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1318
/*! transaction: set timestamp commit updates */
-#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1316
+#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1319
/*! transaction: set timestamp oldest calls */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1317
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1320
/*! transaction: set timestamp oldest updates */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1318
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1321
/*! transaction: set timestamp stable calls */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE 1319
+#define WT_STAT_CONN_TXN_SET_TS_STABLE 1322
/*! transaction: set timestamp stable updates */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1320
+#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1323
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1321
+#define WT_STAT_CONN_TXN_BEGIN 1324
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1322
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1325
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1323
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1326
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1324
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1327
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1325
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1328
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1326
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1329
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1327
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1330
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1328
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1331
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1329
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1332
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1330
+#define WT_STAT_CONN_TXN_CHECKPOINT 1333
/*!
* transaction: transaction checkpoints skipped because database was
* clean
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1331
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1334
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1332
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1335
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1333
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1336
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1334
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1337
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1335
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1338
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1336
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1339
/*!
* transaction: transaction range of IDs currently pinned by named
* snapshots
*/
-#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1337
+#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1340
/*! transaction: transaction range of timestamps currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1338
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1341
/*!
* transaction: transaction range of timestamps pinned by the oldest
* timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1339
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1342
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1340
+#define WT_STAT_CONN_TXN_SYNC 1343
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1341
+#define WT_STAT_CONN_TXN_COMMIT 1344
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1342
+#define WT_STAT_CONN_TXN_ROLLBACK 1345
/*! transaction: update conflicts */
-#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1343
+#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1346
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
index 7050a66a558..4d9f6f92832 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
@@ -593,7 +593,7 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) {
if (clsm->chunks != NULL && ngood < clsm->nchunks) {
close_range_start = ngood;
close_range_end = clsm->nchunks;
- } else if (!F_ISSET(clsm, WT_CLSM_OPEN_READ) && nupdates > 0 ) {
+ } else if (!F_ISSET(clsm, WT_CLSM_OPEN_READ) && nupdates > 0) {
close_range_start = 0;
close_range_end = WT_MIN(nchunks, clsm->nchunks);
if (close_range_end > nupdates)
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
index 1b92028072d..9a7ab20f18f 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
@@ -911,8 +911,7 @@ __wt_lsm_tree_drop(
int tret;
bool locked;
- locked = false;
- WT_NOT_READ(locked);
+ WT_NOT_READ(locked, false);
/* Get the LSM tree. */
WT_RET(__wt_lsm_tree_get(session, name, true, &lsm_tree));
@@ -971,8 +970,7 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session,
bool locked;
old = NULL;
- locked = false;
- WT_NOT_READ(locked);
+ WT_NOT_READ(locked, false);
/* Get the LSM tree. */
WT_RET(__wt_lsm_tree_get(session, olduri, true, &lsm_tree));
@@ -1043,8 +1041,7 @@ __wt_lsm_tree_truncate(
WT_UNUSED(cfg);
chunk = NULL;
- locked = false;
- WT_NOT_READ(locked);
+ WT_NOT_READ(locked, false);
/* Get the LSM tree. */
WT_RET(__wt_lsm_tree_get(session, name, true, &lsm_tree));
@@ -1382,8 +1379,8 @@ __wt_lsm_tree_worker(WT_SESSION_IMPL *session,
u_int i;
bool exclusive, locked, need_release;
- locked = need_release = false;
- WT_NOT_READ(locked);
+ WT_NOT_READ(locked, false);
+ WT_NOT_READ(need_release, false);
exclusive = FLD_ISSET(open_flags, WT_DHANDLE_EXCLUSIVE);
WT_RET(__wt_lsm_tree_get(session, uri, exclusive, &lsm_tree));
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
index dcb9b34802a..6f18f4fb152 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
@@ -345,8 +345,8 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
WT_TXN_ISOLATION saved_isolation;
bool flush_set, release_dhandle;
- flush_set = release_dhandle = false;
- WT_NOT_READ(flush_set);
+ WT_NOT_READ(flush_set, false);
+ release_dhandle = false;
/*
* If the chunk is already checkpointed, make sure it is also evicted.
@@ -360,8 +360,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
if (ret == 0)
chunk->evicted = 1;
else if (ret == EBUSY) {
- ret = 0;
- WT_NOT_READ(ret);
+ WT_NOT_READ(ret, 0);
} else
WT_RET_MSG(session, ret, "discard handle");
}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c
index ca810fa8d88..811c0576eef 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_fs.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c
@@ -97,7 +97,7 @@ __posix_directory_sync(WT_SESSION_IMPL *session, const char *path)
dir = tmp->mem;
strrchr(dir, '/')[1] = '\0';
- fd = -1; /* -Wconditional-uninitialized */
+ fd = 0; /* -Wconditional-uninitialized */
WT_SYSCALL_RETRY((
(fd = open(dir, O_RDONLY, 0444)) == -1 ? -1 : 0), ret);
if (ret != 0)
diff --git a/src/third_party/wiredtiger/src/os_win/os_dir.c b/src/third_party/wiredtiger/src/os_win/os_dir.c
index 3b78106f3b4..d5095e7ef78 100644
--- a/src/third_party/wiredtiger/src/os_win/os_dir.c
+++ b/src/third_party/wiredtiger/src/os_win/os_dir.c
@@ -59,30 +59,41 @@ __directory_list_worker(WT_FILE_SYSTEM *file_system,
WT_ERR(__wt_map_windows_error(windows_error));
}
- count = 0;
- do {
+ for (count = 0;;) {
/*
* Skip . and ..
*/
if (wcscmp(finddata.cFileName, L".") == 0 ||
wcscmp(finddata.cFileName, L"..") == 0)
- continue;
+ goto skip;
/* The list of files is optionally filtered by a prefix. */
if (prefix != NULL &&
wcsncmp(finddata.cFileName, prefix_wide->data,
prefix_widelen) != 0)
- continue;
+ goto skip;
WT_ERR(__wt_realloc_def(
session, &dirallocsz, count + 1, &entries));
-
WT_ERR(__wt_to_utf8_string(
session, finddata.cFileName, &file_utf8));
WT_ERR(__wt_strdup(session, file_utf8->data, &entries[count]));
++count;
__wt_scr_free(session, &file_utf8);
- } while (!single && FindNextFileW(findhandle, &finddata) != 0);
+
+ if (single)
+ break;
+
+skip: if (FindNextFileW(findhandle, &finddata) != 0)
+ continue;
+ windows_error = __wt_getlasterror();
+ if (windows_error == ERROR_NO_MORE_FILES)
+ break;
+ __wt_errx(session,
+ "%s: directory-list: FindNextFileW: %s",
+ pathbuf->data, __wt_formatmessage(session, windows_error));
+ WT_ERR(__wt_map_windows_error(windows_error));
+ }
*dirlistp = entries;
*countp = count;
diff --git a/src/third_party/wiredtiger/src/os_win/os_utf8.c b/src/third_party/wiredtiger/src/os_win/os_utf8.c
index 077c39db3ef..1c9efe39506 100644
--- a/src/third_party/wiredtiger/src/os_win/os_utf8.c
+++ b/src/third_party/wiredtiger/src/os_win/os_utf8.c
@@ -14,7 +14,7 @@
*/
int
__wt_to_utf16_string(
- WT_SESSION_IMPL *session, const char* utf8, WT_ITEM **outbuf)
+ WT_SESSION_IMPL *session, const char *utf8, WT_ITEM **outbuf)
{
DWORD windows_error;
int bufferSize;
@@ -50,7 +50,7 @@ __wt_to_utf16_string(
*/
int
__wt_to_utf8_string(
- WT_SESSION_IMPL *session, const wchar_t* wide, WT_ITEM **outbuf)
+ WT_SESSION_IMPL *session, const wchar_t *wide, WT_ITEM **outbuf)
{
DWORD windows_error;
int bufferSize;
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index f4d0fc0b1ef..1c46da9be10 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -1341,12 +1341,14 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
* started. The global commit point can move forward during
* reconciliation so we use a cached copy to avoid races when a
* concurrent transaction commits or rolls back while we are
- * examining its updates.
+ * examining its updates. As prepared transaction IDs are
+ * globally visible, we need to check the update state as well.
*/
if (F_ISSET(r, WT_REC_EVICT) &&
+ (upd->state != WT_UPDATE_STATE_READY ||
(F_ISSET(r, WT_REC_VISIBLE_ALL) ?
WT_TXNID_LE(r->last_running, txnid) :
- !__txn_visible_id(session, txnid))) {
+ !__txn_visible_id(session, txnid)))) {
uncommitted = r->update_uncommitted = true;
continue;
}
@@ -1783,12 +1785,12 @@ __rec_child_modify(WT_SESSION_IMPL *session,
/*
* If called during checkpoint, the child is being
* considered by the eviction server or the child is a
- * fast-delete page being read. The eviction may have
+ * truncated page being read. The eviction may have
* started before the checkpoint and so we must wait
* for the eviction to be resolved. I suspect we could
- * handle fast-delete reads, but we can't distinguish
- * between the two and fast-delete reads aren't expected
- * to be common.
+ * handle reads of truncated pages, but we can't
+ * distinguish between the two and reads of truncated
+ * pages aren't expected to be common.
*/
break;
@@ -5630,8 +5632,7 @@ build:
if (key_onpage_ovfl) {
WT_ERR(__wt_dsk_cell_data_ref(session,
WT_PAGE_ROW_LEAF, kpack, r->cur));
- key_onpage_ovfl = false;
- WT_NOT_READ(key_onpage_ovfl);
+ WT_NOT_READ(key_onpage_ovfl, false);
}
/*
diff --git a/src/third_party/wiredtiger/src/support/mtx_rw.c b/src/third_party/wiredtiger/src/support/mtx_rw.c
index 572592b9fbc..fd66a1a40bb 100644
--- a/src/third_party/wiredtiger/src/support/mtx_rw.c
+++ b/src/third_party/wiredtiger/src/support/mtx_rw.c
@@ -48,9 +48,8 @@
* uint8_t current; // Current ticket
* uint8_t next; // Next available ticket
* uint8_t reader; // Read queue ticket
- * uint8_t __notused; // Padding
- * uint16_t readers_active; // Count of active readers
- * uint16_t readers_queued; // Count of queued readers
+ * uint8_t readers_queued; // Count of queued readers
+ * uint32_t readers_active; // Count of active readers
* } s;
* } u;
*
@@ -75,6 +74,12 @@
* 'reader' to 'next' (i.e. readers are scheduled after any queued writers,
* avoiding starvation), then atomically incrementing 'readers_queued'.
*
+ * We limit how many readers can queue: we don't allow more readers to queue
+ * than there are active writers (calculated as `next - current`): otherwise,
+ * in write-heavy workloads, readers can keep queuing up in front of writers
+ * and throughput is unstable. The remaining read requests wait without any
+ * ordering.
+ *
* The 'next' field is a 1-byte value so the available ticket number wraps
* after 256 requests. If a thread's write lock request would cause the 'next'
* field to catch up with 'current', instead it waits to avoid the same ticket
@@ -173,12 +178,10 @@ __wt_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
int pause_cnt;
bool set_stats;
+ stats = NULL; /* -Wconditional-uninitialized */
+ time_start = time_stop = 0; /* -Wconditional-uninitialized */
+
WT_STAT_CONN_INCR(session, rwlock_read);
- stats = (int64_t **)S2C(session)->stats;
- set_stats = (l->stat_read_count_off != -1 && WT_STAT_ENABLED(session));
- time_start = time_stop = 0;
- if (set_stats)
- stats[session->stat_bucket][l->stat_read_count_off]++;
WT_DIAGNOSTIC_YIELD;
@@ -236,8 +239,12 @@ stall: __wt_cond_wait(session,
break;
}
- if (set_stats)
+ set_stats = (l->stat_read_count_off != -1 && WT_STAT_ENABLED(session));
+ if (set_stats) {
+ stats = (int64_t **)S2C(session)->stats;
+ stats[session->stat_bucket][l->stat_read_count_off]++;
time_start = __wt_clock(session);
+ }
/* Wait for our group to start. */
for (pause_cnt = 0; ticket != l->u.s.current; pause_cnt++) {
if (pause_cnt < 1000)
@@ -370,12 +377,10 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
int pause_cnt;
bool set_stats;
+ stats = NULL; /* -Wconditional-uninitialized */
+ time_start = time_stop = 0; /* -Wconditional-uninitialized */
+
WT_STAT_CONN_INCR(session, rwlock_write);
- stats = (int64_t **)S2C(session)->stats;
- set_stats = (l->stat_write_count_off != -1 && WT_STAT_ENABLED(session));
- time_start = time_stop = 0;
- if (set_stats)
- stats[session->stat_bucket][l->stat_write_count_off]++;
for (;;) {
old.u.v = l->u.v;
@@ -398,6 +403,12 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
break;
}
+ set_stats = (l->stat_write_count_off != -1 && WT_STAT_ENABLED(session));
+ if (set_stats) {
+ stats = (int64_t **)S2C(session)->stats;
+ stats[session->stat_bucket][l->stat_write_count_off]++;
+ time_start = __wt_clock(session);
+ }
/*
* Wait for our group to start and any readers to drain.
*
@@ -406,8 +417,6 @@ __wt_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *l)
* could see no readers active from a different batch and decide that
* we have the lock.
*/
- if (set_stats)
- time_start = __wt_clock(session);
for (pause_cnt = 0, old.u.v = l->u.v;
ticket != old.u.s.current || old.u.s.readers_active != 0;
pause_cnt++, old.u.v = l->u.v) {
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 40a07be0174..ae13f7d8abe 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -1052,6 +1052,9 @@ static const char * const __stats_connection_desc[] = {
"transaction: read timestamp queue inserts to head",
"transaction: read timestamp queue inserts total",
"transaction: read timestamp queue length",
+ "transaction: rollback to stable calls",
+ "transaction: rollback to stable updates aborted",
+ "transaction: rollback to stable updates removed from lookaside",
"transaction: set timestamp calls",
"transaction: set timestamp commit calls",
"transaction: set timestamp commit updates",
@@ -1438,6 +1441,9 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->txn_read_queue_head = 0;
stats->txn_read_queue_inserts = 0;
stats->txn_read_queue_len = 0;
+ stats->txn_rollback_to_stable = 0;
+ stats->txn_rollback_upd_aborted = 0;
+ stats->txn_rollback_las_removed = 0;
stats->txn_set_ts = 0;
stats->txn_set_ts_commit = 0;
stats->txn_set_ts_commit_upd = 0;
@@ -1956,6 +1962,12 @@ __wt_stat_connection_aggregate(
to->txn_read_queue_inserts +=
WT_STAT_READ(from, txn_read_queue_inserts);
to->txn_read_queue_len += WT_STAT_READ(from, txn_read_queue_len);
+ to->txn_rollback_to_stable +=
+ WT_STAT_READ(from, txn_rollback_to_stable);
+ to->txn_rollback_upd_aborted +=
+ WT_STAT_READ(from, txn_rollback_upd_aborted);
+ to->txn_rollback_las_removed +=
+ WT_STAT_READ(from, txn_rollback_las_removed);
to->txn_set_ts += WT_STAT_READ(from, txn_set_ts);
to->txn_set_ts_commit += WT_STAT_READ(from, txn_set_ts_commit);
to->txn_set_ts_commit_upd +=
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 561961f4e98..3a9b3755ff5 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -1394,8 +1394,7 @@ __wt_verbose_dump_txn_one(WT_SESSION_IMPL *session, WT_TXN *txn)
#endif
const char *iso_tag;
- iso_tag = "INVALID";
- WT_NOT_READ(iso_tag);
+ WT_NOT_READ(iso_tag, "INVALID");
switch (txn->isolation) {
case WT_ISO_READ_COMMITTED:
iso_tag = "WT_ISO_READ_COMMITTED";
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 1235bc8c2b2..d3f11c5fa69 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -748,6 +748,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ WT_DECL_TIMESTAMP(ckpt_tmp_ts)
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
WT_TXN_ISOLATION saved_isolation;
@@ -899,6 +900,15 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* checkpointing the metadata since we know that all files in the
* checkpoint are now in a consistent state.
*/
+#ifdef HAVE_TIMESTAMPS
+ /*
+ * Record the timestamp from the transaction if we were successful.
+ * Store it in a temp variable now because it will be invalidated during
+ * commit but we don't want to set it until we know the checkpoint
+ * is successful.
+ */
+ __wt_timestamp_set(&ckpt_tmp_ts, &txn->read_timestamp);
+#endif
WT_ERR(__wt_txn_commit(session, NULL));
/*
@@ -942,8 +952,13 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
*/
txn_global->checkpoint_state.pinned_id = WT_TXN_NONE;
- if (full)
+ if (full) {
__checkpoint_stats(session);
+#ifdef HAVE_TIMESTAMPS
+ __wt_timestamp_set(
+ &conn->txn_global.last_ckpt_timestamp, &ckpt_tmp_ts);
+#endif
+ }
err: /*
* Reset the timer so that next checkpoint tracks the progress only if
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index d31b3995092..eef2fde5284 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -74,6 +74,7 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
&rollback_timestamp, las_timestamp.data) < 0) {
WT_ERR(cursor->remove(cursor));
++remove_cnt;
+ WT_STAT_CONN_INCR(session, txn_rollback_las_removed);
} else
++las_total;
}
@@ -111,6 +112,7 @@ __txn_abort_newer_update(WT_SESSION_IMPL *session,
if (__wt_timestamp_cmp(
rollback_timestamp, &next_upd->timestamp) < 0) {
next_upd->txnid = WT_TXN_ABORTED;
+ WT_STAT_CONN_INCR(session, txn_rollback_upd_aborted);
__wt_timestamp_set_zero(&next_upd->timestamp);
/*
@@ -425,6 +427,7 @@ __wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[])
conn = S2C(session);
+ WT_STAT_CONN_INCR(session, txn_rollback_to_stable);
/*
* Mark that a rollback operation is in progress and wait for eviction
* to drain. This is necessary because lookaside eviction uses
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index 280425eb56e..2266a9cd6f5 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -239,7 +239,10 @@ __txn_global_query_timestamp(
break;
}
__wt_readunlock(session, &txn_global->commit_timestamp_rwlock);
- } else if (WT_STRING_MATCH("oldest", cval.str, cval.len)) {
+ } else if (WT_STRING_MATCH("last_checkpoint", cval.str, cval.len))
+ /* Read-only value forever. No lock needed. */
+ __wt_timestamp_set(&ts, &txn_global->last_ckpt_timestamp);
+ else if (WT_STRING_MATCH("oldest", cval.str, cval.len)) {
if (!txn_global->has_oldest_timestamp)
return (WT_NOTFOUND);
WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
diff --git a/src/third_party/wiredtiger/src/utilities/util_load.c b/src/third_party/wiredtiger/src/utilities/util_load.c
index 3ed21fc591c..dab24930fe6 100644
--- a/src/third_party/wiredtiger/src/utilities/util_load.c
+++ b/src/third_party/wiredtiger/src/utilities/util_load.c
@@ -124,8 +124,10 @@ load_dump(WT_SESSION *session)
"dump=%s%s%s",
hex ? "hex" : "print",
append ? ",append" : "",
- no_overwrite ? ",overwrite=false" : "")) != 0)
- return (util_err(session, ret, NULL));
+ no_overwrite ? ",overwrite=false" : "")) != 0) {
+ ret = util_err(session, ret, NULL);
+ goto err;
+ }
if ((ret = session->open_cursor(
session, uri, NULL, config, &cursor)) != 0) {
ret = util_err(session, ret, "%s: session.open_cursor", uri);
@@ -494,8 +496,10 @@ config_rename(WT_SESSION *session, char **urip, const char *name)
*p = '\0';
p = strchr(p + 1, ':');
if ((ret = __wt_snprintf(
- buf, len, "%s:%s%s", *urip, name, p == NULL ? "" : p)) != 0)
+ buf, len, "%s:%s%s", *urip, name, p == NULL ? "" : p)) != 0) {
+ free(buf);
return (util_err(session, ret, NULL));
+ }
*urip = buf;
return (0);
diff --git a/src/third_party/wiredtiger/test/bloom/test_bloom.c b/src/third_party/wiredtiger/test/bloom/test_bloom.c
index 10607a3719c..dcc7ab372a9 100644
--- a/src/third_party/wiredtiger/test/bloom/test_bloom.c
+++ b/src/third_party/wiredtiger/test/bloom/test_bloom.c
@@ -39,7 +39,8 @@ static struct {
uint32_t c_ops;
uint32_t c_k; /* Number of hash iterations */
uint32_t c_factor; /* Number of bits per item */
- uint32_t c_srand;
+
+ WT_RAND_STATE rand;
uint8_t **entries;
} g;
@@ -67,10 +68,9 @@ main(int argc, char *argv[])
g.c_key_max = 100;
g.c_k = 8;
g.c_factor = 16;
- g.c_srand = 3233456;
/* Set values from the command line. */
- while ((ch = __wt_getopt(progname, argc, argv, "c:f:k:o:s:")) != EOF)
+ while ((ch = __wt_getopt(progname, argc, argv, "c:f:k:o:")) != EOF)
switch (ch) {
case 'c': /* Cache size */
g.c_cache = (u_int)atoi(__wt_optarg);
@@ -78,15 +78,12 @@ main(int argc, char *argv[])
case 'f': /* Factor */
g.c_factor = (u_int)atoi(__wt_optarg);
break;
- case 'k': /* Number of hash functions */
+ case 'k': /* Number of hash functions */
g.c_k = (u_int)atoi(__wt_optarg);
break;
case 'o': /* Number of ops */
g.c_ops = (u_int)atoi(__wt_optarg);
break;
- case 's': /* Number of ops */
- g.c_srand = (u_int)atoi(__wt_optarg);
- break;
default:
usage();
}
@@ -184,7 +181,7 @@ run(void)
memset((void *)item.data, 'a', item.size);
for (i = 0, fp = 0; i < g.c_ops; i++) {
((uint8_t *)item.data)[i % item.size] =
- 'a' + ((uint8_t)rand() % 26);
+ 'a' + (__wt_random(&g.rand) % 26);
if ((ret = __wt_bloom_get(bloomp, &item)) == 0)
++fp;
if (ret != 0 && ret != WT_NOTFOUND)
@@ -219,14 +216,14 @@ populate_entries(void)
uint32_t i, j;
uint8_t **entries;
- srand(g.c_srand);
+ __wt_random_init_seed(NULL, &g.rand);
entries = dcalloc(g.c_ops, sizeof(uint8_t *));
for (i = 0; i < g.c_ops; i++) {
entries[i] = dcalloc(g.c_key_max, sizeof(uint8_t));
for (j = 0; j < g.c_key_max; j++)
- entries[i][j] = 'a' + ((uint8_t)rand() % 26);
+ entries[i][j] = 'a' + (__wt_random(&g.rand) % 26);
}
g.entries = entries;
@@ -239,13 +236,12 @@ populate_entries(void)
void
usage(void)
{
- fprintf(stderr, "usage: %s [-cfkos]\n", progname);
+ fprintf(stderr, "usage: %s [-cfko]\n", progname);
fprintf(stderr, "%s",
"\t-c cache size\n"
"\t-f number of bits per item\n"
"\t-k size of entry strings\n"
- "\t-o number of operations to perform\n"
- "\t-s random seed for run\n");
+ "\t-o number of operations to perform\n");
exit(EXIT_FAILURE);
}
diff --git a/src/third_party/wiredtiger/test/csuite/rwlock/main.c b/src/third_party/wiredtiger/test/csuite/rwlock/main.c
index e1d00344ee2..f69628dca40 100644
--- a/src/third_party/wiredtiger/test/csuite/rwlock/main.c
+++ b/src/third_party/wiredtiger/test/csuite/rwlock/main.c
@@ -171,8 +171,8 @@ thread_dump(void *arg) {
sleep(1);
printf("\n"
"rwlock { current %" PRIu8 ", next %" PRIu8
- ", reader %" PRIu8 ", readers_active %" PRIu16
- ", readers_queued %" PRIu16 " }\n",
+ ", reader %" PRIu8 ", readers_active %" PRIu32
+ ", readers_queued %" PRIu8 " }\n",
rwlock.u.s.current,
rwlock.u.s.next,
rwlock.u.s.reader,
diff --git a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
index a15baa0ba32..8a1781eae45 100644
--- a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
+++ b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
@@ -64,7 +64,7 @@ static char home[1024]; /* Program working dir */
#define MIN_TH 5
#define MIN_TIME 10
#define PREPARE_FREQ 5
-#define PREPARE_YIELD PREPARE_FREQ * 10
+#define PREPARE_YIELD (PREPARE_FREQ * 10)
#define RECORDS_FILE "records-%" PRIu32
#define STABLE_PERIOD 100
@@ -184,10 +184,11 @@ thread_ckpt_run(void *arg)
WT_RAND_STATE rnd;
WT_SESSION *session;
THREAD_DATA *td;
- uint64_t ts;
+ uint64_t stable;
uint32_t sleep_time;
int i;
bool first_ckpt;
+ char buf[128];
__wt_random_init(&rnd);
@@ -198,20 +199,20 @@ thread_ckpt_run(void *arg)
(void)unlink(ckpt_file);
testutil_check(td->conn->open_session(td->conn, NULL, NULL, &session));
first_ckpt = true;
- ts = 0;
for (i = 0; ;++i) {
sleep_time = __wt_random(&rnd) % MAX_CKPT_INVL;
sleep(sleep_time);
- if (use_ts)
- ts = global_ts;
/*
* Since this is the default, send in this string even if
* running without timestamps.
*/
testutil_check(session->checkpoint(
session, "use_timestamp=true"));
- printf("Checkpoint %d complete. Minimum ts %" PRIu64 "\n",
- i, ts);
+ testutil_check(td->conn->query_timestamp(
+ td->conn, buf, "get=last_checkpoint"));
+ sscanf(buf, "%" SCNx64, &stable);
+ printf("Checkpoint %d complete at stable %"
+ PRIu64 ".\n", i, stable);
fflush(stdout);
/*
* Create the checkpoint file so that the parent process knows
@@ -638,7 +639,7 @@ main(int argc, char *argv[])
use_ts ? "true" : "false");
printf("Parent: Create %" PRIu32
" threads; sleep %" PRIu32 " seconds\n", nth, timeout);
- printf("CONFIG: %s%s%s%s -h %s -T %" PRIu32 "-t %" PRIu32 "\n",
+ printf("CONFIG: %s%s%s%s -h %s -T %" PRIu32 " -t %" PRIu32 "\n",
progname,
compat ? " -C" : "",
inmem ? " -m" : "",
diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c
index 8d85d331c89..d46b0868887 100644
--- a/src/third_party/wiredtiger/test/format/config.c
+++ b/src/third_party/wiredtiger/test/format/config.c
@@ -191,9 +191,13 @@ config_setup(void)
/*
* Turn off truncate for LSM runs (some configurations with truncate
* always results in a timeout).
+ *
+ * WiredTiger doesn't currently support truncate and prepare at the
+ * same time, see WT-3922. For now, pick one on each run.
*/
- if (!config_is_perm("truncate") && DATASOURCE("lsm"))
- config_single("truncate=off", 0);
+ if (!config_is_perm("truncate"))
+ if (DATASOURCE("lsm") || mmrand(NULL, 0, 1) == 1)
+ config_single("truncate=off", 0);
/* Give Helium configuration a final review. */
if (DATASOURCE("helium"))
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index 4c54972516e..596d952dcc6 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -403,10 +403,8 @@ snap_check(WT_CURSOR *cursor,
break;
case WT_NOTFOUND:
break;
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
default:
- testutil_die(ret, "WT_CURSOR.search");
+ return (ret);
}
/* Check for simple matches. */
@@ -644,6 +642,19 @@ prepare_transaction(TINFO *tinfo, WT_SESSION *session)
}
/*
+ * OP_FAILED --
+ * General error handling.
+ */
+#define OP_FAILED(notfound_ok) do { \
+ positioned = false; \
+ if (intxn && (ret == WT_CACHE_FULL || ret == WT_ROLLBACK)) \
+ goto deadlock; \
+ testutil_assert((notfound_ok && ret == WT_NOTFOUND) || \
+ ret == WT_CACHE_FULL || \
+ ret == WT_PREPARE_CONFLICT || ret == WT_ROLLBACK); \
+} while (0)
+
+/*
* ops --
* Per-thread operations.
*/
@@ -825,11 +836,8 @@ ops(void *arg)
if (ret == 0) {
positioned = true;
SNAP_TRACK(READ, tinfo);
- } else {
- if (ret == WT_ROLLBACK && intxn)
- goto deadlock;
- testutil_assert(ret == WT_NOTFOUND);
- }
+ } else
+ OP_FAILED(true);
}
/* Optionally reserve a row. */
@@ -847,12 +855,8 @@ ops(void *arg)
positioned = true;
__wt_yield(); /* Let other threads proceed. */
- } else {
- positioned = false;
- if (ret == WT_ROLLBACK && intxn)
- goto deadlock;
- testutil_assert(ret == WT_NOTFOUND);
- }
+ } else
+ OP_FAILED(true);
}
/* Perform the operation. */
@@ -881,11 +885,8 @@ ops(void *arg)
if (ret == 0) {
++tinfo->insert;
SNAP_TRACK(INSERT, tinfo);
- } else {
- if (ret == WT_ROLLBACK && intxn)
- goto deadlock;
- testutil_assert(ret == WT_ROLLBACK);
- }
+ } else
+ OP_FAILED(false);
break;
case MODIFY:
/*
@@ -907,13 +908,8 @@ ops(void *arg)
if (ret == 0) {
positioned = true;
SNAP_TRACK(MODIFY, tinfo);
- } else {
- positioned = false;
- if (ret == WT_ROLLBACK && intxn)
- goto deadlock;
- testutil_assert(
- ret == WT_NOTFOUND || ret == WT_ROLLBACK);
- }
+ } else
+ OP_FAILED(true);
break;
case READ:
++tinfo->search;
@@ -921,12 +917,8 @@ ops(void *arg)
if (ret == 0) {
positioned = true;
SNAP_TRACK(READ, tinfo);
- } else {
- positioned = false;
- if (ret == WT_ROLLBACK && intxn)
- goto deadlock;
- testutil_assert(ret == WT_NOTFOUND);
- }
+ } else
+ OP_FAILED(true);
break;
case REMOVE:
remove_instead_of_truncate:
@@ -946,12 +938,8 @@ remove_instead_of_truncate:
* previous state, but not necessarily set.
*/
SNAP_TRACK(REMOVE, tinfo);
- } else {
- positioned = false;
- if (ret == WT_ROLLBACK && intxn)
- goto deadlock;
- testutil_assert(ret == WT_NOTFOUND);
- }
+ } else
+ OP_FAILED(true);
break;
case TRUNCATE:
/*
@@ -1020,11 +1008,8 @@ remove_instead_of_truncate:
if (ret == 0) {
++tinfo->truncate;
SNAP_TRACK(TRUNCATE, tinfo);
- } else {
- testutil_assert(ret == WT_ROLLBACK);
- if (intxn)
- goto deadlock;
- }
+ } else
+ OP_FAILED(false);
break;
case UPDATE:
update_instead_of_chosen_op:
@@ -1041,12 +1026,8 @@ update_instead_of_chosen_op:
if (ret == 0) {
positioned = true;
SNAP_TRACK(UPDATE, tinfo);
- } else {
- positioned = false;
- if (ret == WT_ROLLBACK && intxn)
- goto deadlock;
- testutil_assert(ret == WT_ROLLBACK);
- }
+ } else
+ OP_FAILED(false);
break;
}
@@ -1061,9 +1042,8 @@ update_instead_of_chosen_op:
for (i = 0; i < j; ++i) {
if ((ret = nextprev(tinfo, cursor, next)) == 0)
continue;
- if (ret == WT_ROLLBACK && intxn)
- goto deadlock;
- testutil_assert(ret == WT_NOTFOUND);
+
+ OP_FAILED(true);
break;
}
}
@@ -1090,9 +1070,11 @@ update_instead_of_chosen_op:
goto deadlock;
}
- /* Prepare the transaction 10% of the time. */
- /* XXX: CONFIGURE PREPARE OFF FOR NOW */
- if (mmrand(&tinfo->rnd, 1, 10) == 0) {
+ /*
+ * Prepare the transaction 10% of the time.
+ * Currently doesn't work with truncation, see WT-3922.
+ */
+ if (g.c_truncate == 0 && mmrand(&tinfo->rnd, 1, 10) == 1) {
ret = prepare_transaction(tinfo, session);
testutil_assert(ret == 0 || ret == WT_PREPARE_CONFLICT);
if (ret == WT_PREPARE_CONFLICT)
@@ -1138,7 +1120,7 @@ deadlock: ++tinfo->deadlock;
/*
* wts_read_scan --
- * Read and verify all elements in a file.
+ * Read and verify a subset of the elements in a file.
*/
void
wts_read_scan(void)
@@ -1182,6 +1164,7 @@ wts_read_scan(void)
case 0:
case WT_NOTFOUND:
case WT_ROLLBACK:
+ case WT_PREPARE_CONFLICT:
break;
default:
testutil_die(
@@ -1209,11 +1192,6 @@ read_row_worker(
session = cursor->session;
- /* Log the operation */
- if (g.logging == LOG_OPS)
- (void)g.wt_api->msg_printf(g.wt_api,
- session, "%-10s%" PRIu64, "read", keyno);
-
/* Retrieve the key/value pair by key. */
switch (g.type) {
case FIX:
@@ -1254,12 +1232,15 @@ read_row_worker(
value->size = 1;
}
break;
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
default:
- testutil_die(ret, "read_row: read row %" PRIu64, keyno);
+ return (ret);
}
+ /* Log the operation */
+ if (g.logging == LOG_OPS)
+ (void)g.wt_api->msg_printf(g.wt_api,
+ session, "%-10s%" PRIu64, "read", keyno);
+
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
return (ret);
@@ -1394,24 +1375,39 @@ nextprev(TINFO *tinfo, WT_CURSOR *cursor, bool next)
break;
case WT_NOTFOUND:
break;
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
default:
- testutil_die(ret, "%s", which);
+ return (ret);
}
+ if (ret == 0 && g.logging == LOG_OPS)
+ switch (g.type) {
+ case FIX:
+ (void)g.wt_api->msg_printf(g.wt_api,
+ cursor->session, "%-10s%" PRIu64 " {0x%02x}",
+ which, keyno, ((char *)value.data)[0]);
+ break;
+ case ROW:
+ (void)g.wt_api->msg_printf(g.wt_api,
+ cursor->session, "%-10s{%.*s}, {%.*s}",
+ which, (int)key.size, (char *)key.data,
+ (int)value.size, (char *)value.data);
+ break;
+ case VAR:
+ (void)g.wt_api->msg_printf(g.wt_api,
+ cursor->session, "%-10s%" PRIu64 " {%.*s}",
+ which, keyno, (int)value.size, (char *)value.data);
+ break;
+ }
+
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
return (ret);
{
WT_ITEM bdb_key, bdb_value;
- WT_SESSION *session;
int notfound;
char *p;
- session = cursor->session;
-
/* Retrieve the BDB key/value. */
bdb_np(next, &bdb_key.data, &bdb_key.size,
&bdb_value.data, &bdb_value.size, &notfound);
@@ -1444,26 +1440,6 @@ mismatch: if (g.type == ROW) {
print_item(" wt-value", &value);
testutil_die(0, NULL);
}
-
- if (g.logging == LOG_OPS)
- switch (g.type) {
- case FIX:
- (void)g.wt_api->msg_printf(g.wt_api,
- session, "%-10s%" PRIu64 " {0x%02x}", which,
- keyno, ((char *)value.data)[0]);
- break;
- case ROW:
- (void)g.wt_api->msg_printf(
- g.wt_api, session, "%-10s{%.*s}, {%.*s}", which,
- (int)key.size, (char *)key.data,
- (int)value.size, (char *)value.data);
- break;
- case VAR:
- (void)g.wt_api->msg_printf(g.wt_api, session,
- "%-10s%" PRIu64 " {%.*s}", which,
- keyno, (int)value.size, (char *)value.data);
- break;
- }
}
#endif
return (ret);
@@ -1483,24 +1459,14 @@ row_reserve(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
cursor->set_key(cursor, tinfo->key);
}
+ if ((ret = cursor->reserve(cursor)) != 0)
+ return (ret);
+
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
"%-10s{%.*s}", "reserve",
(int)tinfo->key->size, tinfo->key->data);
- switch (ret = cursor->reserve(cursor)) {
- case 0:
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- case WT_NOTFOUND:
- return (WT_NOTFOUND);
- default:
- testutil_die(ret,
- "row_reserve: reserve row %" PRIu64 " by key",
- tinfo->keyno);
- }
return (0);
}
@@ -1516,21 +1482,13 @@ col_reserve(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
if (!positioned)
cursor->set_key(cursor, tinfo->keyno);
+ if ((ret = cursor->reserve(cursor)) != 0)
+ return (ret);
+
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
"%-10s%" PRIu64, "reserve", tinfo->keyno);
- switch (ret = cursor->reserve(cursor)) {
- case 0:
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- case WT_NOTFOUND:
- return (WT_NOTFOUND);
- default:
- testutil_die(ret, "col_reserve: %" PRIu64, tinfo->keyno);
- }
return (0);
}
@@ -1577,19 +1535,10 @@ row_modify(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
}
modify_build(tinfo, entries, &nentries);
- switch (ret = cursor->modify(cursor, entries, nentries)) {
- case 0:
- testutil_check(cursor->get_value(cursor, tinfo->value));
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- case WT_NOTFOUND:
- return (WT_NOTFOUND);
- default:
- testutil_die(ret,
- "row_modify: modify row %" PRIu64 " by key", tinfo->keyno);
- }
+ if ((ret = cursor->modify(cursor, entries, nentries)) != 0)
+ return (ret);
+
+ testutil_check(cursor->get_value(cursor, tinfo->value));
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
@@ -1624,25 +1573,16 @@ col_modify(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
cursor->set_key(cursor, tinfo->keyno);
modify_build(tinfo, entries, &nentries);
- switch (ret = cursor->modify(cursor, entries, nentries)) {
- case 0:
- testutil_check(cursor->get_value(cursor, tinfo->value));
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- case WT_NOTFOUND:
- return (WT_NOTFOUND);
- default:
- testutil_die(ret,
- "col_modify: modify row %" PRIu64, tinfo->keyno);
- }
+ if ((ret = cursor->modify(cursor, entries, nentries)) != 0)
+ return (ret);
+
+ testutil_check(cursor->get_value(cursor, tinfo->value));
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
- "%-10s{%.*s}, {%.*s}",
+ "%-10s%" PRIu64 ", {%.*s}",
"modify",
- (int)tinfo->key->size, tinfo->key->data,
+ tinfo->keyno,
(int)tinfo->value->size, tinfo->value->data);
#ifdef HAVE_BERKELEY_DB
@@ -1698,24 +1638,15 @@ row_truncate(TINFO *tinfo, WT_CURSOR *cursor)
testutil_check(c2->close(c2));
}
+ if (ret != 0)
+ return (ret);
+
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, session,
"%-10s%" PRIu64 ", %" PRIu64,
"truncate",
tinfo->keyno, tinfo->last);
- switch (ret) {
- case 0:
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- default:
- testutil_die(ret,
- "row_truncate: row %" PRIu64 "-%" PRIu64,
- tinfo->keyno, tinfo->last);
- }
-
#ifdef HAVE_BERKELEY_DB
if (SINGLETHREADED)
bdb_truncate(tinfo->keyno, tinfo->last);
@@ -1724,49 +1655,6 @@ row_truncate(TINFO *tinfo, WT_CURSOR *cursor)
}
/*
- * row_update --
- * Update a row in a row-store file.
- */
-static int
-row_update(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
-{
- WT_DECL_RET;
-
- if (!positioned) {
- key_gen(tinfo->key, tinfo->keyno);
- cursor->set_key(cursor, tinfo->key);
- }
- val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno);
- cursor->set_value(cursor, tinfo->value);
-
- if (g.logging == LOG_OPS)
- (void)g.wt_api->msg_printf(g.wt_api, cursor->session,
- "%-10s{%.*s}, {%.*s}",
- "put",
- (int)tinfo->key->size, tinfo->key->data,
- (int)tinfo->value->size, tinfo->value->data);
-
- switch (ret = cursor->update(cursor)) {
- case 0:
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- default:
- testutil_die(ret,
- "row_update: update row %" PRIu64 " by key", tinfo->keyno);
- }
-
-#ifdef HAVE_BERKELEY_DB
- if (SINGLETHREADED)
- bdb_update(
- tinfo->key->data, tinfo->key->size,
- tinfo->value->data, tinfo->value->size);
-#endif
- return (0);
-}
-
-/*
* col_truncate --
* Truncate rows in a column-store file.
*/
@@ -1802,6 +1690,8 @@ col_truncate(TINFO *tinfo, WT_CURSOR *cursor)
ret = session->truncate(session, NULL, cursor, c2, NULL);
testutil_check(c2->close(c2));
}
+ if (ret != 0)
+ return (ret);
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, session,
@@ -1809,21 +1699,44 @@ col_truncate(TINFO *tinfo, WT_CURSOR *cursor)
"truncate",
tinfo->keyno, tinfo->last);
- switch (ret) {
- case 0:
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- default:
- testutil_die(ret,
- "col_truncate: row %" PRIu64 "-%" PRIu64,
- tinfo->keyno, tinfo->last);
+#ifdef HAVE_BERKELEY_DB
+ if (SINGLETHREADED)
+ bdb_truncate(tinfo->keyno, tinfo->last);
+#endif
+ return (0);
+}
+
+/*
+ * row_update --
+ * Update a row in a row-store file.
+ */
+static int
+row_update(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
+{
+ WT_DECL_RET;
+
+ if (!positioned) {
+ key_gen(tinfo->key, tinfo->keyno);
+ cursor->set_key(cursor, tinfo->key);
}
+ val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno);
+ cursor->set_value(cursor, tinfo->value);
+
+ if ((ret = cursor->update(cursor)) != 0)
+ return (ret);
+
+ if (g.logging == LOG_OPS)
+ (void)g.wt_api->msg_printf(g.wt_api, cursor->session,
+ "%-10s{%.*s}, {%.*s}",
+ "put",
+ (int)tinfo->key->size, tinfo->key->data,
+ (int)tinfo->value->size, tinfo->value->data);
#ifdef HAVE_BERKELEY_DB
if (SINGLETHREADED)
- bdb_truncate(tinfo->keyno, tinfo->last);
+ bdb_update(
+ tinfo->key->data, tinfo->key->size,
+ tinfo->value->data, tinfo->value->size);
#endif
return (0);
}
@@ -1845,6 +1758,9 @@ col_update(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
else
cursor->set_value(cursor, tinfo->value);
+ if ((ret = cursor->update(cursor)) != 0)
+ return (ret);
+
if (g.logging == LOG_OPS) {
if (g.type == FIX)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
@@ -1859,16 +1775,6 @@ col_update(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
(char *)tinfo->value->data);
}
- switch (ret = cursor->update(cursor)) {
- case 0:
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- default:
- testutil_die(ret, "col_update: %" PRIu64, tinfo->keyno);
- }
-
#ifdef HAVE_BERKELEY_DB
if (SINGLETHREADED) {
key_gen(tinfo->key, tinfo->keyno);
@@ -1999,6 +1905,9 @@ row_insert(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno);
cursor->set_value(cursor, tinfo->value);
+ if ((ret = cursor->insert(cursor)) != 0)
+ return (ret);
+
/* Log the operation */
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
@@ -2007,17 +1916,6 @@ row_insert(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
(int)tinfo->key->size, tinfo->key->data,
(int)tinfo->value->size, tinfo->value->data);
- switch (ret = cursor->insert(cursor)) {
- case 0:
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- default:
- testutil_die(ret,
- "row_insert: insert row %" PRIu64 " by key", tinfo->keyno);
- }
-
#ifdef HAVE_BERKELEY_DB
if (SINGLETHREADED)
bdb_update(
@@ -2041,15 +1939,10 @@ col_insert(TINFO *tinfo, WT_CURSOR *cursor)
cursor->set_value(cursor, *(uint8_t *)tinfo->value->data);
else
cursor->set_value(cursor, tinfo->value);
- switch (ret = cursor->insert(cursor)) {
- case 0:
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- default:
- testutil_die(ret, "cursor.insert");
- }
+
+ if ((ret = cursor->insert(cursor)) != 0)
+ return (ret);
+
testutil_check(cursor->get_key(cursor, &tinfo->keyno));
table_append(tinfo->keyno); /* Extend the object. */
@@ -2093,23 +1986,16 @@ row_remove(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
cursor->set_key(cursor, tinfo->key);
}
- if (g.logging == LOG_OPS)
- (void)g.wt_api->msg_printf(g.wt_api,
- cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno);
-
/* We use the cursor in overwrite mode, check for existence. */
if ((ret = cursor->search(cursor)) == 0)
ret = cursor->remove(cursor);
- switch (ret) {
- case 0:
- case WT_NOTFOUND:
- break;
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- default:
- testutil_die(ret,
- "row_remove: remove %" PRIu64 " by key", tinfo->keyno);
- }
+
+ if (ret != 0 && ret != WT_NOTFOUND)
+ return (ret);
+
+ if (g.logging == LOG_OPS)
+ (void)g.wt_api->msg_printf(g.wt_api,
+ cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno);
#ifdef HAVE_BERKELEY_DB
if (SINGLETHREADED) {
@@ -2134,23 +2020,16 @@ col_remove(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
if (!positioned)
cursor->set_key(cursor, tinfo->keyno);
- if (g.logging == LOG_OPS)
- (void)g.wt_api->msg_printf(g.wt_api,
- cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno);
-
/* We use the cursor in overwrite mode, check for existence. */
if ((ret = cursor->search(cursor)) == 0)
ret = cursor->remove(cursor);
- switch (ret) {
- case 0:
- case WT_NOTFOUND:
- break;
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- default:
- testutil_die(ret,
- "col_remove: remove %" PRIu64 " by key", tinfo->keyno);
- }
+
+ if (ret != 0 && ret != WT_NOTFOUND)
+ return (ret);
+
+ if (g.logging == LOG_OPS)
+ (void)g.wt_api->msg_printf(g.wt_api,
+ cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno);
#ifdef HAVE_BERKELEY_DB
if (SINGLETHREADED) {
diff --git a/src/third_party/wiredtiger/test/suite/test_bug019.py b/src/third_party/wiredtiger/test/suite/test_bug019.py
new file mode 100644
index 00000000000..202ca6b6b60
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_bug019.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2018 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import fnmatch, os, time
+import wiredtiger, wttest
+from wtdataset import SimpleDataSet
+
+# test_bug019.py
+# Test that pre-allocating log files only pre-allocates a small number.
+class test_bug019(wttest.WiredTigerTestCase):
+ conn_config = 'log=(enabled,file_max=100K)'
+ uri = "table:bug019"
+ entries = 100000
+
+ # Modify rows so we write log records. We're writing a lot more than a
+ # single log file, so we know the underlying library will churn through
+ # log files.
+ def populate(self, nentries):
+ c = self.session.open_cursor(self.uri, None, None)
+ for i in range(0, nentries):
+ c[i] = i
+ c.close()
+
+ # Wait for a log file to be pre-allocated. Avoid timing problems, but
+ # assert a file is created within 30 seconds.
+ def prepfiles(self):
+ for i in range(1,30):
+ f = fnmatch.filter(os.listdir('.'), "*Prep*")
+ if f:
+ return f
+ time.sleep(1)
+ self.assertFalse(not f)
+
+ # There was a bug where pre-allocated log files accumulated on
+ # Windows systems due to an issue with the directory list code.
+ def test_bug019(self):
+ # Create a table just to write something into the log.
+ self.session.create(self.uri, 'key_format=i,value_format=i')
+ self.populate(self.entries)
+ self.session.checkpoint()
+
+ # Loop, making sure pre-allocation is working and the range is moving.
+ older = self.prepfiles()
+ for i in range(1, 10):
+ self.populate(self.entries)
+ newer = self.prepfiles()
+
+ # Files can be returned in any order when reading a directory; older
+ # pre-allocated files can persist longer than newer files when newer
+ # files are returned first. Confirm files are being consumed.
+ self.assertFalse(set(older) < set(newer))
+
+ older = newer
+ self.session.checkpoint()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_cursor13.py b/src/third_party/wiredtiger/test/suite/test_cursor13.py
index 27884b6726c..35a841ed78d 100644
--- a/src/third_party/wiredtiger/test/suite/test_cursor13.py
+++ b/src/third_party/wiredtiger/test/suite/test_cursor13.py
@@ -509,7 +509,7 @@ class test_cursor13_sweep(test_cursor13_big_base):
swept = end_sweep_stats[3] - begin_sweep_stats[3]
# Although this is subject to tuning parameters, we know that
- # in an active sesssion, we'll sweep through minimum of 1% of
+ # in an active session, we'll sweep through minimum of 1% of
# the cached cursors per second. We've set this test to run
# 5 rounds. In 2 of the 5 rounds (sandwiched between the others),
# some of the uris are allowed to close. So during the 'closing rounds'
diff --git a/src/third_party/wiredtiger/test/suite/test_cursor14.py b/src/third_party/wiredtiger/test/suite/test_cursor14.py
new file mode 100644
index 00000000000..25bd0cec00a
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_cursor14.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2018 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+from wtdataset import SimpleDataSet, ComplexDataSet, ComplexLSMDataSet
+from wtscenario import make_scenarios
+
+# test_cursor14.py
+# Test that more than 64K cursors can be opened on a data source
+class test_cursor14(wttest.WiredTigerTestCase):
+ scenarios = make_scenarios([
+ ('file-r', dict(type='file:', keyfmt='r', dataset=SimpleDataSet)),
+ ('file-S', dict(type='file:', keyfmt='S', dataset=SimpleDataSet)),
+ ('lsm-S', dict(type='lsm:', keyfmt='S', dataset=SimpleDataSet)),
+ ('table-r', dict(type='table:', keyfmt='r', dataset=SimpleDataSet)),
+ ('table-S', dict(type='table:', keyfmt='S', dataset=SimpleDataSet)),
+ ('table-r-complex', dict(type='table:', keyfmt='r',
+ dataset=ComplexDataSet)),
+ ('table-S-complex', dict(type='table:', keyfmt='S',
+ dataset=ComplexDataSet)),
+ ('table-S-complex-lsm', dict(type='table:', keyfmt='S',
+ dataset=ComplexLSMDataSet)),
+ ])
+
+ def test_cursor14(self):
+ uri = self.type + 'cursor14'
+
+ ds = self.dataset(self, uri, 100, key_format=self.keyfmt)
+ ds.populate()
+
+ for i in xrange(66000):
+ cursor = self.session.open_cursor(uri, None, None)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_prepare04.py b/src/third_party/wiredtiger/test/suite/test_prepare04.py
new file mode 100644
index 00000000000..af5dd12b1e5
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_prepare04.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2018 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_prepare04.py
+# Prepare: prepare conflict with update and read operations
+#
+
+import random
+from suite_subprocess import suite_subprocess
+import wiredtiger, wttest
+from wtscenario import make_scenarios
+
+def timestamp_str(t):
+ return '%x' % t
+
+class test_prepare04(wttest.WiredTigerTestCase, suite_subprocess):
+ tablename = 'test_prepare_cursor'
+ uri = 'table:' + tablename
+ before_ts = timestamp_str(150)
+ prepare_ts = timestamp_str(200)
+ after_ts = timestamp_str(250)
+
+ types = [
+ ('col', dict(extra_config=',log=(enabled=false),key_format=r')),
+ ('lsm', dict(extra_config=',log=(enabled=false),type=lsm')),
+ ('row', dict(extra_config=',log=(enabled=false)')),
+ ]
+
+ # Various begin_transaction config
+ txncfg = [
+ ('before_ts', dict(txn_config='isolation=snapshot,read_timestamp=' + before_ts, after_ts=False)),
+ ('after_ts', dict(txn_config='isolation=snapshot,read_timestamp=' + after_ts, after_ts=True)),
+ ('no_ts', dict(txn_config='isolation=snapshot', after_ts=True)),
+ ]
+
+ preparecfg = [
+ ('ignore_false', dict(ignore_config=',ignore_prepare=false', ignore=False)),
+ ('ignore_true', dict(ignore_config=',ignore_prepare=true', ignore=True)),
+ ]
+ conn_config = 'log=(enabled)'
+
+ scenarios = make_scenarios(types, txncfg, preparecfg)
+
+ def test_prepare_conflict(self):
+ if not wiredtiger.timestamp_build():
+ self.skipTest('requires a timestamp build')
+
+ self.session.create(self.uri,
+ 'key_format=i,value_format=i' + self.extra_config)
+ c = self.session.open_cursor(self.uri)
+
+ # Build a shuffled list of keys 1..100 (note: only key 1 is inserted below)
+ orig_keys = range(1, 101)
+ keys = orig_keys[:]
+ random.shuffle(keys)
+
+ k = 1
+ self.session.begin_transaction()
+ c[k] = 1
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(100))
+
+ # Everything up to and including timestamp 100 has been committed.
+ self.assertTimestampsEqual(self.conn.query_timestamp(), timestamp_str(100))
+
+ # Bump the oldest timestamp, we're not going back...
+ self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(100))
+
+ # Make a prepared update to key 1.
+ k = 1
+ self.session.begin_transaction('isolation=snapshot')
+ c.set_key(1)
+ c.set_value(2)
+ c.update()
+ self.session.prepare_transaction('prepare_timestamp=' + self.prepare_ts)
+ conflictmsg = '/conflict between concurrent operations/'
+ preparemsg = '/conflict with a prepared update/'
+
+ #'''
+ # Verify data visibility from a different session/transaction.
+ s_other = self.conn.open_session()
+ c_other = s_other.open_cursor(self.uri, None)
+ s_other.begin_transaction(self.txn_config + self.ignore_config)
+ c_other.set_key(1)
+ if self.ignore == False and self.after_ts == True:
+ self.assertRaises(wiredtiger.WiredTigerError, lambda:c_other.search())
+ else:
+ c_other.search()
+ self.assertTrue(c_other.get_value() == 1)
+ c_other.set_value(3)
+ self.assertRaises(wiredtiger.WiredTigerError, lambda:c_other.update())
+ s_other.commit_transaction()
+ #'''
+
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(300))
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp04.py b/src/third_party/wiredtiger/test/suite/test_timestamp04.py
index 48ec7fac9a6..83ed4e904a6 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp04.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp04.py
@@ -32,6 +32,7 @@
from suite_subprocess import suite_subprocess
import wiredtiger, wttest
+from wiredtiger import stat
from wtscenario import make_scenarios
def timestamp_str(t):
@@ -98,7 +99,7 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
def ConnectionOpen(self, cacheSize):
self.home = '.'
- conn_params = 'create,' + \
+ conn_params = 'create,statistics=(fast),' + \
cacheSize + ',error_prefix="%s" %s' % (self.shortid(), self.conn_config)
try:
self.conn = wiredtiger.wiredtiger_open(self.home, conn_params)
@@ -164,6 +165,12 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
stable_ts = timestamp_str(key_range / 2)
self.conn.set_timestamp('stable_timestamp=' + stable_ts)
self.conn.rollback_to_stable()
+ stat_cursor = self.session.open_cursor('statistics:', None, None)
+ calls = stat_cursor[stat.conn.txn_rollback_to_stable][2]
+ upd_aborted = stat_cursor[stat.conn.txn_rollback_upd_aborted][2]
+ stat_cursor.close()
+ self.assertEqual(calls, 1)
+ self.assertTrue(upd_aborted >= key_range/2)
# Check that we see the inserted value (i.e. 1) for all the keys in
# non-timestamp tables.
@@ -224,9 +231,20 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
# Scenario: 4
# Advance the stable_timestamp by a quarter range and rollback.
# Three-fourths of the later timestamps will be rolled back.
- stable_ts = timestamp_str(key_range + key_range / 4)
+ rolled_range = key_range + key_range / 4
+ stable_ts = timestamp_str(rolled_range)
self.conn.set_timestamp('stable_timestamp=' + stable_ts)
self.conn.rollback_to_stable()
+ stat_cursor = self.session.open_cursor('statistics:', None, None)
+ calls = stat_cursor[stat.conn.txn_rollback_to_stable][2]
+ upd_aborted = stat_cursor[stat.conn.txn_rollback_upd_aborted][2]
+ stat_cursor.close()
+ self.assertEqual(calls, 2)
+ #
+ # We rolled back half on the earlier call and now three-quarters on
+ # this call, which is one and one quarter of all keys rolled back.
+ #
+ self.assertTrue(upd_aborted >= rolled_range)
# Check that we see the updated value (i.e. 2) for all the keys in
# non-timestamped tables.
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp10.py b/src/third_party/wiredtiger/test/suite/test_timestamp10.py
index a798f5ff355..02b22e6afbe 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp10.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp10.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
#
# test_timestamp10.py
-# Timestamps: Saving and querying the checkpoint recovery timestamp
+# Timestamps: Saving and querying the last checkpoint and recovery timestamps
#
import fnmatch, os, shutil
@@ -101,6 +101,8 @@ class test_timestamp10(wttest.WiredTigerTestCase, suite_subprocess):
',stable_timestamp=' + timestamp_str(ts))
# This forces a different checkpoint timestamp for each table.
self.session.checkpoint()
+ q = self.conn.query_timestamp('get=last_checkpoint')
+ self.assertTimestampsEqual(q, timestamp_str(ts))
# Copy to a new database and then recover.
self.copy_dir(".", "RESTART")