summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger
diff options
context:
space:
mode:
authorLuke Chen <luke.chen@mongodb.com>2018-03-14 16:52:29 +1100
committerLuke Chen <luke.chen@mongodb.com>2018-03-14 16:52:29 +1100
commitc5bfa9391f364ad1f36334d95c487a077aa76cea (patch)
tree52e1df6a798ba881bedc2f10d1dbf2dcfe0d90cf /src/third_party/wiredtiger
parent1481f7068376b464eb6dff954f004b023d3bbbd5 (diff)
downloadmongo-c5bfa9391f364ad1f36334d95c487a077aa76cea.tar.gz
Import wiredtiger: b33708d7d9b2971cda05e71fcba6067b230b97cc from branch mongodb-3.8
ref: 60a06941b8..b33708d7d9 for: 3.7.4 WT-3913 Enhance cursor operations to account for prepare state WT-3950 Add some rollback_to_stable statistics WT-3958 Add query API to get most recent checkpoint's stable timestamp WT-3969 enhance format tester to account for prepare state WT-3972 Allow more than 64K cursors to be open on a data source simultaneously WT-3975 arg format mismatch after rwlock changes
Diffstat (limited to 'src/third_party/wiredtiger')
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py3
-rw-r--r--src/third_party/wiredtiger/dist/stat_data.py3
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curnext.c70
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curprev.c71
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_cursor.c140
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_random.c4
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c4
-rw-r--r--src/third_party/wiredtiger/src/include/api.h2
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h2
-rw-r--r--src/third_party/wiredtiger/src/include/cursor.h21
-rw-r--r--src/third_party/wiredtiger/src/include/cursor.i14
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h2
-rw-r--r--src/third_party/wiredtiger/src/include/mutex.h5
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h3
-rw-r--r--src/third_party/wiredtiger/src/include/txn.h1
-rw-r--r--src/third_party/wiredtiger/src/include/txn.i94
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in71
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c6
-rw-r--r--src/third_party/wiredtiger/src/support/mtx_rw.c11
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c12
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ckpt.c17
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c3
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_timestamp.c5
-rw-r--r--src/third_party/wiredtiger/test/csuite/rwlock/main.c4
-rw-r--r--src/third_party/wiredtiger/test/format/config.c8
-rw-r--r--src/third_party/wiredtiger/test/format/ops.c411
-rw-r--r--src/third_party/wiredtiger/test/suite/test_cursor14.py60
-rw-r--r--src/third_party/wiredtiger/test/suite/test_prepare04.py122
-rw-r--r--src/third_party/wiredtiger/test/suite/test_timestamp04.py22
-rw-r--r--src/third_party/wiredtiger/test/suite/test_timestamp10.py4
31 files changed, 759 insertions, 438 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 17fe0d97735..f5e0b4a67a3 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -1371,7 +1371,8 @@ methods = {
\c oldest_timestamp and the read timestamps of all active readers, and
\c stable returns the most recent \c stable_timestamp set with
WT_CONNECTION::set_timestamp. See @ref transaction_timestamps''',
- choices=['all_committed','oldest','pinned','recovery','stable']),
+ choices=['all_committed','last_checkpoint',
+ 'oldest','pinned','recovery','stable']),
]),
'WT_CONNECTION.set_timestamp' : Method([
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index a630ebe3fa9..1441187812e 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -518,6 +518,9 @@ connection_stats = [
TxnStat('txn_read_queue_inserts', 'read timestamp queue inserts total'),
TxnStat('txn_read_queue_len', 'read timestamp queue length'),
TxnStat('txn_rollback', 'transactions rolled back'),
+ TxnStat('txn_rollback_las_removed', 'rollback to stable updates removed from lookaside'),
+ TxnStat('txn_rollback_to_stable', 'rollback to stable calls'),
+ TxnStat('txn_rollback_upd_aborted', 'rollback to stable updates aborted'),
TxnStat('txn_set_ts', 'set timestamp calls'),
TxnStat('txn_set_ts_commit', 'set timestamp commit calls'),
TxnStat('txn_set_ts_commit_upd', 'set timestamp commit updates'),
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 16374d1697a..53642b757b5 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "60a06941b8d5d7ddc9f93646e75fc4b52d40f9b4",
+ "commit": "b33708d7d9b2971cda05e71fcba6067b230b97cc",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-3.8"
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index 6737af9996b..63d2cda4714 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -54,12 +54,17 @@ __cursor_fix_append_next(WT_CURSOR_BTREE *cbt, bool newpage)
* insert is aborted, we simply return zero (empty), regardless of
* whether we are at the end of the data.
*/
- if (cbt->recno < WT_INSERT_RECNO(cbt->ins) ||
- (upd = __wt_txn_read(session, cbt->ins->upd)) == NULL) {
+ if (cbt->recno < WT_INSERT_RECNO(cbt->ins)) {
cbt->v = 0;
cbt->iface.value.data = &cbt->v;
- } else
- cbt->iface.value.data = upd->data;
+ } else {
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd == NULL) {
+ cbt->v = 0;
+ cbt->iface.value.data = &cbt->v;
+ } else
+ cbt->iface.value.data = upd->data;
+ }
cbt->iface.value.size = 1;
return (0);
}
@@ -79,6 +84,7 @@ __cursor_fix_next(WT_CURSOR_BTREE *cbt, bool newpage)
session = (WT_SESSION_IMPL *)cbt->iface.session;
btree = S2BT(session);
page = cbt->ref->page;
+ upd = NULL;
/* Initialize for each new page. */
if (newpage) {
@@ -101,7 +107,8 @@ new_page:
cbt->ins_head, cbt->ins_stack, cbt->next_stack, cbt->recno);
if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins))
cbt->ins = NULL;
- upd = cbt->ins == NULL ? NULL : __wt_txn_read(session, cbt->ins->upd);
+ if (cbt->ins != NULL)
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
if (upd == NULL) {
cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt);
cbt->iface.value.data = &cbt->v;
@@ -134,7 +141,8 @@ new_page: if (cbt->ins == NULL)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
- if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL)
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd == NULL)
continue;
if (upd->type == WT_UPDATE_TOMBSTONE) {
if (upd->txnid != WT_TXN_NONE &&
@@ -193,8 +201,9 @@ new_page: /* Find the matching WT_COL slot. */
/* Check any insert list for a matching record. */
cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
- upd = cbt->ins == NULL ?
- NULL : __wt_txn_read(session, cbt->ins->upd);
+ upd = NULL;
+ if (cbt->ins != NULL)
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
if (upd != NULL) {
if (upd->type == WT_UPDATE_TOMBSTONE) {
if (upd->txnid != WT_TXN_NONE &&
@@ -311,7 +320,8 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage)
cbt->ins = WT_SKIP_NEXT(cbt->ins);
new_insert: if ((ins = cbt->ins) != NULL) {
- if ((upd = __wt_txn_read(session, ins->upd)) == NULL)
+ WT_RET(__wt_txn_read(session, ins->upd, &upd));
+ if (upd == NULL)
continue;
if (upd->type == WT_UPDATE_TOMBSTONE) {
if (upd->txnid != WT_TXN_NONE &&
@@ -344,7 +354,7 @@ new_insert: if ((ins = cbt->ins) != NULL) {
cbt->slot = cbt->row_iteration_slot / 2 - 1;
rip = &page->pg_row[cbt->slot];
- upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip));
+ WT_RET(__wt_txn_read(session, WT_ROW_UPDATE(page, rip), &upd));
if (upd != NULL && upd->type == WT_UPDATE_TOMBSTONE) {
if (upd->txnid != WT_TXN_NONE &&
__wt_txn_upd_visible_all(session, upd))
@@ -571,8 +581,9 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
WT_DECL_RET;
WT_PAGE *page;
WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
uint32_t flags;
- bool newpage;
+ bool newpage, valid;
cursor = &cbt->iface;
session = (WT_SESSION_IMPL *)cbt->iface.session;
@@ -582,6 +593,26 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ /*
+ * In case of retrying a next operation due to a prepare conflict,
+ * cursor would have been already positioned at an update structure
+ * which resulted in conflict. So, now when retrying we should examine
+ * the same update again instead of starting from the next one in the
+ * update chain.
+ */
+ F_CLR(cbt, WT_CBT_RETRY_PREV);
+ if (F_ISSET(cbt, WT_CBT_RETRY_NEXT)) {
+ WT_RET(__wt_cursor_valid(cbt, &upd, &valid));
+ F_CLR(cbt, WT_CBT_RETRY_NEXT);
+ if (valid) {
+ /*
+ * If the update, which returned prepared conflict is
+ * visible, return the value.
+ */
+ return (__cursor_kv_return(session, cbt, upd));
+ }
+ }
+
WT_RET(__cursor_func_init(cbt, false));
/*
@@ -663,15 +694,24 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);
}
-
#ifdef HAVE_DIAGNOSTIC
if (ret == 0)
WT_ERR(__wt_cursor_key_order_check(session, cbt, true));
#endif
- if (ret == 0)
+err: switch (ret) {
+ case 0:
F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
-
-err: if (ret != 0)
+ break;
+ case WT_PREPARE_CONFLICT:
+ /*
+ * If prepare conflict occurs, cursor should not be reset,
+ * as current cursor position will be reused in case of a
+ * retry from user.
+ */
+ F_SET(cbt, WT_CBT_RETRY_NEXT);
+ break;
+ default:
WT_TRET(__cursor_reset(cbt));
+ }
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index 068a9915ab9..3356baeb24a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -199,13 +199,18 @@ __cursor_fix_append_prev(WT_CURSOR_BTREE *cbt, bool newpage)
* created records written by reconciliation are deleted and so can be
* never seen by a read.
*/
- if (cbt->ins == NULL ||
- cbt->recno > WT_INSERT_RECNO(cbt->ins) ||
- (upd = __wt_txn_read(session, cbt->ins->upd)) == NULL) {
+ if (cbt->ins == NULL || cbt->recno > WT_INSERT_RECNO(cbt->ins)) {
cbt->v = 0;
cbt->iface.value.data = &cbt->v;
- } else
- cbt->iface.value.data = upd->data;
+ } else {
+ upd = NULL;
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd == NULL) {
+ cbt->v = 0;
+ cbt->iface.value.data = &cbt->v;
+ } else
+ cbt->iface.value.data = upd->data;
+ }
cbt->iface.value.size = 1;
return (0);
}
@@ -247,7 +252,9 @@ new_page:
cbt->ins_head, cbt->ins_stack, cbt->next_stack, cbt->recno);
if (cbt->ins != NULL && cbt->recno != WT_INSERT_RECNO(cbt->ins))
cbt->ins = NULL;
- upd = cbt->ins == NULL ? NULL : __wt_txn_read(session, cbt->ins->upd);
+ upd = NULL;
+ if (cbt->ins != NULL)
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
if (upd == NULL) {
cbt->v = __bit_getv_recno(cbt->ref, cbt->recno, btree->bitcnt);
cbt->iface.value.data = &cbt->v;
@@ -280,7 +287,8 @@ new_page: if (cbt->ins == NULL)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
- if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL)
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd == NULL)
continue;
if (upd->type == WT_UPDATE_TOMBSTONE) {
if (upd->txnid != WT_TXN_NONE &&
@@ -340,8 +348,9 @@ new_page: if (cbt->recno < cbt->ref->ref_recno)
/* Check any insert list for a matching record. */
cbt->ins_head = WT_COL_UPDATE_SLOT(page, cbt->slot);
cbt->ins = __col_insert_search_match(cbt->ins_head, cbt->recno);
- upd = cbt->ins == NULL ?
- NULL : __wt_txn_read(session, cbt->ins->upd);
+ upd = NULL;
+ if (cbt->ins != NULL)
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
if (upd != NULL) {
if (upd->type == WT_UPDATE_TOMBSTONE) {
if (upd->txnid != WT_TXN_NONE &&
@@ -468,7 +477,8 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage)
WT_RET(__cursor_skip_prev(cbt));
new_insert: if ((ins = cbt->ins) != NULL) {
- if ((upd = __wt_txn_read(session, ins->upd)) == NULL)
+ WT_RET(__wt_txn_read(session, ins->upd, &upd));
+ if (upd == NULL)
continue;
if (upd->type == WT_UPDATE_TOMBSTONE) {
if (upd->txnid != WT_TXN_NONE &&
@@ -503,7 +513,7 @@ new_insert: if ((ins = cbt->ins) != NULL) {
cbt->slot = cbt->row_iteration_slot / 2 - 1;
rip = &page->pg_row[cbt->slot];
- upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip));
+ WT_RET(__wt_txn_read(session, WT_ROW_UPDATE(page, rip), &upd));
if (upd != NULL && upd->type == WT_UPDATE_TOMBSTONE) {
if (upd->txnid != WT_TXN_NONE &&
__wt_txn_upd_visible_all(session, upd))
@@ -526,8 +536,9 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
WT_DECL_RET;
WT_PAGE *page;
WT_SESSION_IMPL *session;
+ WT_UPDATE *upd;
uint32_t flags;
- bool newpage;
+ bool newpage, valid;
cursor = &cbt->iface;
session = (WT_SESSION_IMPL *)cbt->iface.session;
@@ -537,6 +548,26 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ /*
+ * In case of retrying a prev operation due to a prepare conflict,
+ * cursor would have been already positioned at an update structure
+ * which resulted in conflict. So, now when retrying we should examine
+ * the same update again instead of starting from the next one in the
+ * update chain.
+ */
+ F_CLR(cbt, WT_CBT_RETRY_NEXT);
+ if (F_ISSET(cbt, WT_CBT_RETRY_PREV)) {
+ WT_RET(__wt_cursor_valid(cbt, &upd, &valid));
+ F_CLR(cbt, WT_CBT_RETRY_PREV);
+ if (valid) {
+ /*
+ * If the update, which returned prepared conflict is
+ * visible, return the value.
+ */
+ return (__cursor_kv_return(session, cbt, upd));
+ }
+ }
+
WT_RET(__cursor_func_init(cbt, false));
/*
@@ -622,10 +653,20 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
if (ret == 0)
WT_ERR(__wt_cursor_key_order_check(session, cbt, false));
#endif
- if (ret == 0)
+err: switch (ret) {
+ case 0:
F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
-
-err: if (ret != 0)
+ break;
+ case WT_PREPARE_CONFLICT:
+ /*
+ * If prepare conflict occurs, cursor should not be reset,
+ * as current cursor position will be reused in case of a
+ * retry from user.
+ */
+ F_SET(cbt, WT_CBT_RETRY_PREV);
+ break;
+ default:
WT_TRET(__cursor_reset(cbt));
+ }
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 29725e22b2c..bf535896c73 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -205,8 +205,8 @@ __cursor_fix_implicit(WT_BTREE *btree, WT_CURSOR_BTREE *cbt)
* __wt_cursor_valid --
* Return if the cursor references an valid key/value pair.
*/
-bool
-__wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
+int
+__wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *valid)
{
WT_BTREE *btree;
WT_CELL *cell;
@@ -215,11 +215,12 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
WT_SESSION_IMPL *session;
WT_UPDATE *upd;
+ if (updp != NULL)
+ *updp = NULL;
+ *valid = false;
btree = cbt->btree;
page = cbt->ref->page;
session = (WT_SESSION_IMPL *)cbt->iface.session;
- if (updp != NULL)
- *updp = NULL;
/*
* We may be pointing to an insert object, and we may have a page with
@@ -265,13 +266,16 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
* First, check for an insert object with a visible update (a visible
* update that's been deleted is not a valid key/value pair).
*/
- if (cbt->ins != NULL &&
- (upd = __wt_txn_read(session, cbt->ins->upd)) != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE)
- return (false);
- if (updp != NULL)
- *updp = upd;
- return (true);
+ if (cbt->ins != NULL) {
+ WT_RET(__wt_txn_read(session, cbt->ins->upd, &upd));
+ if (upd != NULL) {
+ if (upd->type == WT_UPDATE_TOMBSTONE)
+ return (0);
+ if (updp != NULL)
+ *updp = upd;
+ *valid = true;
+ return (0);
+ }
}
/*
@@ -290,7 +294,7 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
* keys, check for retrieval past the end of the page.
*/
if (cbt->recno >= cbt->ref->ref_recno + page->entries)
- return (false);
+ return (0);
/*
* An update would have appeared as an "insert" object; no
@@ -300,7 +304,7 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
case BTREE_COL_VAR:
/* The search function doesn't check for empty pages. */
if (page->entries == 0)
- return (false);
+ return (0);
WT_ASSERT(session, cbt->slot < page->entries);
/*
@@ -309,7 +313,7 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
* returned on-page object must be checked for a match.
*/
if (cbt->ins != NULL && !F_ISSET(cbt, WT_CBT_VAR_ONPAGE_MATCH))
- return (false);
+ return (0);
/*
* Although updates would have appeared as an "insert" objects,
@@ -320,12 +324,12 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
cip = &page->pg_var[cbt->slot];
if ((cell = WT_COL_PTR(page, cip)) == NULL ||
__wt_cell_type(cell) == WT_CELL_DEL)
- return (false);
+ return (0);
break;
case BTREE_ROW:
/* The search function doesn't check for empty pages. */
if (page->entries == 0)
- return (false);
+ return (0);
WT_ASSERT(session, cbt->slot < page->entries);
/*
@@ -333,34 +337,23 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp)
* key as an on-page object, we're done.
*/
if (cbt->ins != NULL)
- return (false);
+ return (0);
/* Check for an update. */
if (page->modify != NULL &&
- page->modify->mod_row_update != NULL &&
- (upd = __wt_txn_read(session,
- page->modify->mod_row_update[cbt->slot])) != NULL) {
- if (upd->type == WT_UPDATE_TOMBSTONE)
- return (false);
- if (updp != NULL)
- *updp = upd;
+ page->modify->mod_row_update != NULL) {
+ WT_RET(__wt_txn_read(session,
+ page->modify->mod_row_update[cbt->slot], &upd));
+ if (upd != NULL) {
+ if (upd->type == WT_UPDATE_TOMBSTONE)
+ return (0);
+ if (updp != NULL)
+ *updp = upd;
+ }
}
break;
}
- return (true);
-}
-
-/*
- * __cursor_kv_return --
- * Return a page referenced key/value pair to the application.
- */
-static inline int
-__cursor_kv_return(
- WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
-{
- WT_RET(__wt_key_return(session, cbt));
- WT_RET(__wt_value_return(session, cbt, upd));
-
+ *valid = true;
return (0);
}
@@ -512,7 +505,10 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
WT_ERR(btree->type == BTREE_ROW ?
__cursor_row_search(session, cbt, cbt->ref, false) :
__cursor_col_search(session, cbt, cbt->ref));
- valid = cbt->compare == 0 && __wt_cursor_valid(cbt, &upd);
+
+ /* Return, if prepare conflict encountered. */
+ if (cbt->compare == 0)
+ WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
}
if (!valid) {
WT_ERR(__cursor_func_init(cbt, true));
@@ -520,7 +516,10 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
WT_ERR(btree->type == BTREE_ROW ?
__cursor_row_search(session, cbt, NULL, false) :
__cursor_col_search(session, cbt, NULL));
- valid = cbt->compare == 0 && __wt_cursor_valid(cbt, &upd);
+
+ /* Return, if prepare conflict encountered. */
+ if (cbt->compare == 0)
+ WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
}
if (valid)
@@ -618,14 +617,14 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
* Ignore those cases, it makes things too complicated.
*/
if (cbt->slot != 0 && cbt->slot != cbt->ref->page->entries - 1)
- valid = __wt_cursor_valid(cbt, &upd);
+ WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
}
if (!valid) {
WT_ERR(__cursor_func_init(cbt, true));
WT_ERR(btree->type == BTREE_ROW ?
__cursor_row_search(session, cbt, NULL, true) :
__cursor_col_search(session, cbt, NULL));
- valid = __wt_cursor_valid(cbt, &upd);
+ WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
}
/*
@@ -656,9 +655,10 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
exact = 0;
F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
- } else if ((ret = __wt_btcur_next(cbt, false)) != WT_NOTFOUND)
+ } else if ((ret = __wt_btcur_next(cbt, false)) != WT_NOTFOUND) {
+ WT_ERR(ret);
exact = 1;
- else {
+ } else {
/*
* The cursor next call may have overwritten our caller's key,
* restore it to its original value.
@@ -669,11 +669,14 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
WT_ERR(btree->type == BTREE_ROW ?
__cursor_row_search(session, cbt, NULL, true) :
__cursor_col_search(session, cbt, NULL));
- if (__wt_cursor_valid(cbt, &upd)) {
+ WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
+ if (valid) {
exact = cbt->compare;
ret = __cursor_kv_return(session, cbt, upd);
- } else if ((ret = __wt_btcur_prev(cbt, false)) != WT_NOTFOUND)
+ } else if ((ret = __wt_btcur_prev(cbt, false)) != WT_NOTFOUND) {
+ WT_ERR(ret);
exact = -1;
+ }
}
err: if (ret == 0 && exactp != NULL)
@@ -703,7 +706,7 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- bool append_key;
+ bool append_key, valid;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -784,8 +787,11 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
* key/value pair.
*/
if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) &&
- cbt->compare == 0 && __wt_cursor_valid(cbt, NULL))
- WT_ERR(WT_DUPLICATE_KEY);
+ cbt->compare == 0) {
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if (valid)
+ WT_ERR(WT_DUPLICATE_KEY);
+ }
ret = __cursor_row_modify(session, cbt, WT_UPDATE_STANDARD);
} else {
@@ -805,10 +811,14 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
* column-store implicitly fills the gap with empty records.
* Fail in that case, the record exists.
*/
- if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE) &&
- ((cbt->compare == 0 && __wt_cursor_valid(cbt, NULL)) ||
- (cbt->compare != 0 && __cursor_fix_implicit(btree, cbt))))
- WT_ERR(WT_DUPLICATE_KEY);
+ if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
+ if (cbt->compare == 0) {
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if (valid)
+ WT_ERR(WT_DUPLICATE_KEY);
+ } else if (__cursor_fix_implicit(btree, cbt))
+ WT_ERR(WT_DUPLICATE_KEY);
+ }
WT_ERR(__cursor_col_modify(session, cbt, WT_UPDATE_STANDARD));
@@ -932,7 +942,7 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- bool iterating;
+ bool iterating, valid;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -1028,7 +1038,10 @@ retry: if (positioned == POSITIONED)
/* Check whether an update would conflict. */
WT_ERR(__curfile_update_check(cbt));
- if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL))
+ if (cbt->compare != 0)
+ WT_ERR(WT_NOTFOUND);
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if (!valid)
WT_ERR(WT_NOTFOUND);
ret = __cursor_row_modify(session, cbt, WT_UPDATE_TOMBSTONE);
@@ -1043,7 +1056,10 @@ retry: if (positioned == POSITIONED)
WT_ERR(__curfile_update_check(cbt));
/* Remove the record if it exists. */
- if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL)) {
+ valid = false;
+ if (cbt->compare == 0)
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if (cbt->compare != 0 || !valid) {
if (!__cursor_fix_implicit(btree, cbt))
WT_ERR(WT_NOTFOUND);
/*
@@ -1143,6 +1159,7 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ bool valid;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -1207,7 +1224,10 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
*/
if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
WT_ERR(__curfile_update_check(cbt));
- if (cbt->compare != 0 || !__wt_cursor_valid(cbt, NULL))
+ if (cbt->compare != 0)
+ WT_ERR(WT_NOTFOUND);
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if (!valid)
WT_ERR(WT_NOTFOUND);
}
ret = __cursor_row_modify_v(session, cbt, value, modify_type);
@@ -1224,8 +1244,10 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
*/
if (!F_ISSET(cursor, WT_CURSTD_OVERWRITE)) {
WT_ERR(__curfile_update_check(cbt));
- if ((cbt->compare != 0 ||
- !__wt_cursor_valid(cbt, NULL)) &&
+ valid = false;
+ if (cbt->compare == 0)
+ WT_ERR(__wt_cursor_valid(cbt, NULL, &valid));
+ if ((cbt->compare != 0 || !valid) &&
!__cursor_fix_implicit(btree, cbt))
WT_ERR(WT_NOTFOUND);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c
index 03b5039b00b..8eb120f06ec 100644
--- a/src/third_party/wiredtiger/src/btree/bt_random.c
+++ b/src/third_party/wiredtiger/src/btree/bt_random.c
@@ -302,6 +302,7 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
WT_UPDATE *upd;
wt_off_t size;
uint64_t n, skip;
+ bool valid;
btree = cbt->btree;
cursor = &cbt->iface;
@@ -421,7 +422,8 @@ random_page_entry:
* the next entry, if that doesn't work, move to the previous entry.
*/
WT_ERR(__wt_row_random_leaf(session, cbt));
- if (__wt_cursor_valid(cbt, &upd)) {
+ WT_ERR(__wt_cursor_valid(cbt, &upd, &valid));
+ if (valid) {
WT_ERR(__wt_key_return(session, cbt));
WT_ERR(__wt_value_return(session, cbt, upd));
} else {
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index f473cfe3e8d..ffcb2139330 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -46,8 +46,8 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_open_session[] = {
static const WT_CONFIG_CHECK confchk_WT_CONNECTION_query_timestamp[] = {
{ "get", "string",
- NULL, "choices=[\"all_committed\",\"oldest\",\"pinned\","
- "\"recovery\",\"stable\"]",
+ NULL, "choices=[\"all_committed\",\"last_checkpoint\",\"oldest\""
+ ",\"pinned\",\"recovery\",\"stable\"]",
NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h
index afefbe8ad5c..ff757c4bfb5 100644
--- a/src/third_party/wiredtiger/src/include/api.h
+++ b/src/third_party/wiredtiger/src/include/api.h
@@ -68,6 +68,7 @@
if ((ret) != 0 && \
(ret) != WT_NOTFOUND && \
(ret) != WT_DUPLICATE_KEY && \
+ (ret) != WT_PREPARE_CONFLICT && \
F_ISSET(&(s)->txn, WT_TXN_RUNNING)) \
F_SET(&(s)->txn, WT_TXN_ERROR); \
/* \
@@ -237,6 +238,7 @@
JOINABLE_CURSOR_CALL_CHECK(cur)
#define CURSOR_UPDATE_API_END(s, ret) \
+ ((ret == WT_PREPARE_CONFLICT) ? (ret = WT_ROLLBACK) : ret ); \
TXN_API_END(s, ret)
#define ASYNCOP_API_CALL(conn, s, n) \
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 7ba73d1b94f..d56a3773933 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -997,7 +997,7 @@ struct __wt_update {
finalized prepare */
#define WT_UPDATE_STATE_LOCKED 1 /* locked */
#define WT_UPDATE_STATE_PREPARED 2 /* prepared */
- uint8_t state; /* state (one byte : conserve memory) */
+ volatile uint8_t state;
/* If the update includes a complete value. */
#define WT_UPDATE_DATA_VALUE(upd) \
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h
index 70f9318f6d7..ec5c6689c3f 100644
--- a/src/third_party/wiredtiger/src/include/cursor.h
+++ b/src/third_party/wiredtiger/src/include/cursor.h
@@ -217,20 +217,23 @@ struct __wt_cursor_btree {
#endif
/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_CBT_ACTIVE 0x01u /* Active in the tree */
-#define WT_CBT_ITERATE_APPEND 0x02u /* Col-store: iterating append list */
-#define WT_CBT_ITERATE_NEXT 0x04u /* Next iteration configuration */
-#define WT_CBT_ITERATE_PREV 0x08u /* Prev iteration configuration */
-#define WT_CBT_NO_TXN 0x10u /* Non-txn cursor (e.g. a checkpoint) */
-#define WT_CBT_SEARCH_SMALLEST 0x20u /* Row-store: small-key insert list */
-#define WT_CBT_VAR_ONPAGE_MATCH 0x40u /* Var-store: on-page recno match */
+#define WT_CBT_ACTIVE 0x001u /* Active in the tree */
+#define WT_CBT_ITERATE_APPEND 0x002u /* Col-store: iterating append list */
+#define WT_CBT_ITERATE_NEXT 0x004u /* Next iteration configuration */
+#define WT_CBT_ITERATE_PREV 0x008u /* Prev iteration configuration */
+#define WT_CBT_NO_TXN 0x010u /* Non-txn cursor (e.g. a checkpoint) */
+#define WT_CBT_RETRY_NEXT 0x020u /* Next, resulted in prepare conflict */
+#define WT_CBT_RETRY_PREV 0x040u /* Prev, resulted in prepare conflict */
+#define WT_CBT_SEARCH_SMALLEST 0x080u /* Row-store: small-key insert list */
+#define WT_CBT_VAR_ONPAGE_MATCH 0x100u /* Var-store: on-page recno match */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
#define WT_CBT_POSITION_MASK /* Flags associated with position */ \
(WT_CBT_ITERATE_APPEND | WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV | \
- WT_CBT_SEARCH_SMALLEST | WT_CBT_VAR_ONPAGE_MATCH)
+ WT_CBT_RETRY_NEXT | WT_CBT_RETRY_PREV | WT_CBT_SEARCH_SMALLEST | \
+ WT_CBT_VAR_ONPAGE_MATCH)
- uint8_t flags;
+ uint32_t flags;
};
struct __wt_cursor_bulk {
diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i
index a4e986c4325..d338c47dfae 100644
--- a/src/third_party/wiredtiger/src/include/cursor.i
+++ b/src/third_party/wiredtiger/src/include/cursor.i
@@ -311,6 +311,20 @@ __wt_cursor_dhandle_decr_use(WT_SESSION_IMPL *session)
}
/*
+ * __cursor_kv_return --
+ * Return a page referenced key/value pair to the application.
+ */
+static inline int
+__cursor_kv_return(
+ WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPDATE *upd)
+{
+ WT_RET(__wt_key_return(session, cbt));
+ WT_RET(__wt_value_return(session, cbt, upd));
+
+ return (0);
+}
+
+/*
* __cursor_func_init --
* Cursor call setup.
*/
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 8b69f9ef244..508480b95c2 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -100,7 +100,7 @@ extern void __wt_cursor_key_order_reset(WT_CURSOR_BTREE *cbt);
extern void __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt);
extern int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern bool __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp);
+extern int __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *valid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_reset(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_search(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/third_party/wiredtiger/src/include/mutex.h b/src/third_party/wiredtiger/src/include/mutex.h
index 2a3fc7448f8..ba32d166f03 100644
--- a/src/third_party/wiredtiger/src/include/mutex.h
+++ b/src/third_party/wiredtiger/src/include/mutex.h
@@ -44,9 +44,8 @@ struct __wt_rwlock { /* Read/write lock */
uint8_t current; /* Current ticket */
uint8_t next; /* Next available ticket */
uint8_t reader; /* Read queue ticket */
- uint8_t __notused; /* Padding */
- uint16_t readers_active;/* Count of active readers */
- uint16_t readers_queued;/* Count of queued readers */
+ uint8_t readers_queued; /* Count of queued readers */
+ uint32_t readers_active;/* Count of active readers */
} s;
} u;
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 7ef63cb0eaf..01a982b8602 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -627,6 +627,9 @@ struct __wt_connection_stats {
int64_t txn_read_queue_head;
int64_t txn_read_queue_inserts;
int64_t txn_read_queue_len;
+ int64_t txn_rollback_to_stable;
+ int64_t txn_rollback_upd_aborted;
+ int64_t txn_rollback_las_removed;
int64_t txn_set_ts;
int64_t txn_set_ts_commit;
int64_t txn_set_ts_commit_upd;
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index dd7f5d4a8bc..19e0be2d695 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -102,6 +102,7 @@ struct __wt_txn_global {
volatile uint64_t oldest_id;
WT_DECL_TIMESTAMP(commit_timestamp)
+ WT_DECL_TIMESTAMP(last_ckpt_timestamp)
WT_DECL_TIMESTAMP(oldest_timestamp)
WT_DECL_TIMESTAMP(pinned_timestamp)
WT_DECL_TIMESTAMP(recovery_timestamp)
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 5fcf8ee11c9..36cac1a26f3 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -9,6 +9,11 @@
static inline int __wt_txn_id_check(WT_SESSION_IMPL *session);
static inline void __wt_txn_read_last(WT_SESSION_IMPL *session);
+typedef enum {
+ WT_VISIBLE_FALSE=0, /* Not a visible update */
+ WT_VISIBLE_PREPARE=1, /* Prepared update */
+ WT_VISIBLE_TRUE=2 /* A visible update */
+} WT_VISIBLE_TYPE;
#ifdef HAVE_TIMESTAMPS
/*
* __wt_txn_timestamp_flags --
@@ -539,33 +544,74 @@ __wt_txn_visible(
}
/*
+ * __wt_txn_upd_visible_type --
+ * Visible type of given update for the current transaction.
+ */
+static inline WT_VISIBLE_TYPE
+__wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd)
+{
+ uint8_t upd_state;
+ bool upd_visible;
+
+ for (;;__wt_yield()) {
+ /* Commit is in progress, yield and try again. */
+ if ((upd_state = upd->state) == WT_UPDATE_STATE_LOCKED)
+ continue;
+
+ upd_visible = __wt_txn_visible(
+ session, upd->txnid, WT_TIMESTAMP_NULL(&upd->timestamp));
+
+ /*
+ * The visibility check is only valid if the update does not
+ * change state. If the state does change, recheck visibility.
+ */
+ if (upd->state == upd_state)
+ break;
+ }
+
+ if (!upd_visible)
+ return (WT_VISIBLE_FALSE);
+
+ if (upd_state == WT_UPDATE_STATE_PREPARED)
+ return (F_ISSET(&session->txn, WT_TXN_IGNORE_PREPARE) ?
+ WT_VISIBLE_FALSE : WT_VISIBLE_PREPARE);
+
+ return (WT_VISIBLE_TRUE);
+}
+
+/*
* __wt_txn_upd_visible --
* Can the current transaction see the given update.
*/
static inline bool
__wt_txn_upd_visible(WT_SESSION_IMPL *session, WT_UPDATE *upd)
{
- return (__wt_txn_visible(session,
- upd->txnid, WT_TIMESTAMP_NULL(&upd->timestamp)));
+ return (__wt_txn_upd_visible_type(session, upd) == WT_VISIBLE_TRUE);
}
/*
* __wt_txn_read --
* Get the first visible update in a list (or NULL if none are visible).
*/
-static inline WT_UPDATE *
-__wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd)
+static inline int
+__wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_UPDATE **updp)
{
static WT_UPDATE tombstone = {
.txnid = WT_TXN_NONE, .type = WT_UPDATE_TOMBSTONE
};
+ WT_VISIBLE_TYPE upd_visible;
bool skipped_birthmark;
+ *updp = NULL;
for (skipped_birthmark = false; upd != NULL; upd = upd->next) {
/* Skip reserved place-holders, they're never visible. */
- if (upd->type != WT_UPDATE_RESERVE &&
- __wt_txn_upd_visible(session, upd))
- break;
+ if (upd->type != WT_UPDATE_RESERVE) {
+ upd_visible = __wt_txn_upd_visible_type(session, upd);
+ if (upd_visible == WT_VISIBLE_TRUE)
+ break;
+ if (upd_visible == WT_VISIBLE_PREPARE)
+ return (WT_PREPARE_CONFLICT);
+ }
/* An invisible birthmark is equivalent to a tombstone. */
if (upd->type == WT_UPDATE_BIRTHMARK)
skipped_birthmark = true;
@@ -574,7 +620,8 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd)
if (upd == NULL && skipped_birthmark)
upd = &tombstone;
- return (upd == NULL || upd->type == WT_UPDATE_BIRTHMARK ? NULL : upd);
+ *updp = (upd == NULL || upd->type == WT_UPDATE_BIRTHMARK ? NULL : upd);
+ return (0);
}
/*
@@ -786,21 +833,32 @@ static inline int
__wt_txn_update_check(WT_SESSION_IMPL *session, WT_UPDATE *upd)
{
WT_TXN *txn;
+ bool ignore_prepare_set;
txn = &session->txn;
- if (txn->isolation == WT_ISO_SNAPSHOT)
- while (upd != NULL && !__wt_txn_upd_visible(session, upd)) {
- if (upd->txnid != WT_TXN_ABORTED) {
- WT_STAT_CONN_INCR(
- session, txn_update_conflict);
- WT_STAT_DATA_INCR(
- session, txn_update_conflict);
- return (__wt_txn_rollback_required(session,
+ if (txn->isolation != WT_ISO_SNAPSHOT)
+ return (0);
+
+ /*
+ * Clear the ignore prepare setting of txn, as it is not supposed, to
+ * affect the visibility for update operations.
+ */
+ ignore_prepare_set = F_ISSET(txn, WT_TXN_IGNORE_PREPARE);
+ F_CLR(txn, WT_TXN_IGNORE_PREPARE);
+ for (;upd != NULL && !__wt_txn_upd_visible(session, upd);
+ upd = upd->next) {
+ if (upd->txnid != WT_TXN_ABORTED) {
+ if (ignore_prepare_set)
+ F_SET(txn, WT_TXN_IGNORE_PREPARE);
+ WT_STAT_CONN_INCR(session, txn_update_conflict);
+ WT_STAT_DATA_INCR(session, txn_update_conflict);
+ return (__wt_txn_rollback_required(session,
"conflict between concurrent operations"));
- }
- upd = upd->next;
}
+ }
+ if (ignore_prepare_set)
+ F_SET(txn, WT_TXN_IGNORE_PREPARE);
return (0);
}
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index a75c22497ce..1f2a438b8e9 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -2431,8 +2431,9 @@ struct __wt_connection {
* timestamps of all active readers\, and \c stable returns the most
* recent \c stable_timestamp set with WT_CONNECTION::set_timestamp.
* See @ref transaction_timestamps., a string\, chosen from the
- * following options: \c "all_committed"\, \c "oldest"\, \c "pinned"\,
- * \c "recovery"\, \c "stable"; default \c all_committed.}
+ * following options: \c "all_committed"\, \c "last_checkpoint"\, \c
+ * "oldest"\, \c "pinned"\, \c "recovery"\, \c "stable"; default \c
+ * all_committed.}
* @configend
* @errors
* If there is no matching timestamp (e.g., if this method is called
@@ -5562,81 +5563,87 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1312
/*! transaction: read timestamp queue length */
#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1313
+/*! transaction: rollback to stable calls */
+#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE 1314
+/*! transaction: rollback to stable updates aborted */
+#define WT_STAT_CONN_TXN_ROLLBACK_UPD_ABORTED 1315
+/*! transaction: rollback to stable updates removed from lookaside */
+#define WT_STAT_CONN_TXN_ROLLBACK_LAS_REMOVED 1316
/*! transaction: set timestamp calls */
-#define WT_STAT_CONN_TXN_SET_TS 1314
+#define WT_STAT_CONN_TXN_SET_TS 1317
/*! transaction: set timestamp commit calls */
-#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1315
+#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1318
/*! transaction: set timestamp commit updates */
-#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1316
+#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1319
/*! transaction: set timestamp oldest calls */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1317
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1320
/*! transaction: set timestamp oldest updates */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1318
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1321
/*! transaction: set timestamp stable calls */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE 1319
+#define WT_STAT_CONN_TXN_SET_TS_STABLE 1322
/*! transaction: set timestamp stable updates */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1320
+#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1323
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1321
+#define WT_STAT_CONN_TXN_BEGIN 1324
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1322
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1325
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1323
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1326
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1324
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1327
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1325
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1328
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1326
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1329
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1327
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1330
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1328
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1331
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1329
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1332
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1330
+#define WT_STAT_CONN_TXN_CHECKPOINT 1333
/*!
* transaction: transaction checkpoints skipped because database was
* clean
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1331
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1334
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1332
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1335
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1333
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1336
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1334
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1337
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1335
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1338
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1336
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1339
/*!
* transaction: transaction range of IDs currently pinned by named
* snapshots
*/
-#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1337
+#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1340
/*! transaction: transaction range of timestamps currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1338
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1341
/*!
* transaction: transaction range of timestamps pinned by the oldest
* timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1339
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1342
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1340
+#define WT_STAT_CONN_TXN_SYNC 1343
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1341
+#define WT_STAT_CONN_TXN_COMMIT 1344
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1342
+#define WT_STAT_CONN_TXN_ROLLBACK 1345
/*! transaction: update conflicts */
-#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1343
+#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1346
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index f4d0fc0b1ef..a2ce2c136b5 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -1341,12 +1341,14 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
* started. The global commit point can move forward during
* reconciliation so we use a cached copy to avoid races when a
* concurrent transaction commits or rolls back while we are
- * examining its updates.
+ * examining its updates. As prepared transaction id's are
+ * globally visible, need to check the update state as well.
*/
if (F_ISSET(r, WT_REC_EVICT) &&
+ (upd->state != WT_UPDATE_STATE_READY ||
(F_ISSET(r, WT_REC_VISIBLE_ALL) ?
WT_TXNID_LE(r->last_running, txnid) :
- !__txn_visible_id(session, txnid))) {
+ !__txn_visible_id(session, txnid)))) {
uncommitted = r->update_uncommitted = true;
continue;
}
diff --git a/src/third_party/wiredtiger/src/support/mtx_rw.c b/src/third_party/wiredtiger/src/support/mtx_rw.c
index 572592b9fbc..b554f589b3d 100644
--- a/src/third_party/wiredtiger/src/support/mtx_rw.c
+++ b/src/third_party/wiredtiger/src/support/mtx_rw.c
@@ -48,9 +48,8 @@
* uint8_t current; // Current ticket
* uint8_t next; // Next available ticket
* uint8_t reader; // Read queue ticket
- * uint8_t __notused; // Padding
- * uint16_t readers_active; // Count of active readers
- * uint16_t readers_queued; // Count of queued readers
+ * uint8_t readers_queued; // Count of queued readers
+ * uint32_t readers_active; // Count of active readers
* } s;
* } u;
*
@@ -75,6 +74,12 @@
* 'reader' to 'next' (i.e. readers are scheduled after any queued writers,
* avoiding starvation), then atomically incrementing 'readers_queued'.
*
+ * We limit how many readers can queue: we don't allow more readers to queue
+ * than there are active writers (calculated as `next - current`): otherwise,
+ * in write-heavy workloads, readers can keep queuing up in front of writers
+ * and throughput is unstable. The remaining read requests wait without any
+ * ordering.
+ *
* The 'next' field is a 1-byte value so the available ticket number wraps
* after 256 requests. If a thread's write lock request would cause the 'next'
* field to catch up with 'current', instead it waits to avoid the same ticket
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 40a07be0174..ae13f7d8abe 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -1052,6 +1052,9 @@ static const char * const __stats_connection_desc[] = {
"transaction: read timestamp queue inserts to head",
"transaction: read timestamp queue inserts total",
"transaction: read timestamp queue length",
+ "transaction: rollback to stable calls",
+ "transaction: rollback to stable updates aborted",
+ "transaction: rollback to stable updates removed from lookaside",
"transaction: set timestamp calls",
"transaction: set timestamp commit calls",
"transaction: set timestamp commit updates",
@@ -1438,6 +1441,9 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->txn_read_queue_head = 0;
stats->txn_read_queue_inserts = 0;
stats->txn_read_queue_len = 0;
+ stats->txn_rollback_to_stable = 0;
+ stats->txn_rollback_upd_aborted = 0;
+ stats->txn_rollback_las_removed = 0;
stats->txn_set_ts = 0;
stats->txn_set_ts_commit = 0;
stats->txn_set_ts_commit_upd = 0;
@@ -1956,6 +1962,12 @@ __wt_stat_connection_aggregate(
to->txn_read_queue_inserts +=
WT_STAT_READ(from, txn_read_queue_inserts);
to->txn_read_queue_len += WT_STAT_READ(from, txn_read_queue_len);
+ to->txn_rollback_to_stable +=
+ WT_STAT_READ(from, txn_rollback_to_stable);
+ to->txn_rollback_upd_aborted +=
+ WT_STAT_READ(from, txn_rollback_upd_aborted);
+ to->txn_rollback_las_removed +=
+ WT_STAT_READ(from, txn_rollback_las_removed);
to->txn_set_ts += WT_STAT_READ(from, txn_set_ts);
to->txn_set_ts_commit += WT_STAT_READ(from, txn_set_ts_commit);
to->txn_set_ts_commit_upd +=
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 1235bc8c2b2..d3f11c5fa69 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -748,6 +748,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ WT_DECL_TIMESTAMP(ckpt_tmp_ts)
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
WT_TXN_ISOLATION saved_isolation;
@@ -899,6 +900,15 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* checkpointing the metadata since we know that all files in the
* checkpoint are now in a consistent state.
*/
+#ifdef HAVE_TIMESTAMPS
+ /*
+ * Record the timestamp from the transaction if we were successful.
+ * Store it in a temp variable now because it will be invalidated during
+ * commit but we don't want to set it until we know the checkpoint
+ * is successful.
+ */
+ __wt_timestamp_set(&ckpt_tmp_ts, &txn->read_timestamp);
+#endif
WT_ERR(__wt_txn_commit(session, NULL));
/*
@@ -942,8 +952,13 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
*/
txn_global->checkpoint_state.pinned_id = WT_TXN_NONE;
- if (full)
+ if (full) {
__checkpoint_stats(session);
+#ifdef HAVE_TIMESTAMPS
+ __wt_timestamp_set(
+ &conn->txn_global.last_ckpt_timestamp, &ckpt_tmp_ts);
+#endif
+ }
err: /*
* Reset the timer so that next checkpoint tracks the progress only if
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index d31b3995092..eef2fde5284 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -74,6 +74,7 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
&rollback_timestamp, las_timestamp.data) < 0) {
WT_ERR(cursor->remove(cursor));
++remove_cnt;
+ WT_STAT_CONN_INCR(session, txn_rollback_las_removed);
} else
++las_total;
}
@@ -111,6 +112,7 @@ __txn_abort_newer_update(WT_SESSION_IMPL *session,
if (__wt_timestamp_cmp(
rollback_timestamp, &next_upd->timestamp) < 0) {
next_upd->txnid = WT_TXN_ABORTED;
+ WT_STAT_CONN_INCR(session, txn_rollback_upd_aborted);
__wt_timestamp_set_zero(&next_upd->timestamp);
/*
@@ -425,6 +427,7 @@ __wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[])
conn = S2C(session);
+ WT_STAT_CONN_INCR(session, txn_rollback_to_stable);
/*
* Mark that a rollback operation is in progress and wait for eviction
* to drain. This is necessary because lookaside eviction uses
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index 280425eb56e..2266a9cd6f5 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -239,7 +239,10 @@ __txn_global_query_timestamp(
break;
}
__wt_readunlock(session, &txn_global->commit_timestamp_rwlock);
- } else if (WT_STRING_MATCH("oldest", cval.str, cval.len)) {
+ } else if (WT_STRING_MATCH("last_checkpoint", cval.str, cval.len))
+ /* Read-only value forever. No lock needed. */
+ __wt_timestamp_set(&ts, &txn_global->last_ckpt_timestamp);
+ else if (WT_STRING_MATCH("oldest", cval.str, cval.len)) {
if (!txn_global->has_oldest_timestamp)
return (WT_NOTFOUND);
WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
diff --git a/src/third_party/wiredtiger/test/csuite/rwlock/main.c b/src/third_party/wiredtiger/test/csuite/rwlock/main.c
index e1d00344ee2..f69628dca40 100644
--- a/src/third_party/wiredtiger/test/csuite/rwlock/main.c
+++ b/src/third_party/wiredtiger/test/csuite/rwlock/main.c
@@ -171,8 +171,8 @@ thread_dump(void *arg) {
sleep(1);
printf("\n"
"rwlock { current %" PRIu8 ", next %" PRIu8
- ", reader %" PRIu8 ", readers_active %" PRIu16
- ", readers_queued %" PRIu16 " }\n",
+ ", reader %" PRIu8 ", readers_active %" PRIu32
+ ", readers_queued %" PRIu8 " }\n",
rwlock.u.s.current,
rwlock.u.s.next,
rwlock.u.s.reader,
diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c
index 8d85d331c89..d46b0868887 100644
--- a/src/third_party/wiredtiger/test/format/config.c
+++ b/src/third_party/wiredtiger/test/format/config.c
@@ -191,9 +191,13 @@ config_setup(void)
/*
* Turn off truncate for LSM runs (some configurations with truncate
* always results in a timeout).
+ *
+ * WiredTiger doesn't currently support truncate and prepare at the
+ * same time, see WT-3922. For now, pick one on each run.
*/
- if (!config_is_perm("truncate") && DATASOURCE("lsm"))
- config_single("truncate=off", 0);
+ if (!config_is_perm("truncate"))
+ if (DATASOURCE("lsm") || mmrand(NULL, 0, 1) == 1)
+ config_single("truncate=off", 0);
/* Give Helium configuration a final review. */
if (DATASOURCE("helium"))
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index 4c54972516e..2466fe4d64d 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -403,10 +403,8 @@ snap_check(WT_CURSOR *cursor,
break;
case WT_NOTFOUND:
break;
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
default:
- testutil_die(ret, "WT_CURSOR.search");
+ return (ret);
}
/* Check for simple matches. */
@@ -644,6 +642,19 @@ prepare_transaction(TINFO *tinfo, WT_SESSION *session)
}
/*
+ * OP_FAILED --
+ * General error handling.
+ */
+#define OP_FAILED(notfound_ok) do { \
+ positioned = false; \
+ if (intxn && (ret == WT_CACHE_FULL || ret == WT_ROLLBACK)) \
+ goto deadlock; \
+ testutil_assert((notfound_ok && ret == WT_NOTFOUND) || \
+ ret == WT_CACHE_FULL || \
+ ret == WT_PREPARE_CONFLICT || ret == WT_ROLLBACK); \
+} while (0)
+
+/*
* ops --
* Per-thread operations.
*/
@@ -825,11 +836,8 @@ ops(void *arg)
if (ret == 0) {
positioned = true;
SNAP_TRACK(READ, tinfo);
- } else {
- if (ret == WT_ROLLBACK && intxn)
- goto deadlock;
- testutil_assert(ret == WT_NOTFOUND);
- }
+ } else
+ OP_FAILED(true);
}
/* Optionally reserve a row. */
@@ -847,12 +855,8 @@ ops(void *arg)
positioned = true;
__wt_yield(); /* Let other threads proceed. */
- } else {
- positioned = false;
- if (ret == WT_ROLLBACK && intxn)
- goto deadlock;
- testutil_assert(ret == WT_NOTFOUND);
- }
+ } else
+ OP_FAILED(true);
}
/* Perform the operation. */
@@ -881,11 +885,8 @@ ops(void *arg)
if (ret == 0) {
++tinfo->insert;
SNAP_TRACK(INSERT, tinfo);
- } else {
- if (ret == WT_ROLLBACK && intxn)
- goto deadlock;
- testutil_assert(ret == WT_ROLLBACK);
- }
+ } else
+ OP_FAILED(false);
break;
case MODIFY:
/*
@@ -907,13 +908,8 @@ ops(void *arg)
if (ret == 0) {
positioned = true;
SNAP_TRACK(MODIFY, tinfo);
- } else {
- positioned = false;
- if (ret == WT_ROLLBACK && intxn)
- goto deadlock;
- testutil_assert(
- ret == WT_NOTFOUND || ret == WT_ROLLBACK);
- }
+ } else
+ OP_FAILED(true);
break;
case READ:
++tinfo->search;
@@ -921,12 +917,8 @@ ops(void *arg)
if (ret == 0) {
positioned = true;
SNAP_TRACK(READ, tinfo);
- } else {
- positioned = false;
- if (ret == WT_ROLLBACK && intxn)
- goto deadlock;
- testutil_assert(ret == WT_NOTFOUND);
- }
+ } else
+ OP_FAILED(true);
break;
case REMOVE:
remove_instead_of_truncate:
@@ -946,12 +938,8 @@ remove_instead_of_truncate:
* previous state, but not necessarily set.
*/
SNAP_TRACK(REMOVE, tinfo);
- } else {
- positioned = false;
- if (ret == WT_ROLLBACK && intxn)
- goto deadlock;
- testutil_assert(ret == WT_NOTFOUND);
- }
+ } else
+ OP_FAILED(true);
break;
case TRUNCATE:
/*
@@ -1020,11 +1008,8 @@ remove_instead_of_truncate:
if (ret == 0) {
++tinfo->truncate;
SNAP_TRACK(TRUNCATE, tinfo);
- } else {
- testutil_assert(ret == WT_ROLLBACK);
- if (intxn)
- goto deadlock;
- }
+ } else
+ OP_FAILED(false);
break;
case UPDATE:
update_instead_of_chosen_op:
@@ -1041,12 +1026,8 @@ update_instead_of_chosen_op:
if (ret == 0) {
positioned = true;
SNAP_TRACK(UPDATE, tinfo);
- } else {
- positioned = false;
- if (ret == WT_ROLLBACK && intxn)
- goto deadlock;
- testutil_assert(ret == WT_ROLLBACK);
- }
+ } else
+ OP_FAILED(false);
break;
}
@@ -1061,9 +1042,8 @@ update_instead_of_chosen_op:
for (i = 0; i < j; ++i) {
if ((ret = nextprev(tinfo, cursor, next)) == 0)
continue;
- if (ret == WT_ROLLBACK && intxn)
- goto deadlock;
- testutil_assert(ret == WT_NOTFOUND);
+
+ OP_FAILED(true);
break;
}
}
@@ -1090,9 +1070,11 @@ update_instead_of_chosen_op:
goto deadlock;
}
- /* Prepare the transaction 10% of the time. */
- /* XXX: CONFIGURE PREPARE OFF FOR NOW */
- if (mmrand(&tinfo->rnd, 1, 10) == 0) {
+ /*
+ * Prepare the transaction 10% of the time.
+ * Currently doesn't work with truncation, see WT-3922.
+ */
+ if (g.c_truncate == 0 && mmrand(&tinfo->rnd, 1, 10) == 1) {
ret = prepare_transaction(tinfo, session);
testutil_assert(ret == 0 || ret == WT_PREPARE_CONFLICT);
if (ret == WT_PREPARE_CONFLICT)
@@ -1138,7 +1120,7 @@ deadlock: ++tinfo->deadlock;
/*
* wts_read_scan --
- * Read and verify all elements in a file.
+ * Read and verify a subset of the elements in a file.
*/
void
wts_read_scan(void)
@@ -1182,6 +1164,7 @@ wts_read_scan(void)
case 0:
case WT_NOTFOUND:
case WT_ROLLBACK:
+ case WT_PREPARE_CONFLICT:
break;
default:
testutil_die(
@@ -1209,11 +1192,6 @@ read_row_worker(
session = cursor->session;
- /* Log the operation */
- if (g.logging == LOG_OPS)
- (void)g.wt_api->msg_printf(g.wt_api,
- session, "%-10s%" PRIu64, "read", keyno);
-
/* Retrieve the key/value pair by key. */
switch (g.type) {
case FIX:
@@ -1254,12 +1232,15 @@ read_row_worker(
value->size = 1;
}
break;
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
default:
- testutil_die(ret, "read_row: read row %" PRIu64, keyno);
+ return (ret);
}
+ /* Log the operation */
+ if (g.logging == LOG_OPS)
+ (void)g.wt_api->msg_printf(g.wt_api,
+ session, "%-10s%" PRIu64, "read", keyno);
+
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
return (ret);
@@ -1394,24 +1375,39 @@ nextprev(TINFO *tinfo, WT_CURSOR *cursor, bool next)
break;
case WT_NOTFOUND:
break;
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
default:
- testutil_die(ret, "%s", which);
+ return (ret);
}
+ if (g.logging == LOG_OPS)
+ switch (g.type) {
+ case FIX:
+ (void)g.wt_api->msg_printf(g.wt_api,
+ cursor->session, "%-10s%" PRIu64 " {0x%02x}",
+ which, keyno, ((char *)value.data)[0]);
+ break;
+ case ROW:
+ (void)g.wt_api->msg_printf(g.wt_api,
+ cursor->session, "%-10s{%.*s}, {%.*s}",
+ which, (int)key.size, (char *)key.data,
+ (int)value.size, (char *)value.data);
+ break;
+ case VAR:
+ (void)g.wt_api->msg_printf(g.wt_api,
+ cursor->session, "%-10s%" PRIu64 " {%.*s}",
+ which, keyno, (int)value.size, (char *)value.data);
+ break;
+ }
+
#ifdef HAVE_BERKELEY_DB
if (!SINGLETHREADED)
return (ret);
{
WT_ITEM bdb_key, bdb_value;
- WT_SESSION *session;
int notfound;
char *p;
- session = cursor->session;
-
/* Retrieve the BDB key/value. */
bdb_np(next, &bdb_key.data, &bdb_key.size,
&bdb_value.data, &bdb_value.size, &notfound);
@@ -1444,26 +1440,6 @@ mismatch: if (g.type == ROW) {
print_item(" wt-value", &value);
testutil_die(0, NULL);
}
-
- if (g.logging == LOG_OPS)
- switch (g.type) {
- case FIX:
- (void)g.wt_api->msg_printf(g.wt_api,
- session, "%-10s%" PRIu64 " {0x%02x}", which,
- keyno, ((char *)value.data)[0]);
- break;
- case ROW:
- (void)g.wt_api->msg_printf(
- g.wt_api, session, "%-10s{%.*s}, {%.*s}", which,
- (int)key.size, (char *)key.data,
- (int)value.size, (char *)value.data);
- break;
- case VAR:
- (void)g.wt_api->msg_printf(g.wt_api, session,
- "%-10s%" PRIu64 " {%.*s}", which,
- keyno, (int)value.size, (char *)value.data);
- break;
- }
}
#endif
return (ret);
@@ -1483,24 +1459,14 @@ row_reserve(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
cursor->set_key(cursor, tinfo->key);
}
+ if ((ret = cursor->reserve(cursor)) != 0)
+ return (ret);
+
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
"%-10s{%.*s}", "reserve",
(int)tinfo->key->size, tinfo->key->data);
- switch (ret = cursor->reserve(cursor)) {
- case 0:
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- case WT_NOTFOUND:
- return (WT_NOTFOUND);
- default:
- testutil_die(ret,
- "row_reserve: reserve row %" PRIu64 " by key",
- tinfo->keyno);
- }
return (0);
}
@@ -1516,21 +1482,13 @@ col_reserve(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
if (!positioned)
cursor->set_key(cursor, tinfo->keyno);
+ if ((ret = cursor->reserve(cursor)) != 0)
+ return (ret);
+
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
"%-10s%" PRIu64, "reserve", tinfo->keyno);
- switch (ret = cursor->reserve(cursor)) {
- case 0:
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- case WT_NOTFOUND:
- return (WT_NOTFOUND);
- default:
- testutil_die(ret, "col_reserve: %" PRIu64, tinfo->keyno);
- }
return (0);
}
@@ -1577,19 +1535,10 @@ row_modify(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
}
modify_build(tinfo, entries, &nentries);
- switch (ret = cursor->modify(cursor, entries, nentries)) {
- case 0:
- testutil_check(cursor->get_value(cursor, tinfo->value));
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- case WT_NOTFOUND:
- return (WT_NOTFOUND);
- default:
- testutil_die(ret,
- "row_modify: modify row %" PRIu64 " by key", tinfo->keyno);
- }
+ if ((ret = cursor->modify(cursor, entries, nentries)) != 0)
+ return (ret);
+
+ testutil_check(cursor->get_value(cursor, tinfo->value));
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
@@ -1624,25 +1573,16 @@ col_modify(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
cursor->set_key(cursor, tinfo->keyno);
modify_build(tinfo, entries, &nentries);
- switch (ret = cursor->modify(cursor, entries, nentries)) {
- case 0:
- testutil_check(cursor->get_value(cursor, tinfo->value));
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- case WT_NOTFOUND:
- return (WT_NOTFOUND);
- default:
- testutil_die(ret,
- "col_modify: modify row %" PRIu64, tinfo->keyno);
- }
+ if ((ret = cursor->modify(cursor, entries, nentries)) != 0)
+ return (ret);
+
+ testutil_check(cursor->get_value(cursor, tinfo->value));
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
- "%-10s{%.*s}, {%.*s}",
+ "%-10s%" PRIu64 ", {%.*s}",
"modify",
- (int)tinfo->key->size, tinfo->key->data,
+ tinfo->keyno,
(int)tinfo->value->size, tinfo->value->data);
#ifdef HAVE_BERKELEY_DB
@@ -1698,24 +1638,15 @@ row_truncate(TINFO *tinfo, WT_CURSOR *cursor)
testutil_check(c2->close(c2));
}
+ if (ret != 0)
+ return (ret);
+
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, session,
"%-10s%" PRIu64 ", %" PRIu64,
"truncate",
tinfo->keyno, tinfo->last);
- switch (ret) {
- case 0:
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- default:
- testutil_die(ret,
- "row_truncate: row %" PRIu64 "-%" PRIu64,
- tinfo->keyno, tinfo->last);
- }
-
#ifdef HAVE_BERKELEY_DB
if (SINGLETHREADED)
bdb_truncate(tinfo->keyno, tinfo->last);
@@ -1724,49 +1655,6 @@ row_truncate(TINFO *tinfo, WT_CURSOR *cursor)
}
/*
- * row_update --
- * Update a row in a row-store file.
- */
-static int
-row_update(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
-{
- WT_DECL_RET;
-
- if (!positioned) {
- key_gen(tinfo->key, tinfo->keyno);
- cursor->set_key(cursor, tinfo->key);
- }
- val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno);
- cursor->set_value(cursor, tinfo->value);
-
- if (g.logging == LOG_OPS)
- (void)g.wt_api->msg_printf(g.wt_api, cursor->session,
- "%-10s{%.*s}, {%.*s}",
- "put",
- (int)tinfo->key->size, tinfo->key->data,
- (int)tinfo->value->size, tinfo->value->data);
-
- switch (ret = cursor->update(cursor)) {
- case 0:
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- default:
- testutil_die(ret,
- "row_update: update row %" PRIu64 " by key", tinfo->keyno);
- }
-
-#ifdef HAVE_BERKELEY_DB
- if (SINGLETHREADED)
- bdb_update(
- tinfo->key->data, tinfo->key->size,
- tinfo->value->data, tinfo->value->size);
-#endif
- return (0);
-}
-
-/*
* col_truncate --
* Truncate rows in a column-store file.
*/
@@ -1802,6 +1690,8 @@ col_truncate(TINFO *tinfo, WT_CURSOR *cursor)
ret = session->truncate(session, NULL, cursor, c2, NULL);
testutil_check(c2->close(c2));
}
+ if (ret != 0)
+ return (ret);
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, session,
@@ -1809,21 +1699,44 @@ col_truncate(TINFO *tinfo, WT_CURSOR *cursor)
"truncate",
tinfo->keyno, tinfo->last);
- switch (ret) {
- case 0:
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- default:
- testutil_die(ret,
- "col_truncate: row %" PRIu64 "-%" PRIu64,
- tinfo->keyno, tinfo->last);
+#ifdef HAVE_BERKELEY_DB
+ if (SINGLETHREADED)
+ bdb_truncate(tinfo->keyno, tinfo->last);
+#endif
+ return (0);
+}
+
+/*
+ * row_update --
+ * Update a row in a row-store file.
+ */
+static int
+row_update(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
+{
+ WT_DECL_RET;
+
+ if (!positioned) {
+ key_gen(tinfo->key, tinfo->keyno);
+ cursor->set_key(cursor, tinfo->key);
}
+ val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno);
+ cursor->set_value(cursor, tinfo->value);
+
+ if ((ret = cursor->update(cursor)) != 0)
+ return (ret);
+
+ if (g.logging == LOG_OPS)
+ (void)g.wt_api->msg_printf(g.wt_api, cursor->session,
+ "%-10s{%.*s}, {%.*s}",
+ "put",
+ (int)tinfo->key->size, tinfo->key->data,
+ (int)tinfo->value->size, tinfo->value->data);
#ifdef HAVE_BERKELEY_DB
if (SINGLETHREADED)
- bdb_truncate(tinfo->keyno, tinfo->last);
+ bdb_update(
+ tinfo->key->data, tinfo->key->size,
+ tinfo->value->data, tinfo->value->size);
#endif
return (0);
}
@@ -1845,6 +1758,9 @@ col_update(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
else
cursor->set_value(cursor, tinfo->value);
+ if ((ret = cursor->update(cursor)) != 0)
+ return (ret);
+
if (g.logging == LOG_OPS) {
if (g.type == FIX)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
@@ -1859,16 +1775,6 @@ col_update(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
(char *)tinfo->value->data);
}
- switch (ret = cursor->update(cursor)) {
- case 0:
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- default:
- testutil_die(ret, "col_update: %" PRIu64, tinfo->keyno);
- }
-
#ifdef HAVE_BERKELEY_DB
if (SINGLETHREADED) {
key_gen(tinfo->key, tinfo->keyno);
@@ -1999,6 +1905,9 @@ row_insert(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno);
cursor->set_value(cursor, tinfo->value);
+ if ((ret = cursor->insert(cursor)) != 0)
+ return (ret);
+
/* Log the operation */
if (g.logging == LOG_OPS)
(void)g.wt_api->msg_printf(g.wt_api, cursor->session,
@@ -2007,17 +1916,6 @@ row_insert(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
(int)tinfo->key->size, tinfo->key->data,
(int)tinfo->value->size, tinfo->value->data);
- switch (ret = cursor->insert(cursor)) {
- case 0:
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- default:
- testutil_die(ret,
- "row_insert: insert row %" PRIu64 " by key", tinfo->keyno);
- }
-
#ifdef HAVE_BERKELEY_DB
if (SINGLETHREADED)
bdb_update(
@@ -2041,15 +1939,10 @@ col_insert(TINFO *tinfo, WT_CURSOR *cursor)
cursor->set_value(cursor, *(uint8_t *)tinfo->value->data);
else
cursor->set_value(cursor, tinfo->value);
- switch (ret = cursor->insert(cursor)) {
- case 0:
- break;
- case WT_CACHE_FULL:
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- default:
- testutil_die(ret, "cursor.insert");
- }
+
+ if ((ret = cursor->insert(cursor)) != 0)
+ return (ret);
+
testutil_check(cursor->get_key(cursor, &tinfo->keyno));
table_append(tinfo->keyno); /* Extend the object. */
@@ -2093,23 +1986,16 @@ row_remove(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
cursor->set_key(cursor, tinfo->key);
}
- if (g.logging == LOG_OPS)
- (void)g.wt_api->msg_printf(g.wt_api,
- cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno);
-
/* We use the cursor in overwrite mode, check for existence. */
if ((ret = cursor->search(cursor)) == 0)
ret = cursor->remove(cursor);
- switch (ret) {
- case 0:
- case WT_NOTFOUND:
- break;
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- default:
- testutil_die(ret,
- "row_remove: remove %" PRIu64 " by key", tinfo->keyno);
- }
+
+ if (ret != 0 && ret != WT_NOTFOUND)
+ return (ret);
+
+ if (g.logging == LOG_OPS)
+ (void)g.wt_api->msg_printf(g.wt_api,
+ cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno);
#ifdef HAVE_BERKELEY_DB
if (SINGLETHREADED) {
@@ -2134,23 +2020,16 @@ col_remove(TINFO *tinfo, WT_CURSOR *cursor, bool positioned)
if (!positioned)
cursor->set_key(cursor, tinfo->keyno);
- if (g.logging == LOG_OPS)
- (void)g.wt_api->msg_printf(g.wt_api,
- cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno);
-
/* We use the cursor in overwrite mode, check for existence. */
if ((ret = cursor->search(cursor)) == 0)
ret = cursor->remove(cursor);
- switch (ret) {
- case 0:
- case WT_NOTFOUND:
- break;
- case WT_ROLLBACK:
- return (WT_ROLLBACK);
- default:
- testutil_die(ret,
- "col_remove: remove %" PRIu64 " by key", tinfo->keyno);
- }
+
+ if (ret != 0 && ret != WT_NOTFOUND)
+ return (ret);
+
+ if (g.logging == LOG_OPS)
+ (void)g.wt_api->msg_printf(g.wt_api,
+ cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno);
#ifdef HAVE_BERKELEY_DB
if (SINGLETHREADED) {
diff --git a/src/third_party/wiredtiger/test/suite/test_cursor14.py b/src/third_party/wiredtiger/test/suite/test_cursor14.py
new file mode 100644
index 00000000000..25bd0cec00a
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_cursor14.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2018 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+from wtdataset import SimpleDataSet, ComplexDataSet, ComplexLSMDataSet
+from wtscenario import make_scenarios
+
+# test_cursor14.py
+# Test that more than 64K cursors can be opened on a data source
+class test_cursor14(wttest.WiredTigerTestCase):
+ scenarios = make_scenarios([
+ ('file-r', dict(type='file:', keyfmt='r', dataset=SimpleDataSet)),
+ ('file-S', dict(type='file:', keyfmt='S', dataset=SimpleDataSet)),
+ ('lsm-S', dict(type='lsm:', keyfmt='S', dataset=SimpleDataSet)),
+ ('table-r', dict(type='table:', keyfmt='r', dataset=SimpleDataSet)),
+ ('table-S', dict(type='table:', keyfmt='S', dataset=SimpleDataSet)),
+ ('table-r-complex', dict(type='table:', keyfmt='r',
+ dataset=ComplexDataSet)),
+ ('table-S-complex', dict(type='table:', keyfmt='S',
+ dataset=ComplexDataSet)),
+ ('table-S-complex-lsm', dict(type='table:', keyfmt='S',
+ dataset=ComplexLSMDataSet)),
+ ])
+
+ def test_cursor14(self):
+ uri = self.type + 'cursor14'
+
+ ds = self.dataset(self, uri, 100, key_format=self.keyfmt)
+ ds.populate()
+
+ for i in xrange(66000):
+ cursor = self.session.open_cursor(uri, None, None)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_prepare04.py b/src/third_party/wiredtiger/test/suite/test_prepare04.py
new file mode 100644
index 00000000000..af5dd12b1e5
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_prepare04.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2018 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_prepare04.py
+# Prepare: prepare conflict with update and read operations
+#
+
+import random
+from suite_subprocess import suite_subprocess
+import wiredtiger, wttest
+from wtscenario import make_scenarios
+
+def timestamp_str(t):
+ return '%x' % t
+
+class test_prepare04(wttest.WiredTigerTestCase, suite_subprocess):
+ tablename = 'test_prepare_cursor'
+ uri = 'table:' + tablename
+ before_ts = timestamp_str(150)
+ prepare_ts = timestamp_str(200)
+ after_ts = timestamp_str(250)
+
+ types = [
+ ('col', dict(extra_config=',log=(enabled=false),key_format=r')),
+ ('lsm', dict(extra_config=',log=(enabled=false),type=lsm')),
+ ('row', dict(extra_config=',log=(enabled=false)')),
+ ]
+
+ # Various begin_transaction config
+ txncfg = [
+ ('before_ts', dict(txn_config='isolation=snapshot,read_timestamp=' + before_ts, after_ts=False)),
+ ('after_ts', dict(txn_config='isolation=snapshot,read_timestamp=' + after_ts, after_ts=True)),
+ ('no_ts', dict(txn_config='isolation=snapshot', after_ts=True)),
+ ]
+
+ preparecfg = [
+ ('ignore_false', dict(ignore_config=',ignore_prepare=false', ignore=False)),
+ ('ignore_true', dict(ignore_config=',ignore_prepare=true', ignore=True)),
+ ]
+ conn_config = 'log=(enabled)'
+
+ scenarios = make_scenarios(types, txncfg, preparecfg)
+
+ def test_prepare_conflict(self):
+ if not wiredtiger.timestamp_build():
+ self.skipTest('requires a timestamp build')
+
+ self.session.create(self.uri,
+ 'key_format=i,value_format=i' + self.extra_config)
+ c = self.session.open_cursor(self.uri)
+
+ # Insert keys 1..100 each with timestamp=key, in some order
+ orig_keys = range(1, 101)
+ keys = orig_keys[:]
+ random.shuffle(keys)
+
+ k = 1
+ self.session.begin_transaction()
+ c[k] = 1
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(100))
+
+ # Everything up to and including timestamp 100 has been committed.
+ self.assertTimestampsEqual(self.conn.query_timestamp(), timestamp_str(100))
+
+ # Bump the oldest timestamp, we're not going back...
+ self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(100))
+
+ # make prepared updates.
+ k = 1
+ self.session.begin_transaction('isolation=snapshot')
+ c.set_key(1)
+ c.set_value(2)
+ c.update()
+ self.session.prepare_transaction('prepare_timestamp=' + self.prepare_ts)
+ conflictmsg = '/conflict between concurrent operations/'
+ preparemsg = '/conflict with a prepared update/'
+
+ #'''
+ # Verify data visibility from a different session/transaction.
+ s_other = self.conn.open_session()
+ c_other = s_other.open_cursor(self.uri, None)
+ s_other.begin_transaction(self.txn_config + self.ignore_config)
+ c_other.set_key(1)
+ if self.ignore == False and self.after_ts == True:
+ self.assertRaises(wiredtiger.WiredTigerError, lambda:c_other.search())
+ else:
+ c_other.search()
+ self.assertTrue(c_other.get_value() == 1)
+ c_other.set_value(3)
+ self.assertRaises(wiredtiger.WiredTigerError, lambda:c_other.update())
+ s_other.commit_transaction()
+ #'''
+
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(300))
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp04.py b/src/third_party/wiredtiger/test/suite/test_timestamp04.py
index 48ec7fac9a6..83ed4e904a6 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp04.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp04.py
@@ -32,6 +32,7 @@
from suite_subprocess import suite_subprocess
import wiredtiger, wttest
+from wiredtiger import stat
from wtscenario import make_scenarios
def timestamp_str(t):
@@ -98,7 +99,7 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
def ConnectionOpen(self, cacheSize):
self.home = '.'
- conn_params = 'create,' + \
+ conn_params = 'create,statistics=(fast),' + \
cacheSize + ',error_prefix="%s" %s' % (self.shortid(), self.conn_config)
try:
self.conn = wiredtiger.wiredtiger_open(self.home, conn_params)
@@ -164,6 +165,12 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
stable_ts = timestamp_str(key_range / 2)
self.conn.set_timestamp('stable_timestamp=' + stable_ts)
self.conn.rollback_to_stable()
+ stat_cursor = self.session.open_cursor('statistics:', None, None)
+ calls = stat_cursor[stat.conn.txn_rollback_to_stable][2]
+ upd_aborted = stat_cursor[stat.conn.txn_rollback_upd_aborted][2]
+ stat_cursor.close()
+ self.assertEqual(calls, 1)
+ self.assertTrue(upd_aborted >= key_range/2)
# Check that we see the inserted value (i.e. 1) for all the keys in
# non-timestamp tables.
@@ -224,9 +231,20 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
# Scenario: 4
# Advance the stable_timestamp by a quarter range and rollback.
# Three-fourths of the later timestamps will be rolled back.
- stable_ts = timestamp_str(key_range + key_range / 4)
+ rolled_range = key_range + key_range / 4
+ stable_ts = timestamp_str(rolled_range)
self.conn.set_timestamp('stable_timestamp=' + stable_ts)
self.conn.rollback_to_stable()
+ stat_cursor = self.session.open_cursor('statistics:', None, None)
+ calls = stat_cursor[stat.conn.txn_rollback_to_stable][2]
+ upd_aborted = stat_cursor[stat.conn.txn_rollback_upd_aborted][2]
+ stat_cursor.close()
+ self.assertEqual(calls, 2)
+ #
+ # We rolled back half on the earlier call and now three-quarters on
+ # this call, which is one and one quarter of all keys rolled back.
+ #
+ self.assertTrue(upd_aborted >= rolled_range)
# Check that we see the updated value (i.e. 2) for all the keys in
# non-timestamped tables.
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp10.py b/src/third_party/wiredtiger/test/suite/test_timestamp10.py
index a798f5ff355..02b22e6afbe 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp10.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp10.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
#
# test_timestamp10.py
-# Timestamps: Saving and querying the checkpoint recovery timestamp
+# Timestamps: Saving and querying the last checkpoint and recovery timestamps
#
import fnmatch, os, shutil
@@ -101,6 +101,8 @@ class test_timestamp10(wttest.WiredTigerTestCase, suite_subprocess):
',stable_timestamp=' + timestamp_str(ts))
# This forces a different checkpoint timestamp for each table.
self.session.checkpoint()
+ q = self.conn.query_timestamp('get=last_checkpoint')
+ self.assertTimestampsEqual(q, timestamp_str(ts))
# Copy to a new database and then recover.
self.copy_dir(".", "RESTART")