summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Luke Chen <luke.chen@mongodb.com> 2018-04-18 15:56:04 +1000
committer Luke Chen <luke.chen@mongodb.com> 2018-04-18 15:56:04 +1000
commit 0ea8ebafc20bb379955b2168b31099aefa220d7b (patch)
tree 45ae9717fd0db22de6ca56240381c8106a633e7d
parent f1dce2d1934052cbac4032d0c5833d3857a0cfb2 (diff)
download mongo-0ea8ebafc20bb379955b2168b31099aefa220d7b.tar.gz
Import wiredtiger: ad25980c88b87d45dbcefdb10cdcf696d02a8ac2 from branch mongodb-3.8
ref: 5fc85c47ca..ad25980c88
for: 3.7.6

WT-3998 Fix a bug where stable timestamp was ignored on shutdown
WT-4012 Fix lookaside entry counters
WT-4019 Change test/format to test transaction prepare less often
WT-4027 Yield cursor operations between restarted search/traverse
WT-4031 on-page zero-length row-store values can be discarded from checkpoints
WT-4035 Truncate information discarded while active
WT-4036 Fix Coverity false positive: out-of-bounds access.
WT-4042 Access data handles safely during cursor reopen
-rw-r--r-- src/third_party/wiredtiger/import.data | 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_cursor.c | 46
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_read.c | 22
-rw-r--r-- src/third_party/wiredtiger/src/cache/cache_las.c | 129
-rw-r--r-- src/third_party/wiredtiger/src/config/config.c | 24
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_api.c | 7
-rw-r--r-- src/third_party/wiredtiger/src/cursor/cur_file.c | 8
-rw-r--r-- src/third_party/wiredtiger/src/include/btree.i | 12
-rw-r--r-- src/third_party/wiredtiger/src/include/cell.i | 21
-rw-r--r-- src/third_party/wiredtiger/src/include/cursor.h | 6
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h | 7
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c | 56
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn.c | 33
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_ckpt.c | 9
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c | 17
-rw-r--r-- src/third_party/wiredtiger/test/format/config.c | 36
-rw-r--r-- src/third_party/wiredtiger/test/format/config.h | 4
-rw-r--r-- src/third_party/wiredtiger/test/format/format.h | 18
-rw-r--r-- src/third_party/wiredtiger/test/format/ops.c | 44
19 files changed, 319 insertions, 182 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index cac59dc11d3..e02014e741c 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "5fc85c47caba5dbd4fc49ad6fa924fee4e3d5695",
+ "commit": "ad25980c88b87d45dbcefdb10cdcf696d02a8ac2",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-3.8"
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 9a30ee2c1a4..ed3cf6b5943 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -436,6 +436,20 @@ __cursor_row_modify(
}
/*
+ * __cursor_restart --
+ * Common WT_RESTART handling: yield/sleep, then count the cursor restart.
+ */
+static void
+__cursor_restart(
+ WT_SESSION_IMPL *session, uint64_t *yield_count, uint64_t *sleep_count)
+{
+ __wt_state_yield_sleep(yield_count, sleep_count);
+
+ WT_STAT_CONN_INCR(session, cursor_restart);
+ WT_STAT_DATA_INCR(session, cursor_restart);
+}
+
+/*
* __wt_btcur_reset --
* Invalidate the cursor position.
*/
@@ -719,11 +733,13 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ uint64_t yield_count, sleep_count;
bool append_key, valid;
btree = cbt->btree;
cursor = &cbt->iface;
session = (WT_SESSION_IMPL *)cursor->session;
+ yield_count = sleep_count = 0;
WT_STAT_CONN_INCR(session, cursor_insert);
WT_STAT_DATA_INCR(session, cursor_insert);
@@ -840,8 +856,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
}
err: if (ret == WT_RESTART) {
- WT_STAT_CONN_INCR(session, cursor_restart);
- WT_STAT_DATA_INCR(session, cursor_restart);
+ __cursor_restart(session, &yield_count, &sleep_count);
goto retry;
}
@@ -904,10 +919,12 @@ __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ uint64_t yield_count, sleep_count;
cursor = &cbt->iface;
btree = cbt->btree;
session = (WT_SESSION_IMPL *)cursor->session;
+ yield_count = sleep_count = 0;
/*
* The pinned page goes away if we do a search, get a local copy of any
@@ -929,8 +946,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
WT_ERR(__wt_illegal_value(session, NULL));
err: if (ret == WT_RESTART) {
- WT_STAT_CONN_INCR(session, cursor_restart);
- WT_STAT_DATA_INCR(session, cursor_restart);
+ __cursor_restart(session, &yield_count, &sleep_count);
goto retry;
}
@@ -955,11 +971,13 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ uint64_t yield_count, sleep_count;
bool iterating, valid;
btree = cbt->btree;
cursor = &cbt->iface;
session = (WT_SESSION_IMPL *)cursor->session;
+ yield_count = sleep_count = 0;
iterating = F_ISSET(cbt, WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV);
WT_STAT_CONN_INCR(session, cursor_remove);
@@ -1092,8 +1110,7 @@ retry: if (positioned == POSITIONED)
}
err: if (ret == WT_RESTART) {
- WT_STAT_CONN_INCR(session, cursor_restart);
- WT_STAT_DATA_INCR(session, cursor_restart);
+ __cursor_restart(session, &yield_count, &sleep_count);
goto retry;
}
@@ -1172,11 +1189,13 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type)
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ uint64_t yield_count, sleep_count;
bool valid;
btree = cbt->btree;
cursor = &cbt->iface;
session = (WT_SESSION_IMPL *)cursor->session;
+ yield_count = sleep_count = 0;
/* It's no longer possible to bulk-load into the tree. */
__cursor_disable_bulk(session, btree);
@@ -1268,8 +1287,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true));
}
err: if (ret == WT_RESTART) {
- WT_STAT_CONN_INCR(session, cursor_restart);
- WT_STAT_DATA_INCR(session, cursor_restart);
+ __cursor_restart(session, &yield_count, &sleep_count);
goto retry;
}
@@ -1608,6 +1626,9 @@ __cursor_truncate(WT_SESSION_IMPL *session,
int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int))
{
WT_DECL_RET;
+ uint64_t yield_count, sleep_count;
+
+ yield_count = sleep_count = 0;
/*
* First, call the cursor search method to re-position the cursor: we
@@ -1644,8 +1665,7 @@ retry: WT_ERR(__wt_btcur_search(start));
}
err: if (ret == WT_RESTART) {
- WT_STAT_CONN_INCR(session, cursor_restart);
- WT_STAT_DATA_INCR(session, cursor_restart);
+ __cursor_restart(session, &yield_count, &sleep_count);
goto retry;
}
@@ -1663,8 +1683,11 @@ __cursor_truncate_fix(WT_SESSION_IMPL *session,
int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int))
{
WT_DECL_RET;
+ uint64_t yield_count, sleep_count;
const uint8_t *value;
+ yield_count = sleep_count = 0;
+
/*
* Handle fixed-length column-store objects separately: for row-store
* and variable-length column-store objects we have "deleted" values
@@ -1702,8 +1725,7 @@ retry: WT_ERR(__wt_btcur_search(start));
}
err: if (ret == WT_RESTART) {
- WT_STAT_CONN_INCR(session, cursor_restart);
- WT_STAT_DATA_INCR(session, cursor_restart);
+ __cursor_restart(session, &yield_count, &sleep_count);
goto retry;
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index 345556c4c41..b98e994640d 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -113,7 +113,7 @@ __las_page_instantiate_verbose(WT_SESSION_IMPL *session, uint64_t las_pageid)
* Instantiate lookaside update records in a recently read page.
*/
static int
-__las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id)
+__las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
{
WT_CACHE *cache;
WT_CURSOR *cursor;
@@ -136,11 +136,12 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id)
locked = false;
total_incr = 0;
current_recno = recno = WT_RECNO_OOB;
+ las_pageid = ref->page_las->las_pageid;
session_flags = 0; /* [-Werror=maybe-uninitialized] */
WT_CLEAR(las_key);
cache = S2C(session)->cache;
- __las_page_instantiate_verbose(session, ref->page_las->las_pageid);
+ __las_page_instantiate_verbose(session, las_pageid);
WT_STAT_CONN_INCR(session, cache_read_lookaside);
WT_STAT_DATA_INCR(session, cache_read_lookaside);
@@ -159,11 +160,11 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id)
* for a key and then insert those updates into the page, then all the
* updates for the next key, and so on.
*/
- ret = __wt_las_cursor_position(
- cursor, btree_id, ref->page_las->las_pageid);
__wt_readlock(session, &cache->las_sweepwalk_lock);
locked = true;
- for (; ret == 0; ret = cursor->next(cursor)) {
+ for (ret = __wt_las_cursor_position(cursor, las_pageid);
+ ret == 0;
+ ret = cursor->next(cursor)) {
WT_ERR(cursor->get_key(cursor,
&las_pageid, &las_id, &las_counter, &las_key));
@@ -171,8 +172,7 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id)
* Confirm the search using the unique prefix; if not a match,
* we're done searching for records for this page.
*/
- if (las_id != btree_id ||
- las_pageid != ref->page_las->las_pageid)
+ if (las_pageid != ref->page_las->las_pageid)
break;
/* Allocate the WT_UPDATE structure. */
@@ -367,7 +367,6 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
static int
__page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
{
- WT_BTREE *btree;
WT_DECL_RET;
WT_ITEM tmp;
WT_PAGE *notused;
@@ -377,7 +376,6 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
const uint8_t *addr;
bool timer;
- btree = S2BT(session);
time_start = time_stop = 0;
/*
@@ -483,7 +481,7 @@ skip_read:
* then apply the delete.
*/
if (ref->page_las != NULL) {
- WT_ERR(__las_page_instantiate(session, ref, btree->id));
+ WT_ERR(__las_page_instantiate(session, ref));
ref->page_las->eviction_to_lookaside = false;
}
@@ -504,7 +502,7 @@ skip_read:
if (previous_state == WT_REF_LIMBO)
WT_STAT_CONN_INCR(session, cache_read_lookaside_delay);
- WT_ERR(__las_page_instantiate(session, ref, btree->id));
+ WT_ERR(__las_page_instantiate(session, ref));
ref->page_las->eviction_to_lookaside = false;
break;
}
@@ -518,7 +516,7 @@ skip_read:
*/
if (final_state == WT_REF_MEM && ref->page_las != NULL)
WT_IGNORE_RET(__wt_las_remove_block(
- session, btree->id, ref->page_las->las_pageid));
+ session, ref->page_las->las_pageid, false));
WT_PUBLISH(ref->state, final_state);
return (ret);
diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c
index 7ccc325523e..620800a8fb9 100644
--- a/src/third_party/wiredtiger/src/cache/cache_las.c
+++ b/src/third_party/wiredtiger/src/cache/cache_las.c
@@ -455,7 +455,7 @@ __wt_las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref)
*/
static int
__las_remove_block(WT_SESSION_IMPL *session,
- WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid, uint64_t *decrp)
+ WT_CURSOR *cursor, uint64_t pageid, bool lock_wait, uint64_t *remove_cntp)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
@@ -463,30 +463,32 @@ __las_remove_block(WT_SESSION_IMPL *session,
uint64_t las_counter, las_pageid;
uint32_t las_id;
- *decrp = 0;
+ *remove_cntp = 0;
conn = S2C(session);
- __wt_writelock(session, &conn->cache->las_sweepwalk_lock);
+ /* Prevent the sweep thread from removing the block. */
+ if (lock_wait)
+ __wt_writelock(session, &conn->cache->las_sweepwalk_lock);
+ else
+ WT_RET(__wt_try_writelock(
+ session, &conn->cache->las_sweepwalk_lock));
/*
* Search for the block's unique btree ID and page ID prefix and step
* through all matching records, removing them.
*/
- for (ret = __wt_las_cursor_position(cursor, btree_id, pageid);
+ for (ret = __wt_las_cursor_position(cursor, pageid);
ret == 0; ret = cursor->next(cursor)) {
WT_ERR(cursor->get_key(cursor,
&las_pageid, &las_id, &las_counter, &las_key));
- /*
- * Confirm the record matches; if not a match, we're done
- * searching for records for this page.
- */
- if (las_pageid != pageid || las_id != btree_id)
+ /* Confirm that we have a matching record. */
+ if (las_pageid != pageid)
break;
WT_ERR(cursor->remove(cursor));
- ++*decrp;
+ ++*remove_cntp;
}
WT_ERR_NOTFOUND_OK(ret);
@@ -580,7 +582,7 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
WT_SESSION_IMPL *las_session;
WT_TXN_ISOLATION saved_isolation;
WT_UPDATE *upd;
- uint64_t decrement_cnt, insert_cnt, insert_estimate;
+ uint64_t insert_cnt;
uint64_t las_counter, las_pageid;
uint32_t btree_id, i, slot;
uint8_t *p;
@@ -590,12 +592,11 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
conn = S2C(session);
WT_CLEAR(las_timestamp);
WT_CLEAR(las_value);
- decrement_cnt = insert_cnt = insert_estimate = 0;
+ insert_cnt = 0;
btree_id = btree->id;
local_txn = false;
- las_pageid = multi->page_las.las_pageid =
- __wt_atomic_add64(&conn->cache->las_pageid, 1);
+ las_pageid = __wt_atomic_add64(&conn->cache->las_pageid, 1);
if (!btree->lookaside_entries)
btree->lookaside_entries = true;
@@ -606,12 +607,18 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
WT_ERR(__wt_txn_begin(las_session, NULL));
local_txn = true;
+#ifdef HAVE_DIAGNOSTIC
+ {
+ uint64_t remove_cnt;
/*
- * Make sure there are no leftover entries (e.g., from a handle
- * reopen).
+ * There should never be any entries with the page ID we are about to
+ * use.
*/
- WT_ERR(__las_remove_block(
- session, cursor, btree_id, las_pageid, &decrement_cnt));
+ WT_ERR_BUSY_OK(__las_remove_block(
+ session, cursor, las_pageid, false, &remove_cnt));
+ WT_ASSERT(session, remove_cnt == 0);
+ }
+#endif
/* Enter each update in the boundary's list into the lookaside store. */
for (las_counter = 0, i = 0,
@@ -707,18 +714,6 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
upd->type, &las_value);
/*
- * If remove is running concurrently, it's possible for
- * records to be removed before the insert transaction
- * commit (remove is configured read-uncommitted). Make
- * sure increments stay ahead of decrements.
- */
- if (insert_estimate <= insert_cnt) {
- insert_estimate += 100;
- (void)__wt_atomic_add64(
- &conn->cache->las_entry_count, 100);
- }
-
- /*
* Using update looks a little strange because the keys
* are guaranteed to not exist, but since we're
* appending, we want the cursor to stay positioned in
@@ -731,34 +726,31 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
err: /* Resolve the transaction. */
if (local_txn) {
- if (ret == 0)
- ret = __wt_txn_commit(las_session, NULL);
- else
+ if (ret == 0) {
+ /*
+ * Adjust the entry count.
+ *
+ * For inserts, we increment before committing. As
+ * soon as we commit, sweep could catch up and remove
+ * the block, and we don't want the count to underflow.
+ * In the unlikely event that the commit fails, roll
+ * back the increment.
+ */
+ __wt_atomic_add64(
+ &conn->cache->las_entry_count, insert_cnt);
+ if ((ret = __wt_txn_commit(las_session, NULL)) != 0)
+ __wt_cache_decr_check_uint64(session,
+ &conn->cache->las_entry_count,
+ insert_cnt, "lookaside entry count");
+ } else
WT_TRET(__wt_txn_rollback(las_session, NULL));
}
__las_restore_isolation(las_session, saved_isolation);
- /*
- * If the transaction successfully committed and we inserted records,
- * adjust the final entry count. We may have also deleted records,
- * but we must have intended to insert records to be in this function
- * at all, checking the insert count is sufficient.
- */
- if (insert_cnt > 0) {
- if (ret == 0) {
- (void)__wt_atomic_add64(
- &conn->cache->las_entry_count,
- insert_estimate - insert_cnt);
- __wt_cache_decr_check_uint64(session,
- &conn->cache->las_entry_count,
- decrement_cnt, "lookaside entry count");
-
- ret = __las_insert_block_verbose(session, multi);
- } else
- __wt_cache_decr_check_uint64(session,
- &conn->cache->las_entry_count,
- insert_estimate, "lookaside entry count");
+ if (ret == 0 && insert_cnt > 0) {
+ multi->page_las.las_pageid = las_pageid;
+ ret = __las_insert_block_verbose(session, multi);
}
return (ret);
@@ -772,7 +764,7 @@ err: /* Resolve the transaction. */
* WT_CONNECTION::rollback_to_stable.
*/
int
-__wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid)
+__wt_las_cursor_position(WT_CURSOR *cursor, uint64_t pageid)
{
WT_ITEM las_key;
uint64_t las_counter, las_pageid;
@@ -796,7 +788,7 @@ __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid)
for (;;) {
WT_CLEAR(las_key);
cursor->set_key(cursor,
- pageid, btree_id, (uint64_t)0, &las_key);
+ pageid, (uint32_t)0, (uint64_t)0, &las_key);
WT_RET(cursor->search_near(cursor, &exact));
if (exact < 0) {
WT_RET(cursor->next(cursor));
@@ -813,8 +805,7 @@ __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid)
*/
WT_RET(cursor->get_key(cursor,
&las_pageid, &las_id, &las_counter, &las_key));
- if (las_pageid < pageid || (las_pageid == pageid &&
- las_id < btree_id))
+ if (las_pageid < pageid)
continue;
}
@@ -830,14 +821,14 @@ __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid)
*/
int
__wt_las_remove_block(
- WT_SESSION_IMPL *session, uint32_t btree_id, uint64_t pageid)
+ WT_SESSION_IMPL *session, uint64_t pageid, bool lock_wait)
{
WT_CONNECTION_IMPL *conn;
WT_CURSOR *cursor;
WT_DECL_RET;
WT_SESSION_IMPL *las_session;
WT_TXN_ISOLATION saved_isolation;
- uint64_t decrement_cnt;
+ uint64_t remove_cnt;
uint32_t session_flags;
conn = S2C(session);
@@ -856,7 +847,7 @@ __wt_las_remove_block(
WT_ERR(__wt_txn_begin(las_session, NULL));
ret = __las_remove_block(
- las_session, cursor, btree_id, pageid, &decrement_cnt);
+ las_session, cursor, pageid, lock_wait, &remove_cnt);
if (ret == 0)
ret = __wt_txn_commit(las_session, NULL);
else
@@ -864,7 +855,7 @@ __wt_las_remove_block(
if (ret == 0)
__wt_cache_decr_check_uint64(session,
&conn->cache->las_entry_count,
- decrement_cnt, "lookaside entry count");
+ remove_cnt, "lookaside entry count");
err: __las_restore_isolation(las_session, saved_isolation);
WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
@@ -993,7 +984,7 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
#else
wt_timestamp_t *val_ts;
#endif
- uint64_t cnt, decrement_cnt, las_counter, las_pageid, saved_pageid;
+ uint64_t cnt, remove_cnt, las_counter, las_pageid, saved_pageid;
uint64_t las_txnid;
uint32_t las_id, session_flags;
uint8_t upd_type;
@@ -1003,7 +994,7 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
cache = S2C(session)->cache;
cursor = NULL;
sweep_key = &cache->las_sweep_key;
- decrement_cnt = 0;
+ remove_cnt = 0;
session_flags = 0; /* [-Werror=maybe-uninitialized] */
local_txn = locked = false;
@@ -1020,6 +1011,9 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
WT_ERR(__wt_txn_begin(session, NULL));
local_txn = true;
+ /*
+ * Prevent other threads removing entries from underneath the sweep.
+ */
__wt_writelock(session, &cache->las_sweepwalk_lock);
locked = true;
@@ -1105,7 +1099,7 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
__bit_test(cache->las_sweep_dropmap,
las_id - cache->las_sweep_dropmin)) {
WT_ERR(cursor->remove(cursor));
- ++decrement_cnt;
+ ++remove_cnt;
saved_key->size = 0;
continue;
}
@@ -1152,12 +1146,9 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
}
WT_ERR(cursor->remove(cursor));
- ++decrement_cnt;
+ ++remove_cnt;
}
- __wt_writeunlock(session, &cache->las_sweepwalk_lock);
- locked = false;
-
/*
* If the loop terminates after completing a work unit, we will
* continue the table sweep next time. Get a local copy of the
@@ -1186,7 +1177,7 @@ err: __wt_buf_free(session, sweep_key);
if (ret == 0)
__wt_cache_decr_check_uint64(session,
&S2C(session)->cache->las_entry_count,
- decrement_cnt, "lookaside entry count");
+ remove_cnt, "lookaside entry count");
}
if (locked)
__wt_writeunlock(session, &cache->las_sweepwalk_lock);
diff --git a/src/third_party/wiredtiger/src/config/config.c b/src/third_party/wiredtiger/src/config/config.c
index b15bbdf83c7..799139b6b90 100644
--- a/src/third_party/wiredtiger/src/config/config.c
+++ b/src/third_party/wiredtiger/src/config/config.c
@@ -745,20 +745,36 @@ __wt_config_gets_def(WT_SESSION_IMPL *session,
const char **cfg, const char *key, int def, WT_CONFIG_ITEM *value)
{
WT_CONFIG_ITEM_STATIC_INIT(false_value);
+ const char **end;
*value = false_value;
value->val = def;
- if (cfg == NULL || cfg[0] == NULL || cfg[1] == NULL)
+ if (cfg == NULL)
return (0);
- if (cfg[2] == NULL) {
+ /*
+ * Checking the "length" of the pointer array is a little odd, but it's
+ * deliberate. We pass variable-length arrays of pointers as the
+ * configuration argument, some of which have only one element and the
+ * NULL termination. Static analyzers (like Coverity) complain if we read
+ * from an offset past the end of the array, even if we check there are
+ * no NULL slots before the offset.
+ */
+ for (end = cfg; *end != NULL; ++end)
+ ;
+ switch ((int)(end - cfg)) {
+ case 0: /* cfg[0] == NULL */
+ case 1: /* cfg[1] == NULL */
+ return (0);
+ case 2: /* cfg[2] == NULL */
WT_RET_NOTFOUND_OK(
__wt_config_getones(session, cfg[1], key, value));
return (0);
+ default:
+ return (__wt_config_gets(session, cfg, key, value));
}
-
- return (__wt_config_gets(session, cfg, key, value));
+ /* NOTREACHED */
}
/*
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index a8e906a9f19..e753dc1644a 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -1101,8 +1101,8 @@ err: /*
*/
F_SET(conn, WT_CONN_EVICTION_NO_LOOKASIDE);
- /* Shut down transactions (wait for in-flight operations to complete. */
- WT_TRET(__wt_txn_global_shutdown(session));
+ /* Wait for in-flight operations to complete. */
+ WT_TRET(__wt_txn_activity_drain(session));
/*
* Perform a system-wide checkpoint so that all tables are consistent
@@ -1135,6 +1135,9 @@ err: /*
}
}
+ /* Shut down the global transaction state. */
+ __wt_txn_global_shutdown(session);
+
if (ret != 0) {
__wt_err(session, ret,
"failure during close, disabling further writes");
diff --git a/src/third_party/wiredtiger/src/cursor/cur_file.c b/src/third_party/wiredtiger/src/cursor/cur_file.c
index 381f8bcc619..dff851f99d2 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_file.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_file.c
@@ -530,8 +530,9 @@ __curfile_cache(WT_CURSOR *cursor)
cbt = (WT_CURSOR_BTREE *)cursor;
session = (WT_SESSION_IMPL *)cursor->session;
+ cbt->dhandle = cbt->btree->dhandle;
- WT_TRET(__wt_cursor_cache(cursor, cbt->btree->dhandle));
+ WT_TRET(__wt_cursor_cache(cursor, cbt->dhandle));
WT_TRET(__wt_session_release_dhandle(session));
return (ret);
}
@@ -552,7 +553,7 @@ __curfile_reopen(WT_CURSOR *cursor, bool check_only)
is_dead = false;
cbt = (WT_CURSOR_BTREE *)cursor;
session = (WT_SESSION_IMPL *)cursor->session;
- dhandle = cbt->btree->dhandle;
+ dhandle = cbt->dhandle;
if (!WT_DHANDLE_CAN_REOPEN(dhandle))
ret = WT_NOTFOUND;
@@ -579,6 +580,9 @@ __curfile_reopen(WT_CURSOR *cursor, bool check_only)
* memory owned by the btree handle.
*/
if (ret == 0) {
+ WT_ASSERT(session,
+ dhandle->type == WT_DHANDLE_TYPE_BTREE);
+ cbt->btree = dhandle->handle;
cursor->internal_uri = cbt->btree->dhandle->name;
cursor->key_format = cbt->btree->key_format;
cursor->value_format = cbt->btree->value_format;
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 149f4304692..79d6634913e 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -202,6 +202,8 @@ static inline void
__wt_cache_decr_check_uint64(
WT_SESSION_IMPL *session, uint64_t *vp, uint64_t v, const char *fld)
{
+ uint64_t orig = *vp;
+
if (v == 0 || __wt_atomic_sub64(vp, v) < WT_EXABYTE)
return;
@@ -211,7 +213,8 @@ __wt_cache_decr_check_uint64(
*/
*vp = 0;
__wt_errx(session,
- "%s went negative with decrement of %" PRIu64, fld, v);
+ "%s was %" PRIu64 ", went negative with decrement of %" PRIu64, fld,
+ orig, v);
#ifdef HAVE_DIAGNOSTIC
__wt_abort(session);
@@ -1183,6 +1186,10 @@ __wt_page_del_active(
*
* We cannot evict dirty pages or split while a checkpoint is in progress,
* unless the checkpoint thread is doing the work.
+ *
+ * Also, during connection close, if we take a checkpoint as of a
+ * timestamp, eviction should not write dirty pages to avoid updates newer
+ * than the checkpoint timestamp leaking to disk.
*/
static inline bool
__wt_btree_can_evict_dirty(WT_SESSION_IMPL *session)
@@ -1190,7 +1197,8 @@ __wt_btree_can_evict_dirty(WT_SESSION_IMPL *session)
WT_BTREE *btree;
btree = S2BT(session);
- return (btree->checkpointing == WT_CKPT_OFF ||
+ return ((btree->checkpointing == WT_CKPT_OFF &&
+ !F_ISSET(S2C(session), WT_CONN_CLOSING_TIMESTAMP)) ||
WT_SESSION_IS_CHECKPOINT(session));
}
diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i
index 644222ad569..ff3486c1750 100644
--- a/src/third_party/wiredtiger/src/include/cell.i
+++ b/src/third_party/wiredtiger/src/include/cell.i
@@ -721,6 +721,27 @@ __wt_cell_unpack(WT_CELL *cell, WT_CELL_UNPACK *unpack)
}
/*
+ * __wt_cell_unpack_empty_value --
+ * Create an unpacked cell that looks like a zero-length row-store value.
+ */
+static inline void
+__wt_cell_unpack_empty_value(WT_CELL_UNPACK *unpack)
+{
+ /*
+ * Row-store doesn't store zero-length values on pages; build an empty
+ * WT_CELL_VALUE cell (no data, no overflow) so callers can pretend.
+ */
+ unpack->cell = NULL;
+ unpack->v = 0;
+ unpack->data = "";
+ unpack->size = 0;
+ unpack->__len = 0;
+ unpack->prefix = 0;
+ unpack->raw = unpack->type = WT_CELL_VALUE;
+ unpack->ovfl = 0;
+}
+
+/*
* __cell_data_ref --
* Set a buffer to reference the data from an unpacked cell.
*/
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h
index ec5c6689c3f..e84921ad035 100644
--- a/src/third_party/wiredtiger/src/include/cursor.h
+++ b/src/third_party/wiredtiger/src/include/cursor.h
@@ -89,7 +89,13 @@ struct __wt_cursor_backup {
struct __wt_cursor_btree {
WT_CURSOR iface;
+ /*
+ * The btree field is safe to use when the cursor is open. When the
+ * cursor is cached, the btree may be closed, so it is only safe
+ * initially to look at the underlying data handle.
+ */
WT_BTREE *btree; /* Enclosing btree */
+ WT_DATA_HANDLE *dhandle; /* Data handle for the btree */
/*
* The following fields are set by the search functions as a precursor
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index caa48180867..579a2e5ed36 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -211,8 +211,8 @@ extern int __wt_las_cursor_close(WT_SESSION_IMPL *session, WT_CURSOR **cursorp,
extern bool __wt_las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_PAGE *page, WT_MULTI *multi, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_las_remove_block(WT_SESSION_IMPL *session, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_cursor_position(WT_CURSOR *cursor, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_remove_block(WT_SESSION_IMPL *session, uint64_t pageid, bool lock_wait) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_save_dropped(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_sweep(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern uint32_t __wt_checksum_sw(const void *chunk, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
@@ -813,7 +813,8 @@ extern void __wt_txn_stats_update(WT_SESSION_IMPL *session);
extern void __wt_txn_destroy(WT_SESSION_IMPL *session);
extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_txn_global_destroy(WT_SESSION_IMPL *session);
-extern int __wt_txn_global_shutdown(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_activity_drain(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_txn_global_shutdown(WT_SESSION_IMPL *session);
extern int __wt_verbose_dump_txn_one(WT_SESSION_IMPL *session, WT_TXN *txn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_verbose_dump_txn(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 8bc022cd3e3..7a97d5ae959 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -1665,16 +1665,7 @@ __rec_child_deleted(WT_SESSION_IMPL *session,
* read into this part of the name space again, the cache read function
* instantiates an entirely new page.)
*/
- if (ref->addr != NULL && !__wt_page_del_active(session, ref, true))
- WT_RET(__wt_ref_block_free(session, ref));
-
- /*
- * If the original page is gone, we can skip the slot on the internal
- * page.
- */
- if (ref->addr == NULL) {
- *statep = WT_CHILD_IGNORE;
-
+ if (ref->addr != NULL && !__wt_page_del_active(session, ref, true)) {
/*
* Minor memory cleanup: if a truncate call deleted this page
* and we were ever forced to instantiate the page in memory,
@@ -1687,6 +1678,15 @@ __rec_child_deleted(WT_SESSION_IMPL *session,
__wt_free(session, ref->page_del);
}
+ WT_RET(__wt_ref_block_free(session, ref));
+ }
+
+ /*
+ * If the original page is gone, we can skip the slot on the internal
+ * page.
+ */
+ if (ref->addr == NULL) {
+ *statep = WT_CHILD_IGNORE;
return (0);
}
@@ -5328,6 +5328,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
key = &r->k;
val = &r->v;
+ vpack = &_vpack;
WT_RET(__rec_split_init(session, r, page, 0, btree->maxleafpage));
@@ -5376,14 +5377,19 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
__wt_cell_unpack(cell, kpack);
}
- /* Unpack the on-page value cell, and look for an update. */
+ /*
+ * Unpack the on-page value cell, and look for an update. Under
+ * some conditions, the underlying code returning updates will
+ * restructure the update list to include the original on-page
+ * value, represented by the unpacked-cell argument. Row-store
+ * doesn't store zero-length values on the page, so we build an
+ * unpacked cell that allows us to pretend.
+ */
if ((val_cell =
__wt_row_leaf_value_cell(page, rip, NULL)) == NULL)
- vpack = NULL;
- else {
- vpack = &_vpack;
+ __wt_cell_unpack_empty_value(vpack);
+ else
__wt_cell_unpack(val_cell, vpack);
- }
WT_ERR(__rec_txn_read(
session, r, NULL, rip, vpack, NULL, &upd));
@@ -5399,10 +5405,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
* copy, we have to create a new value item as the old
* item might have been discarded from the page.
*/
- if (vpack == NULL) {
- val->buf.data = NULL;
- val->cell_len = val->len = val->buf.size = 0;
- } else if (vpack->raw == WT_CELL_VALUE_COPY) {
+ if (vpack->raw == WT_CELL_VALUE_COPY) {
/* If the item is Huffman encoded, decode it. */
if (btree->huffman_value == NULL) {
p = vpack->data;
@@ -5478,8 +5481,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
* The first time we find an overflow record we're not
* going to use, discard the underlying blocks.
*/
- if (vpack != NULL &&
- vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM)
+ if (vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM)
WT_ERR(__wt_ovfl_remove(session,
page, vpack, F_ISSET(r, WT_REC_EVICT)));
@@ -6183,18 +6185,18 @@ __rec_las_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r)
{
WT_DECL_RET;
WT_MULTI *multi;
- uint32_t btree_id, i;
-
- btree_id = S2BT(session)->id;
+ uint64_t las_pageid;
+ uint32_t i;
/*
* Note the additional check for a non-zero lookaside page ID, that
* flags if lookaside table entries for this page have been written.
*/
for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
- if (multi->supd != NULL && multi->page_las.las_pageid != 0)
- WT_TRET(__wt_las_remove_block(session,
- btree_id, multi->page_las.las_pageid));
+ if (multi->supd != NULL &&
+ (las_pageid = multi->page_las.las_pageid) != 0)
+ WT_TRET(
+ __wt_las_remove_block(session, las_pageid, true));
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index b2952cbec46..400edb59e61 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -1380,17 +1380,15 @@ __wt_txn_global_destroy(WT_SESSION_IMPL *session)
}
/*
- * __wt_txn_global_shutdown --
- * Shut down the global transaction state.
+ * __wt_txn_activity_drain --
+ * Wait for transactions to quiesce.
*/
int
-__wt_txn_global_shutdown(WT_SESSION_IMPL *session)
+__wt_txn_activity_drain(WT_SESSION_IMPL *session)
{
bool txn_active;
/*
- * We're shutting down. Make sure everything gets freed.
- *
* It's possible that the eviction server is in the middle of a long
* operation, with a transaction ID pinned. In that case, we will loop
* here until the transaction ID is released, when the oldest
@@ -1405,15 +1403,30 @@ __wt_txn_global_shutdown(WT_SESSION_IMPL *session)
__wt_yield();
}
+ return (0);
+}
+
+/*
+ * __wt_txn_global_shutdown --
+ * Shut down the global transaction state.
+ */
+void
+__wt_txn_global_shutdown(WT_SESSION_IMPL *session)
+{
#ifdef HAVE_TIMESTAMPS
/*
- * Now that all transactions have completed, no timestamps should be
- * pinned.
+ * All application transactions have completed, ignore the pinned
+ * timestamp so that updates can be evicted from the cache during
+ * connection close.
+ *
+ * Note that we are relying on a special case in __wt_txn_visible_all
+ * that returns true during close when there is no pinned timestamp
+ * set.
*/
- __wt_timestamp_set_inf(&S2C(session)->txn_global.pinned_timestamp);
+ S2C(session)->txn_global.has_pinned_timestamp = false;
+#else
+ WT_UNUSED(session);
#endif
-
- return (0);
}
/*
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 5a71135918a..78197e838f4 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -385,8 +385,13 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
conn = S2C(session);
cache = conn->cache;
- /* Give up if scrubbing is disabled. */
- if (cache->eviction_checkpoint_target < DBL_EPSILON ||
+ /*
+ * Give up if scrubbing is disabled, including when checkpointing with
+ * a timestamp on close (we can't evict dirty pages in that case, so
+ * scrubbing cannot help).
+ */
+ if (F_ISSET(conn, WT_CONN_CLOSING_TIMESTAMP) ||
+ cache->eviction_checkpoint_target < DBL_EPSILON ||
cache->eviction_checkpoint_target >= cache->eviction_dirty_trigger)
return;
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index eef2fde5284..e0d5beea61a 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -22,13 +22,13 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
WT_DECL_TIMESTAMP(rollback_timestamp)
WT_ITEM las_key, las_timestamp, las_value;
WT_TXN_GLOBAL *txn_global;
- uint64_t las_counter, las_pageid, las_total, las_txnid, remove_cnt;
+ uint64_t las_counter, las_pageid, las_total, las_txnid;
uint32_t las_id, session_flags;
uint8_t upd_type;
conn = S2C(session);
cursor = NULL;
- las_total = remove_cnt = 0;
+ las_total = 0;
session_flags = 0; /* [-Werror=maybe-uninitialized] */
WT_CLEAR(las_timestamp);
@@ -51,6 +51,7 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
/* Walk the file. */
__wt_writelock(session, &conn->cache->las_sweepwalk_lock);
while ((ret = cursor->next(cursor)) == 0) {
+ ++las_total;
WT_ERR(cursor->get_key(cursor,
&las_pageid, &las_id, &las_counter, &las_key));
@@ -73,17 +74,15 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
if (__wt_timestamp_cmp(
&rollback_timestamp, las_timestamp.data) < 0) {
WT_ERR(cursor->remove(cursor));
- ++remove_cnt;
WT_STAT_CONN_INCR(session, txn_rollback_las_removed);
- } else
- ++las_total;
+ --las_total;
+ }
}
WT_ERR_NOTFOUND_OK(ret);
-err: __wt_writeunlock(session, &conn->cache->las_sweepwalk_lock);
+err: if (ret == 0)
+ conn->cache->las_entry_count = las_total;
+ __wt_writeunlock(session, &conn->cache->las_sweepwalk_lock);
WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
- __wt_cache_decr_check_uint64(session,
- &conn->cache->las_entry_count, remove_cnt, "lookaside entry count");
- WT_STAT_CONN_SET(session, cache_lookaside_entries, las_total);
F_CLR(session, WT_SESSION_READ_WONT_NEED);
diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c
index 0677b3b753c..7e54c7ad171 100644
--- a/src/third_party/wiredtiger/test/format/config.c
+++ b/src/third_party/wiredtiger/test/format/config.c
@@ -47,6 +47,7 @@ static void config_map_encryption(const char *, u_int *);
static void config_map_file_type(const char *, u_int *);
static void config_map_isolation(const char *, u_int *);
static void config_pct(void);
+static void config_prepare(void);
static void config_reset(void);
/*
@@ -171,6 +172,7 @@ config_setup(void)
config_isolation();
config_lrt();
config_pct();
+ config_prepare();
/*
* If this is an LSM run, ensure cache size sanity.
@@ -667,6 +669,40 @@ config_pct(void)
}
/*
+ * config_prepare --
+ * Transaction prepare configuration.
+ */
+static void
+config_prepare(void)
+{
+ /*
+ * We cannot prepare a transaction if logging is configured, or if
+ * timestamps are not configured.
+ *
+ * Prepare isn't configured often, let it control other features, unless
+ * they're explicitly set/not-set.
+ */
+ if (!g.c_prepare)
+ return;
+ if (config_is_perm("prepare")) {
+ if (g.c_logging && config_is_perm("logging"))
+ testutil_die(EINVAL,
+ "prepare is incompatible with logging");
+ if (!g.c_txn_timestamps &&
+ config_is_perm("transaction_timestamps"))
+ testutil_die(EINVAL,
+ "prepare requires transaction timestamps");
+ }
+ if (g.c_logging && config_is_perm("logging"))
+ return;
+ if (!g.c_txn_timestamps && config_is_perm("transaction_timestamps"))
+ return;
+
+ config_single("logging=off", 0);
+ config_single("transaction_timestamps=on", 0);
+}
+
+/*
* config_error --
* Display configuration information on error.
*/
diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h
index 1b5f170abf6..a897b04bf08 100644
--- a/src/third_party/wiredtiger/test/format/config.h
+++ b/src/third_party/wiredtiger/test/format/config.h
@@ -274,6 +274,10 @@ static CONFIG c[] = {
"minimum gain before prefix compression is used",
0x0, 0, 8, 256, &g.c_prefix_compression_min, NULL },
+ { "prepare",
+ "configure transaction prepare", /* 5% */
+ C_BOOL, 5, 0, 0, &g.c_prepare, NULL },
+
{ "quiet",
"quiet run (same as -q)",
C_IGNORE|C_BOOL, 0, 0, 1, &g.c_quiet, NULL },
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index 378883a314f..e929eb3207d 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -198,6 +198,7 @@ typedef struct {
uint32_t c_ops;
uint32_t c_prefix_compression;
uint32_t c_prefix_compression_min;
+ uint32_t c_prepare;
uint32_t c_quiet;
uint32_t c_read_pct;
uint32_t c_rebalance;
@@ -219,9 +220,9 @@ typedef struct {
uint32_t c_timing_stress_split_5;
uint32_t c_timing_stress_split_6;
uint32_t c_timing_stress_split_7;
+ uint32_t c_truncate;
uint32_t c_txn_freq;
uint32_t c_txn_timestamps;
- uint32_t c_truncate;
uint32_t c_value_max;
uint32_t c_value_min;
uint32_t c_verify;
@@ -276,22 +277,21 @@ typedef struct {
WT_RAND_STATE rnd; /* thread RNG state */
- uint64_t commit; /* transaction resolution */
- uint64_t prepare;
- uint64_t rollback;
- uint64_t deadlock;
-
uint64_t commit_timestamp; /* last committed timestamp */
uint64_t read_timestamp; /* read timestamp */
volatile bool quit; /* thread should quit */
- uint64_t search; /* operation counts */
+ uint64_t ops; /* total operations */
+ uint64_t commit; /* operation counts */
+ uint64_t deadlock;
uint64_t insert;
- uint64_t update;
+ uint64_t prepare;
uint64_t remove;
+ uint64_t rollback;
+ uint64_t search;
uint64_t truncate;
- uint64_t ops;
+ uint64_t update;
uint64_t keyno; /* key */
WT_ITEM *key, _key; /* key, value */
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index 54aa6d2b766..6930e470b8d 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -193,11 +193,12 @@ wts_ops(int lastrun)
tinfo = tinfo_list[i];
total.commit += tinfo->commit;
total.deadlock += tinfo->deadlock;
- total.prepare += tinfo->prepare;
total.insert += tinfo->insert;
+ total.prepare += tinfo->prepare;
total.remove += tinfo->remove;
total.rollback += tinfo->rollback;
total.search += tinfo->search;
+ total.truncate += tinfo->truncate;
total.update += tinfo->update;
switch (tinfo->state) {
@@ -496,26 +497,36 @@ begin_transaction(TINFO *tinfo, WT_SESSION *session, u_int *iso_configp)
u_int v;
const char *config;
char config_buf[64];
+ bool locked;
+
+ locked = false;
if ((v = g.c_isolation_flag) == ISOLATION_RANDOM)
- v = mmrand(&tinfo->rnd, 2, 4);
+ v = mmrand(&tinfo->rnd, 1, 3);
switch (v) {
- case ISOLATION_READ_UNCOMMITTED:
+ case 1:
+ v = ISOLATION_READ_UNCOMMITTED;
config = "isolation=read-uncommitted";
break;
- case ISOLATION_READ_COMMITTED:
+ case 2:
+ v = ISOLATION_READ_COMMITTED;
config = "isolation=read-committed";
break;
- case ISOLATION_SNAPSHOT:
+ case 3:
default:
v = ISOLATION_SNAPSHOT;
config = "isolation=snapshot";
+
if (g.c_txn_timestamps) {
/*
* Avoid starting a new reader when a prepare is in
* progress.
*/
- (void)pthread_rwlock_rdlock(&g.prepare_lock);
+ if (g.c_prepare) {
+ testutil_check(
+ pthread_rwlock_rdlock(&g.prepare_lock));
+ locked = true;
+ }
/*
* Set the thread's read timestamp to the current value
@@ -537,8 +548,8 @@ begin_transaction(TINFO *tinfo, WT_SESSION *session, u_int *iso_configp)
testutil_check(session->begin_transaction(session, config));
- if (v == ISOLATION_SNAPSHOT && g.c_txn_timestamps)
- (void)pthread_rwlock_unlock(&g.prepare_lock);
+ if (locked)
+ testutil_check(pthread_rwlock_unlock(&g.prepare_lock));
/*
* It's OK for the oldest timestamp to move past a running query, clear
@@ -630,12 +641,8 @@ prepare_transaction(TINFO *tinfo, WT_SESSION *session)
uint64_t ts;
char config_buf[64];
- /*
- * We cannot prepare a transaction if logging on the table is set.
- * Prepare also requires timestamps. Skip if not using timestamps,
- * if no timestamp has yet been set, or if using logging.
- */
- if (!g.c_txn_timestamps || g.timestamp == 0 || g.c_logging)
+ /* Skip if no timestamp has yet been set. */
+ if (g.timestamp == 0)
return (0);
/*
@@ -652,14 +659,14 @@ prepare_transaction(TINFO *tinfo, WT_SESSION *session)
* Prepare will return error if prepare timestamp is less than any
* active read timestamp.
*/
- (void)pthread_rwlock_wrlock(&g.prepare_lock);
+ testutil_check(pthread_rwlock_wrlock(&g.prepare_lock));
ts = set_commit_timestamp(tinfo);
testutil_check(__wt_snprintf(
config_buf, sizeof(config_buf), "prepare_timestamp=%" PRIx64, ts));
ret = session->prepare_transaction(session, config_buf);
- (void)pthread_rwlock_unlock(&g.prepare_lock);
+ testutil_check(pthread_rwlock_unlock(&g.prepare_lock));
return (ret);
}
@@ -1095,9 +1102,10 @@ update_instead_of_chosen_op:
}
/*
- * Prepare the transaction 10% of the time.
+ * If prepare configured, prepare the transaction 10% of the
+ * time.
*/
- if (mmrand(&tinfo->rnd, 1, 10) == 1) {
+ if (g.c_prepare && mmrand(&tinfo->rnd, 1, 10) == 1) {
ret = prepare_transaction(tinfo, session);
testutil_assert(ret == 0 || ret == WT_PREPARE_CONFLICT);
if (ret == WT_PREPARE_CONFLICT)