diff options
author | Luke Chen <luke.chen@mongodb.com> | 2018-04-18 15:56:04 +1000 |
---|---|---|
committer | Luke Chen <luke.chen@mongodb.com> | 2018-04-18 15:56:04 +1000 |
commit | 0ea8ebafc20bb379955b2168b31099aefa220d7b (patch) | |
tree | 45ae9717fd0db22de6ca56240381c8106a633e7d /src/third_party/wiredtiger | |
parent | f1dce2d1934052cbac4032d0c5833d3857a0cfb2 (diff) | |
download | mongo-0ea8ebafc20bb379955b2168b31099aefa220d7b.tar.gz |
Import wiredtiger: ad25980c88b87d45dbcefdb10cdcf696d02a8ac2 from branch mongodb-3.8
ref: 5fc85c47ca..ad25980c88
for: 3.7.6
WT-3998 Fix a bug where stable timestamp was ignored on shutdown
WT-4012 Fix lookaside entry counters
WT-4019 Change test/format to test transaction prepare less often
WT-4027 Yield cursor operations between restarted search/traverse
WT-4031 on-page zero-length row-store values can be discarded from checkpoints
WT-4035 Truncate information discarded while active
WT-4036 Fix Coverity false positive: out-of-bounds access.
WT-4042 Access data handles safely during cursor reopen
Diffstat (limited to 'src/third_party/wiredtiger')
19 files changed, 319 insertions, 182 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index cac59dc11d3..e02014e741c 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -1,5 +1,5 @@ { - "commit": "5fc85c47caba5dbd4fc49ad6fa924fee4e3d5695", + "commit": "ad25980c88b87d45dbcefdb10cdcf696d02a8ac2", "github": "wiredtiger/wiredtiger.git", "vendor": "wiredtiger", "branch": "mongodb-3.8" diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c index 9a30ee2c1a4..ed3cf6b5943 100644 --- a/src/third_party/wiredtiger/src/btree/bt_cursor.c +++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c @@ -436,6 +436,20 @@ __cursor_row_modify( } /* + * __cursor_restart -- + * Common cursor restart handling. + */ +static void +__cursor_restart( + WT_SESSION_IMPL *session, uint64_t *yield_count, uint64_t *sleep_count) +{ + __wt_state_yield_sleep(yield_count, sleep_count); + + WT_STAT_CONN_INCR(session, cursor_restart); + WT_STAT_DATA_INCR(session, cursor_restart); +} + +/* * __wt_btcur_reset -- * Invalidate the cursor position. */ @@ -719,11 +733,13 @@ __wt_btcur_insert(WT_CURSOR_BTREE *cbt) WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; + uint64_t yield_count, sleep_count; bool append_key, valid; btree = cbt->btree; cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; + yield_count = sleep_count = 0; WT_STAT_CONN_INCR(session, cursor_insert); WT_STAT_DATA_INCR(session, cursor_insert); @@ -840,8 +856,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); } err: if (ret == WT_RESTART) { - WT_STAT_CONN_INCR(session, cursor_restart); - WT_STAT_DATA_INCR(session, cursor_restart); + __cursor_restart(session, &yield_count, &sleep_count); goto retry; } @@ -904,10 +919,12 @@ __wt_btcur_insert_check(WT_CURSOR_BTREE *cbt) WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; + uint64_t yield_count, sleep_count; cursor = &cbt->iface; btree = cbt->btree; session = (WT_SESSION_IMPL *)cursor->session; + yield_count = sleep_count = 0; /* * The pinned page goes away if we do a search, get a local copy of any @@ -929,8 +946,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); WT_ERR(__wt_illegal_value(session, NULL)); err: if (ret == WT_RESTART) { - WT_STAT_CONN_INCR(session, cursor_restart); - WT_STAT_DATA_INCR(session, cursor_restart); + __cursor_restart(session, &yield_count, &sleep_count); goto retry; } @@ -955,11 +971,13 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt) WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; + uint64_t yield_count, sleep_count; bool iterating, valid; btree = cbt->btree; cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; + yield_count = sleep_count = 0; iterating = F_ISSET(cbt, WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV); WT_STAT_CONN_INCR(session, cursor_remove); @@ -1092,8 +1110,7 @@ retry: if (positioned == POSITIONED) } err: if (ret == WT_RESTART) { - WT_STAT_CONN_INCR(session, cursor_restart); - WT_STAT_DATA_INCR(session, cursor_restart); + __cursor_restart(session, &yield_count, &sleep_count); goto retry; } @@ -1172,11 +1189,13 @@ __btcur_update(WT_CURSOR_BTREE *cbt, WT_ITEM *value, u_int modify_type) WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; + uint64_t yield_count, sleep_count; bool valid; btree = cbt->btree; cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; + yield_count = sleep_count = 0; /* It's no longer possible to bulk-load into the tree. */ __cursor_disable_bulk(session, btree); @@ -1268,8 +1287,7 @@ retry: WT_ERR(__cursor_func_init(cbt, true)); } err: if (ret == WT_RESTART) { - WT_STAT_CONN_INCR(session, cursor_restart); - WT_STAT_DATA_INCR(session, cursor_restart); + __cursor_restart(session, &yield_count, &sleep_count); goto retry; } @@ -1608,6 +1626,9 @@ __cursor_truncate(WT_SESSION_IMPL *session, int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int)) { WT_DECL_RET; + uint64_t yield_count, sleep_count; + + yield_count = sleep_count = 0; /* * First, call the cursor search method to re-position the cursor: we @@ -1644,8 +1665,7 @@ retry: WT_ERR(__wt_btcur_search(start)); } err: if (ret == WT_RESTART) { - WT_STAT_CONN_INCR(session, cursor_restart); - WT_STAT_DATA_INCR(session, cursor_restart); + __cursor_restart(session, &yield_count, &sleep_count); goto retry; } @@ -1663,8 +1683,11 @@ __cursor_truncate_fix(WT_SESSION_IMPL *session, int (*rmfunc)(WT_SESSION_IMPL *, WT_CURSOR_BTREE *, u_int)) { WT_DECL_RET; + uint64_t yield_count, sleep_count; const uint8_t *value; + yield_count = sleep_count = 0; + /* * Handle fixed-length column-store objects separately: for row-store * and variable-length column-store objects we have "deleted" values @@ -1702,8 +1725,7 @@ retry: WT_ERR(__wt_btcur_search(start)); } err: if (ret == WT_RESTART) { - WT_STAT_CONN_INCR(session, cursor_restart); - WT_STAT_DATA_INCR(session, cursor_restart); + __cursor_restart(session, &yield_count, &sleep_count); goto retry; } diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c index 345556c4c41..b98e994640d 100644 --- a/src/third_party/wiredtiger/src/btree/bt_read.c +++ b/src/third_party/wiredtiger/src/btree/bt_read.c @@ -113,7 +113,7 @@ __las_page_instantiate_verbose(WT_SESSION_IMPL *session, uint64_t las_pageid) * Instantiate lookaside update records in a recently read page. */ static int -__las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id) +__las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref) { WT_CACHE *cache; WT_CURSOR *cursor; @@ -136,11 +136,12 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id) locked = false; total_incr = 0; current_recno = recno = WT_RECNO_OOB; + las_pageid = ref->page_las->las_pageid; session_flags = 0; /* [-Werror=maybe-uninitialized] */ WT_CLEAR(las_key); cache = S2C(session)->cache; - __las_page_instantiate_verbose(session, ref->page_las->las_pageid); + __las_page_instantiate_verbose(session, las_pageid); WT_STAT_CONN_INCR(session, cache_read_lookaside); WT_STAT_DATA_INCR(session, cache_read_lookaside); @@ -159,11 +160,11 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id) * for a key and then insert those updates into the page, then all the * updates for the next key, and so on. */ - ret = __wt_las_cursor_position( - cursor, btree_id, ref->page_las->las_pageid); __wt_readlock(session, &cache->las_sweepwalk_lock); locked = true; - for (; ret == 0; ret = cursor->next(cursor)) { + for (ret = __wt_las_cursor_position(cursor, las_pageid); + ret == 0; + ret = cursor->next(cursor)) { WT_ERR(cursor->get_key(cursor, &las_pageid, &las_id, &las_counter, &las_key)); @@ -171,8 +172,7 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id) * Confirm the search using the unique prefix; if not a match, * we're done searching for records for this page. */ - if (las_id != btree_id || - las_pageid != ref->page_las->las_pageid) + if (las_pageid != ref->page_las->las_pageid) break; /* Allocate the WT_UPDATE structure. */ @@ -367,7 +367,6 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref) static int __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) { - WT_BTREE *btree; WT_DECL_RET; WT_ITEM tmp; WT_PAGE *notused; @@ -377,7 +376,6 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) const uint8_t *addr; bool timer; - btree = S2BT(session); time_start = time_stop = 0; /* @@ -483,7 +481,7 @@ skip_read: * then apply the delete. */ if (ref->page_las != NULL) { - WT_ERR(__las_page_instantiate(session, ref, btree->id)); + WT_ERR(__las_page_instantiate(session, ref)); ref->page_las->eviction_to_lookaside = false; } @@ -504,7 +502,7 @@ skip_read: if (previous_state == WT_REF_LIMBO) WT_STAT_CONN_INCR(session, cache_read_lookaside_delay); - WT_ERR(__las_page_instantiate(session, ref, btree->id)); + WT_ERR(__las_page_instantiate(session, ref)); ref->page_las->eviction_to_lookaside = false; break; } @@ -518,7 +516,7 @@ skip_read: */ if (final_state == WT_REF_MEM && ref->page_las != NULL) WT_IGNORE_RET(__wt_las_remove_block( - session, btree->id, ref->page_las->las_pageid)); + session, ref->page_las->las_pageid, false)); WT_PUBLISH(ref->state, final_state); return (ret); diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c index 7ccc325523e..620800a8fb9 100644 --- a/src/third_party/wiredtiger/src/cache/cache_las.c +++ b/src/third_party/wiredtiger/src/cache/cache_las.c @@ -455,7 +455,7 @@ __wt_las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref) */ static int __las_remove_block(WT_SESSION_IMPL *session, - WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid, uint64_t *decrp) + WT_CURSOR *cursor, uint64_t pageid, bool lock_wait, uint64_t *remove_cntp) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; @@ -463,30 +463,32 @@ __las_remove_block(WT_SESSION_IMPL *session, uint64_t las_counter, las_pageid; uint32_t las_id; - *decrp = 0; + *remove_cntp = 0; conn = S2C(session); - __wt_writelock(session, &conn->cache->las_sweepwalk_lock); + /* Prevent the sweep thread from removing the block. */ + if (lock_wait) + __wt_writelock(session, &conn->cache->las_sweepwalk_lock); + else + WT_RET(__wt_try_writelock( + session, &conn->cache->las_sweepwalk_lock)); /* * Search for the block's unique btree ID and page ID prefix and step * through all matching records, removing them. */ - for (ret = __wt_las_cursor_position(cursor, btree_id, pageid); + for (ret = __wt_las_cursor_position(cursor, pageid); ret == 0; ret = cursor->next(cursor)) { WT_ERR(cursor->get_key(cursor, &las_pageid, &las_id, &las_counter, &las_key)); - /* - * Confirm the record matches; if not a match, we're done - * searching for records for this page. - */ - if (las_pageid != pageid || las_id != btree_id) + /* Confirm that we have a matching record. */ + if (las_pageid != pageid) break; WT_ERR(cursor->remove(cursor)); - ++*decrp; + ++*remove_cntp; } WT_ERR_NOTFOUND_OK(ret); @@ -580,7 +582,7 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_SESSION_IMPL *las_session; WT_TXN_ISOLATION saved_isolation; WT_UPDATE *upd; - uint64_t decrement_cnt, insert_cnt, insert_estimate; + uint64_t insert_cnt; uint64_t las_counter, las_pageid; uint32_t btree_id, i, slot; uint8_t *p; @@ -590,12 +592,11 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, conn = S2C(session); WT_CLEAR(las_timestamp); WT_CLEAR(las_value); - decrement_cnt = insert_cnt = insert_estimate = 0; + insert_cnt = 0; btree_id = btree->id; local_txn = false; - las_pageid = multi->page_las.las_pageid = - __wt_atomic_add64(&conn->cache->las_pageid, 1); + las_pageid = __wt_atomic_add64(&conn->cache->las_pageid, 1); if (!btree->lookaside_entries) btree->lookaside_entries = true; @@ -606,12 +607,18 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_ERR(__wt_txn_begin(las_session, NULL)); local_txn = true; +#ifdef HAVE_DIAGNOSTIC + { + uint64_t remove_cnt; /* - * Make sure there are no leftover entries (e.g., from a handle - * reopen). + * There should never be any entries with the page ID we are about to + * use. */ - WT_ERR(__las_remove_block( - session, cursor, btree_id, las_pageid, &decrement_cnt)); + WT_ERR_BUSY_OK(__las_remove_block( + session, cursor, las_pageid, false, &remove_cnt)); + WT_ASSERT(session, remove_cnt == 0); + } +#endif /* Enter each update in the boundary's list into the lookaside store. */ for (las_counter = 0, i = 0, @@ -707,18 +714,6 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, upd->type, &las_value); /* - * If remove is running concurrently, it's possible for - * records to be removed before the insert transaction - * commit (remove is configured read-uncommitted). Make - * sure increments stay ahead of decrements. - */ - if (insert_estimate <= insert_cnt) { - insert_estimate += 100; - (void)__wt_atomic_add64( - &conn->cache->las_entry_count, 100); - } - - /* * Using update looks a little strange because the keys * are guaranteed to not exist, but since we're * appending, we want the cursor to stay positioned in @@ -731,34 +726,31 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, err: /* Resolve the transaction. */ if (local_txn) { - if (ret == 0) - ret = __wt_txn_commit(las_session, NULL); - else + if (ret == 0) { + /* + * Adjust the entry count. + * + * For inserts, we increment before committing. As + * soon as we commit, sweep could catch up and remove + * the block, and we don't want the count to underflow. + * In the unlikely event that the commit fails, roll + * back the increment. + */ + __wt_atomic_add64( + &conn->cache->las_entry_count, insert_cnt); + if ((ret = __wt_txn_commit(las_session, NULL)) != 0) + __wt_cache_decr_check_uint64(session, + &conn->cache->las_entry_count, + insert_cnt, "lookaside entry count"); + } else WT_TRET(__wt_txn_rollback(las_session, NULL)); } __las_restore_isolation(las_session, saved_isolation); - /* - * If the transaction successfully committed and we inserted records, - * adjust the final entry count. We may have also deleted records, - * but we must have intended to insert records to be in this function - * at all, checking the insert count is sufficient. - */ - if (insert_cnt > 0) { - if (ret == 0) { - (void)__wt_atomic_add64( - &conn->cache->las_entry_count, - insert_estimate - insert_cnt); - __wt_cache_decr_check_uint64(session, - &conn->cache->las_entry_count, - decrement_cnt, "lookaside entry count"); - - ret = __las_insert_block_verbose(session, multi); - } else - __wt_cache_decr_check_uint64(session, - &conn->cache->las_entry_count, - insert_estimate, "lookaside entry count"); + if (ret == 0 && insert_cnt > 0) { + multi->page_las.las_pageid = las_pageid; + ret = __las_insert_block_verbose(session, multi); } return (ret); @@ -772,7 +764,7 @@ err: /* Resolve the transaction. */ * WT_CONNECTION::rollback_to_stable. */ int -__wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) +__wt_las_cursor_position(WT_CURSOR *cursor, uint64_t pageid) { WT_ITEM las_key; uint64_t las_counter, las_pageid; @@ -796,7 +788,7 @@ __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) for (;;) { WT_CLEAR(las_key); cursor->set_key(cursor, - pageid, btree_id, (uint64_t)0, &las_key); + pageid, (uint32_t)0, (uint64_t)0, &las_key); WT_RET(cursor->search_near(cursor, &exact)); if (exact < 0) { WT_RET(cursor->next(cursor)); @@ -813,8 +805,7 @@ __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) */ WT_RET(cursor->get_key(cursor, &las_pageid, &las_id, &las_counter, &las_key)); - if (las_pageid < pageid || (las_pageid == pageid && - las_id < btree_id)) + if (las_pageid < pageid) continue; } @@ -830,14 +821,14 @@ __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) */ int __wt_las_remove_block( - WT_SESSION_IMPL *session, uint32_t btree_id, uint64_t pageid) + WT_SESSION_IMPL *session, uint64_t pageid, bool lock_wait) { WT_CONNECTION_IMPL *conn; WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *las_session; WT_TXN_ISOLATION saved_isolation; - uint64_t decrement_cnt; + uint64_t remove_cnt; uint32_t session_flags; conn = S2C(session); @@ -856,7 +847,7 @@ __wt_las_remove_block( WT_ERR(__wt_txn_begin(las_session, NULL)); ret = __las_remove_block( - las_session, cursor, btree_id, pageid, &decrement_cnt); + las_session, cursor, pageid, lock_wait, &remove_cnt); if (ret == 0) ret = __wt_txn_commit(las_session, NULL); else @@ -864,7 +855,7 @@ __wt_las_remove_block( if (ret == 0) __wt_cache_decr_check_uint64(session, &conn->cache->las_entry_count, - decrement_cnt, "lookaside entry count"); + remove_cnt, "lookaside entry count"); err: __las_restore_isolation(las_session, saved_isolation); WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags)); @@ -993,7 +984,7 @@ __wt_las_sweep(WT_SESSION_IMPL *session) #else wt_timestamp_t *val_ts; #endif - uint64_t cnt, decrement_cnt, las_counter, las_pageid, saved_pageid; + uint64_t cnt, remove_cnt, las_counter, las_pageid, saved_pageid; uint64_t las_txnid; uint32_t las_id, session_flags; uint8_t upd_type; @@ -1003,7 +994,7 @@ __wt_las_sweep(WT_SESSION_IMPL *session) cache = S2C(session)->cache; cursor = NULL; sweep_key = &cache->las_sweep_key; - decrement_cnt = 0; + remove_cnt = 0; session_flags = 0; /* [-Werror=maybe-uninitialized] */ local_txn = locked = false; @@ -1020,6 +1011,9 @@ __wt_las_sweep(WT_SESSION_IMPL *session) WT_ERR(__wt_txn_begin(session, NULL)); local_txn = true; + /* + * Prevent other threads removing entries from underneath the sweep. + */ __wt_writelock(session, &cache->las_sweepwalk_lock); locked = true; @@ -1105,7 +1099,7 @@ __wt_las_sweep(WT_SESSION_IMPL *session) __bit_test(cache->las_sweep_dropmap, las_id - cache->las_sweep_dropmin)) { WT_ERR(cursor->remove(cursor)); - ++decrement_cnt; + ++remove_cnt; saved_key->size = 0; continue; } @@ -1152,12 +1146,9 @@ __wt_las_sweep(WT_SESSION_IMPL *session) } WT_ERR(cursor->remove(cursor)); - ++decrement_cnt; + ++remove_cnt; } - __wt_writeunlock(session, &cache->las_sweepwalk_lock); - locked = false; - /* * If the loop terminates after completing a work unit, we will * continue the table sweep next time. Get a local copy of the @@ -1186,7 +1177,7 @@ err: __wt_buf_free(session, sweep_key); if (ret == 0) __wt_cache_decr_check_uint64(session, &S2C(session)->cache->las_entry_count, - decrement_cnt, "lookaside entry count"); + remove_cnt, "lookaside entry count"); } if (locked) __wt_writeunlock(session, &cache->las_sweepwalk_lock); diff --git a/src/third_party/wiredtiger/src/config/config.c b/src/third_party/wiredtiger/src/config/config.c index b15bbdf83c7..799139b6b90 100644 --- a/src/third_party/wiredtiger/src/config/config.c +++ b/src/third_party/wiredtiger/src/config/config.c @@ -745,20 +745,36 @@ __wt_config_gets_def(WT_SESSION_IMPL *session, const char **cfg, const char *key, int def, WT_CONFIG_ITEM *value) { WT_CONFIG_ITEM_STATIC_INIT(false_value); + const char **end; *value = false_value; value->val = def; - if (cfg == NULL || cfg[0] == NULL || cfg[1] == NULL) + if (cfg == NULL) return (0); - if (cfg[2] == NULL) { + /* + * Checking the "length" of the pointer array is a little odd, but it's + * deliberate. The reason is because we pass variable length arrays of + * pointers as the configuration argument, some of which have only one + * element and the NULL termination. Static analyzers (like Coverity) + * complain if we read from an offset past the end of the array, even + * if we check there's no NULL slots before the offset. + */ + for (end = cfg; *end != NULL; ++end) + ; + switch ((int)(end - cfg)) { + case 0: /* cfg[0] == NULL */ + case 1: /* cfg[1] == NULL */ + return (0); + case 2: /* cfg[2] == NULL */ WT_RET_NOTFOUND_OK( __wt_config_getones(session, cfg[1], key, value)); return (0); + default: + return (__wt_config_gets(session, cfg, key, value)); } - - return (__wt_config_gets(session, cfg, key, value)); + /* NOTREACHED */ } /* diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index a8e906a9f19..e753dc1644a 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -1101,8 +1101,8 @@ err: /* */ F_SET(conn, WT_CONN_EVICTION_NO_LOOKASIDE); - /* Shut down transactions (wait for in-flight operations to complete. */ - WT_TRET(__wt_txn_global_shutdown(session)); + /* Wait for in-flight operations to complete. */ + WT_TRET(__wt_txn_activity_drain(session)); /* * Perform a system-wide checkpoint so that all tables are consistent @@ -1135,6 +1135,9 @@ err: /* } } + /* Shut down the global transaction state. */ + __wt_txn_global_shutdown(session); + if (ret != 0) { __wt_err(session, ret, "failure during close, disabling further writes"); diff --git a/src/third_party/wiredtiger/src/cursor/cur_file.c b/src/third_party/wiredtiger/src/cursor/cur_file.c index 381f8bcc619..dff851f99d2 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_file.c +++ b/src/third_party/wiredtiger/src/cursor/cur_file.c @@ -530,8 +530,9 @@ __curfile_cache(WT_CURSOR *cursor) cbt = (WT_CURSOR_BTREE *)cursor; session = (WT_SESSION_IMPL *)cursor->session; + cbt->dhandle = cbt->btree->dhandle; - WT_TRET(__wt_cursor_cache(cursor, cbt->btree->dhandle)); + WT_TRET(__wt_cursor_cache(cursor, cbt->dhandle)); WT_TRET(__wt_session_release_dhandle(session)); return (ret); } @@ -552,7 +553,7 @@ __curfile_reopen(WT_CURSOR *cursor, bool check_only) is_dead = false; cbt = (WT_CURSOR_BTREE *)cursor; session = (WT_SESSION_IMPL *)cursor->session; - dhandle = cbt->btree->dhandle; + dhandle = cbt->dhandle; if (!WT_DHANDLE_CAN_REOPEN(dhandle)) ret = WT_NOTFOUND; @@ -579,6 +580,9 @@ __curfile_reopen(WT_CURSOR *cursor, bool check_only) * memory owned by the btree handle. */ if (ret == 0) { + WT_ASSERT(session, + dhandle->type == WT_DHANDLE_TYPE_BTREE); + cbt->btree = dhandle->handle; cursor->internal_uri = cbt->btree->dhandle->name; cursor->key_format = cbt->btree->key_format; cursor->value_format = cbt->btree->value_format; diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 149f4304692..79d6634913e 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -202,6 +202,8 @@ static inline void __wt_cache_decr_check_uint64( WT_SESSION_IMPL *session, uint64_t *vp, uint64_t v, const char *fld) { + uint64_t orig = *vp; + if (v == 0 || __wt_atomic_sub64(vp, v) < WT_EXABYTE) return; @@ -211,7 +213,8 @@ __wt_cache_decr_check_uint64( */ *vp = 0; __wt_errx(session, - "%s went negative with decrement of %" PRIu64, fld, v); + "%s was %" PRIu64 ", went negative with decrement of %" PRIu64, fld, + orig, v); #ifdef HAVE_DIAGNOSTIC __wt_abort(session); @@ -1183,6 +1186,10 @@ __wt_page_del_active( * * We cannot evict dirty pages or split while a checkpoint is in progress, * unless the checkpoint thread is doing the work. + * + * Also, during connection close, if we take a checkpoint as of a + * timestamp, eviction should not write dirty pages to avoid updates newer + * than the checkpoint timestamp leaking to disk. */ static inline bool __wt_btree_can_evict_dirty(WT_SESSION_IMPL *session) @@ -1190,7 +1197,8 @@ __wt_btree_can_evict_dirty(WT_SESSION_IMPL *session) WT_BTREE *btree; btree = S2BT(session); - return (btree->checkpointing == WT_CKPT_OFF || + return ((btree->checkpointing == WT_CKPT_OFF && + !F_ISSET(S2C(session), WT_CONN_CLOSING_TIMESTAMP)) || WT_SESSION_IS_CHECKPOINT(session)); } diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i index 644222ad569..ff3486c1750 100644 --- a/src/third_party/wiredtiger/src/include/cell.i +++ b/src/third_party/wiredtiger/src/include/cell.i @@ -721,6 +721,27 @@ __wt_cell_unpack(WT_CELL *cell, WT_CELL_UNPACK *unpack) } /* + * __wt_cell_unpack_empty_value -- + * Create an unpacked cell that looks like zero-length row-store value. + */ +static inline void +__wt_cell_unpack_empty_value(WT_CELL_UNPACK *unpack) +{ + /* + * Row-store doesn't store zero-length values on pages, but this allows + * us to pretend. + */ + unpack->cell = NULL; + unpack->v = 0; + unpack->data = ""; + unpack->size = 0; + unpack->__len = 0; + unpack->prefix = 0; + unpack->raw = unpack->type = WT_CELL_VALUE; + unpack->ovfl = 0; +} + +/* * __cell_data_ref -- * Set a buffer to reference the data from an unpacked cell. */ diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h index ec5c6689c3f..e84921ad035 100644 --- a/src/third_party/wiredtiger/src/include/cursor.h +++ b/src/third_party/wiredtiger/src/include/cursor.h @@ -89,7 +89,13 @@ struct __wt_cursor_backup { struct __wt_cursor_btree { WT_CURSOR iface; + /* + * The btree field is safe to use when the cursor is open. When the + * cursor is cached, the btree may be closed, so it is only safe + * initially to look at the underlying data handle. + */ WT_BTREE *btree; /* Enclosing btree */ + WT_DATA_HANDLE *dhandle; /* Data handle for the btree */ /* * The following fields are set by the search functions as a precursor diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index caa48180867..579a2e5ed36 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -211,8 +211,8 @@ extern int __wt_las_cursor_close(WT_SESSION_IMPL *session, WT_CURSOR **cursorp, extern bool __wt_las_page_skip_locked(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern bool __wt_las_page_skip(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_PAGE *page, WT_MULTI *multi, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_las_remove_block(WT_SESSION_IMPL *session, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_las_cursor_position(WT_CURSOR *cursor, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_las_remove_block(WT_SESSION_IMPL *session, uint64_t pageid, bool lock_wait) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_las_save_dropped(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_las_sweep(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern uint32_t __wt_checksum_sw(const void *chunk, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); @@ -813,7 +813,8 @@ extern void __wt_txn_stats_update(WT_SESSION_IMPL *session); extern void __wt_txn_destroy(WT_SESSION_IMPL *session); extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_txn_global_destroy(WT_SESSION_IMPL *session); -extern int __wt_txn_global_shutdown(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_activity_drain(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern void __wt_txn_global_shutdown(WT_SESSION_IMPL *session); extern int __wt_verbose_dump_txn_one(WT_SESSION_IMPL *session, WT_TXN *txn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_verbose_dump_txn(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index 8bc022cd3e3..7a97d5ae959 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -1665,16 +1665,7 @@ __rec_child_deleted(WT_SESSION_IMPL *session, * read into this part of the name space again, the cache read function * instantiates an entirely new page.) */ - if (ref->addr != NULL && !__wt_page_del_active(session, ref, true)) - WT_RET(__wt_ref_block_free(session, ref)); - - /* - * If the original page is gone, we can skip the slot on the internal - * page. - */ - if (ref->addr == NULL) { - *statep = WT_CHILD_IGNORE; - + if (ref->addr != NULL && !__wt_page_del_active(session, ref, true)) { /* * Minor memory cleanup: if a truncate call deleted this page * and we were ever forced to instantiate the page in memory, @@ -1687,6 +1678,15 @@ __rec_child_deleted(WT_SESSION_IMPL *session, __wt_free(session, ref->page_del); } + WT_RET(__wt_ref_block_free(session, ref)); + } + + /* + * If the original page is gone, we can skip the slot on the internal + * page. + */ + if (ref->addr == NULL) { + *statep = WT_CHILD_IGNORE; return (0); } @@ -5328,6 +5328,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session, key = &r->k; val = &r->v; + vpack = &_vpack; WT_RET(__rec_split_init(session, r, page, 0, btree->maxleafpage)); @@ -5376,14 +5377,19 @@ __rec_row_leaf(WT_SESSION_IMPL *session, __wt_cell_unpack(cell, kpack); } - /* Unpack the on-page value cell, and look for an update. */ + /* + * Unpack the on-page value cell, and look for an update. Under + * some conditions, the underlying code returning updates will + * restructure the update list to include the original on-page + * value, represented by the unpacked-cell argument. Row-store + * doesn't store zero-length values on the page, so we build an + * unpacked cell that allows us to pretend. + */ if ((val_cell = __wt_row_leaf_value_cell(page, rip, NULL)) == NULL) - vpack = NULL; - else { - vpack = &_vpack; + __wt_cell_unpack_empty_value(vpack); + else __wt_cell_unpack(val_cell, vpack); - } WT_ERR(__rec_txn_read( session, r, NULL, rip, vpack, NULL, &upd)); @@ -5399,10 +5405,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session, * copy, we have to create a new value item as the old * item might have been discarded from the page. */ - if (vpack == NULL) { - val->buf.data = NULL; - val->cell_len = val->len = val->buf.size = 0; - } else if (vpack->raw == WT_CELL_VALUE_COPY) { + if (vpack->raw == WT_CELL_VALUE_COPY) { /* If the item is Huffman encoded, decode it. */ if (btree->huffman_value == NULL) { p = vpack->data; @@ -5478,8 +5481,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session, * The first time we find an overflow record we're not * going to use, discard the underlying blocks. */ - if (vpack != NULL && - vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM) + if (vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM) WT_ERR(__wt_ovfl_remove(session, page, vpack, F_ISSET(r, WT_REC_EVICT))); @@ -6183,18 +6185,18 @@ __rec_las_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r) { WT_DECL_RET; WT_MULTI *multi; - uint32_t btree_id, i; - - btree_id = S2BT(session)->id; + uint64_t las_pageid; + uint32_t i; /* * Note the additional check for a non-zero lookaside page ID, that * flags if lookaside table entries for this page have been written. */ for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i) - if (multi->supd != NULL && multi->page_las.las_pageid != 0) - WT_TRET(__wt_las_remove_block(session, - btree_id, multi->page_las.las_pageid)); + if (multi->supd != NULL && + (las_pageid = multi->page_las.las_pageid) != 0) + WT_TRET( + __wt_las_remove_block(session, las_pageid, true)); return (ret); } diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index b2952cbec46..400edb59e61 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -1380,17 +1380,15 @@ __wt_txn_global_destroy(WT_SESSION_IMPL *session) } /* - * __wt_txn_global_shutdown -- - * Shut down the global transaction state. + * __wt_txn_activity_drain -- + * Wait for transactions to quiesce. */ int -__wt_txn_global_shutdown(WT_SESSION_IMPL *session) +__wt_txn_activity_drain(WT_SESSION_IMPL *session) { bool txn_active; /* - * We're shutting down. Make sure everything gets freed. - * * It's possible that the eviction server is in the middle of a long * operation, with a transaction ID pinned. In that case, we will loop * here until the transaction ID is released, when the oldest @@ -1405,15 +1403,30 @@ __wt_txn_global_shutdown(WT_SESSION_IMPL *session) __wt_yield(); } + return (0); +} + +/* + * __wt_txn_global_shutdown -- + * Shut down the global transaction state. + */ +void +__wt_txn_global_shutdown(WT_SESSION_IMPL *session) +{ #ifdef HAVE_TIMESTAMPS /* - * Now that all transactions have completed, no timestamps should be - * pinned. + * All application transactions have completed, ignore the pinned + * timestamp so that updates can be evicted from the cache during + * connection close. + * + * Note that we are relying on a special case in __wt_txn_visible_all + * that returns true during close when there is no pinned timestamp + * set. */ - __wt_timestamp_set_inf(&S2C(session)->txn_global.pinned_timestamp); + S2C(session)->txn_global.has_pinned_timestamp = false; +#else + WT_UNUSED(session); #endif - - return (0); } /* diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index 5a71135918a..78197e838f4 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -385,8 +385,13 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session) conn = S2C(session); cache = conn->cache; - /* Give up if scrubbing is disabled. */ - if (cache->eviction_checkpoint_target < DBL_EPSILON || + /* + * Give up if scrubbing is disabled, including when checkpointing with + * a timestamp on close (we can't evict dirty pages in that case, so + * scrubbing cannot help). + */ + if (F_ISSET(conn, WT_CONN_CLOSING_TIMESTAMP) || + cache->eviction_checkpoint_target < DBL_EPSILON || cache->eviction_checkpoint_target >= cache->eviction_dirty_trigger) return; diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index eef2fde5284..e0d5beea61a 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -22,13 +22,13 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session) WT_DECL_TIMESTAMP(rollback_timestamp) WT_ITEM las_key, las_timestamp, las_value; WT_TXN_GLOBAL *txn_global; - uint64_t las_counter, las_pageid, las_total, las_txnid, remove_cnt; + uint64_t las_counter, las_pageid, las_total, las_txnid; uint32_t las_id, session_flags; uint8_t upd_type; conn = S2C(session); cursor = NULL; - las_total = remove_cnt = 0; + las_total = 0; session_flags = 0; /* [-Werror=maybe-uninitialized] */ WT_CLEAR(las_timestamp); @@ -51,6 +51,7 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session) /* Walk the file. */ __wt_writelock(session, &conn->cache->las_sweepwalk_lock); while ((ret = cursor->next(cursor)) == 0) { + ++las_total; WT_ERR(cursor->get_key(cursor, &las_pageid, &las_id, &las_counter, &las_key)); @@ -73,17 +74,15 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session) if (__wt_timestamp_cmp( &rollback_timestamp, las_timestamp.data) < 0) { WT_ERR(cursor->remove(cursor)); - ++remove_cnt; WT_STAT_CONN_INCR(session, txn_rollback_las_removed); - } else - ++las_total; + --las_total; + } } WT_ERR_NOTFOUND_OK(ret); -err: __wt_writeunlock(session, &conn->cache->las_sweepwalk_lock); +err: if (ret == 0) + conn->cache->las_entry_count = las_total; + __wt_writeunlock(session, &conn->cache->las_sweepwalk_lock); WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags)); - __wt_cache_decr_check_uint64(session, - &conn->cache->las_entry_count, remove_cnt, "lookaside entry count"); - WT_STAT_CONN_SET(session, cache_lookaside_entries, las_total); F_CLR(session, WT_SESSION_READ_WONT_NEED); diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c index 0677b3b753c..7e54c7ad171 100644 --- a/src/third_party/wiredtiger/test/format/config.c +++ b/src/third_party/wiredtiger/test/format/config.c @@ -47,6 +47,7 @@ static void config_map_encryption(const char *, u_int *); static void config_map_file_type(const char *, u_int *); static void config_map_isolation(const char *, u_int *); static void config_pct(void); +static void config_prepare(void); static void config_reset(void); /* @@ -171,6 +172,7 @@ config_setup(void) config_isolation(); config_lrt(); config_pct(); + config_prepare(); /* * If this is an LSM run, ensure cache size sanity. @@ -667,6 +669,40 @@ config_pct(void) } /* + * config_prepare -- + * Transaction prepare configuration. + */ +static void +config_prepare(void) +{ + /* + * We cannot prepare a transaction if logging is configured, or if + * timestamps are not configured. + * + * Prepare isn't configured often, let it control other features, unless + * they're explicitly set/not-set. + */ + if (!g.c_prepare) + return; + if (config_is_perm("prepare")) { + if (g.c_logging && config_is_perm("logging")) + testutil_die(EINVAL, + "prepare is incompatible with logging"); + if (!g.c_txn_timestamps && + config_is_perm("transaction_timestamps")) + testutil_die(EINVAL, + "prepare requires transaction timestamps"); + } + if (g.c_logging && config_is_perm("logging")) + return; + if (!g.c_txn_timestamps && config_is_perm("transaction_timestamps")) + return; + + config_single("logging=off", 0); + config_single("transaction_timestamps=on", 0); +} + +/* * config_error -- * Display configuration information on error. */ diff --git a/src/third_party/wiredtiger/test/format/config.h b/src/third_party/wiredtiger/test/format/config.h index 1b5f170abf6..a897b04bf08 100644 --- a/src/third_party/wiredtiger/test/format/config.h +++ b/src/third_party/wiredtiger/test/format/config.h @@ -274,6 +274,10 @@ static CONFIG c[] = { "minimum gain before prefix compression is used", 0x0, 0, 8, 256, &g.c_prefix_compression_min, NULL }, + { "prepare", + "configure transaction prepare", /* 5% */ + C_BOOL, 5, 0, 0, &g.c_prepare, NULL }, + { "quiet", "quiet run (same as -q)", C_IGNORE|C_BOOL, 0, 0, 1, &g.c_quiet, NULL }, diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h index 378883a314f..e929eb3207d 100644 --- a/src/third_party/wiredtiger/test/format/format.h +++ b/src/third_party/wiredtiger/test/format/format.h @@ -198,6 +198,7 @@ typedef struct { uint32_t c_ops; uint32_t c_prefix_compression; uint32_t c_prefix_compression_min; + uint32_t c_prepare; uint32_t c_quiet; uint32_t c_read_pct; uint32_t c_rebalance; @@ -219,9 +220,9 @@ typedef struct { uint32_t c_timing_stress_split_5; uint32_t c_timing_stress_split_6; uint32_t c_timing_stress_split_7; + uint32_t c_truncate; uint32_t c_txn_freq; uint32_t c_txn_timestamps; - uint32_t c_truncate; uint32_t c_value_max; uint32_t c_value_min; uint32_t c_verify; @@ -276,22 +277,21 @@ typedef struct { WT_RAND_STATE rnd; /* thread RNG state */ - uint64_t commit; /* transaction resolution */ - uint64_t prepare; - uint64_t rollback; - uint64_t deadlock; - uint64_t commit_timestamp; /* last committed timestamp */ uint64_t read_timestamp; /* read timestamp */ volatile bool quit; /* thread should quit */ - uint64_t search; /* operation counts */ + uint64_t ops; /* total operations */ + uint64_t commit; /* operation counts */ + uint64_t deadlock; uint64_t insert; - uint64_t update; + uint64_t prepare; uint64_t remove; + uint64_t rollback; + uint64_t search; uint64_t truncate; - uint64_t ops; + uint64_t update; uint64_t keyno; /* key */ WT_ITEM *key, _key; /* key, value */ diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c index 54aa6d2b766..6930e470b8d 100644 --- a/src/third_party/wiredtiger/test/format/ops.c +++ b/src/third_party/wiredtiger/test/format/ops.c @@ -193,11 +193,12 @@ wts_ops(int lastrun) tinfo = tinfo_list[i]; total.commit += tinfo->commit; total.deadlock += tinfo->deadlock; - total.prepare += tinfo->prepare; total.insert += tinfo->insert; + total.prepare += tinfo->prepare; total.remove += tinfo->remove; total.rollback += tinfo->rollback; total.search += tinfo->search; + total.truncate += tinfo->truncate; total.update += tinfo->update; switch (tinfo->state) { @@ -496,26 +497,36 @@ begin_transaction(TINFO *tinfo, WT_SESSION *session, u_int *iso_configp) u_int v; const char *config; char config_buf[64]; + bool locked; + + locked = false; if ((v = g.c_isolation_flag) == ISOLATION_RANDOM) - v = mmrand(&tinfo->rnd, 2, 4); + v = mmrand(&tinfo->rnd, 1, 3); switch (v) { - case ISOLATION_READ_UNCOMMITTED: + case 1: + v = ISOLATION_READ_UNCOMMITTED; config = "isolation=read-uncommitted"; break; - case ISOLATION_READ_COMMITTED: + case 2: + v = ISOLATION_READ_COMMITTED; config = "isolation=read-committed"; break; - case ISOLATION_SNAPSHOT: + case 3: default: v = ISOLATION_SNAPSHOT; config = "isolation=snapshot"; + if (g.c_txn_timestamps) { /* * Avoid starting a new reader when a prepare is in * progress. */ - (void)pthread_rwlock_rdlock(&g.prepare_lock); + if (g.c_prepare) { + testutil_check( + pthread_rwlock_rdlock(&g.prepare_lock)); + locked = true; + } /* * Set the thread's read timestamp to the current value @@ -537,8 +548,8 @@ begin_transaction(TINFO *tinfo, WT_SESSION *session, u_int *iso_configp) testutil_check(session->begin_transaction(session, config)); - if (v == ISOLATION_SNAPSHOT && g.c_txn_timestamps) - (void)pthread_rwlock_unlock(&g.prepare_lock); + if (locked) + testutil_check(pthread_rwlock_unlock(&g.prepare_lock)); /* * It's OK for the oldest timestamp to move past a running query, clear @@ -630,12 +641,8 @@ prepare_transaction(TINFO *tinfo, WT_SESSION *session) uint64_t ts; char config_buf[64]; - /* - * We cannot prepare a transaction if logging on the table is set. - * Prepare also requires timestamps. Skip if not using timestamps, - * if no timestamp has yet been set, or if using logging. - */ - if (!g.c_txn_timestamps || g.timestamp == 0 || g.c_logging) + /* Skip if no timestamp has yet been set. */ + if (g.timestamp == 0) return (0); /* @@ -652,14 +659,14 @@ prepare_transaction(TINFO *tinfo, WT_SESSION *session) * Prepare will return error if prepare timestamp is less than any * active read timestamp. */ - (void)pthread_rwlock_wrlock(&g.prepare_lock); + testutil_check(pthread_rwlock_wrlock(&g.prepare_lock)); ts = set_commit_timestamp(tinfo); testutil_check(__wt_snprintf( config_buf, sizeof(config_buf), "prepare_timestamp=%" PRIx64, ts)); ret = session->prepare_transaction(session, config_buf); - (void)pthread_rwlock_unlock(&g.prepare_lock); + testutil_check(pthread_rwlock_unlock(&g.prepare_lock)); return (ret); } @@ -1095,9 +1102,10 @@ update_instead_of_chosen_op: } /* - * Prepare the transaction 10% of the time. + * If prepare configured, prepare the transaction 10% of the + * time. */ - if (mmrand(&tinfo->rnd, 1, 10) == 1) { + if (g.c_prepare && mmrand(&tinfo->rnd, 1, 10) == 1) { ret = prepare_transaction(tinfo, session); testutil_assert(ret == 0 || ret == WT_PREPARE_CONFLICT); if (ret == WT_PREPARE_CONFLICT) |