diff options
Diffstat (limited to 'src/third_party/wiredtiger')
59 files changed, 1482 insertions, 642 deletions
diff --git a/src/third_party/wiredtiger/build_posix/aclocal/strict.m4 b/src/third_party/wiredtiger/build_posix/aclocal/strict.m4 index 92e77815a18..9e2bec3f396 100644 --- a/src/third_party/wiredtiger/build_posix/aclocal/strict.m4 +++ b/src/third_party/wiredtiger/build_posix/aclocal/strict.m4 @@ -36,7 +36,6 @@ AC_DEFUN([AM_GCC_WARNINGS], [ w="$w -Wwrite-strings" # Non-fatal informational warnings. - w="$w -Wno-error=inline" w="$w -Wno-error=unsafe-loop-optimizations" # GCC 4.7 diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 78dc25850cf..93fdb0e6a4f 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -1137,6 +1137,10 @@ methods = { ]), 'WT_SESSION.begin_transaction' : Method([ + Config('ignore_prepare', 'false', r''' + whether to ignore the updates by other prepared transactions as part of + read operations of this transaction''', + type='boolean'), Config('isolation', '', r''' the isolation level for this transaction; defaults to the session's isolation level''', @@ -1182,6 +1186,14 @@ methods = { choices=['background', 'off', 'on']), ]), +'WT_SESSION.prepare_transaction' : Method([ + Config('prepare_timestamp', '', r''' + set the prepare timestamp for the updates of the current transaction. + The supplied value should not be older than any active read timestamps. + This configuration option is mandatory. See + @ref transaction_timestamps'''), +]), + 'WT_SESSION.timestamp_transaction' : Method([ Config('commit_timestamp', '', r''' set the commit timestamp for the current transaction. The supplied diff --git a/src/third_party/wiredtiger/dist/dist.py b/src/third_party/wiredtiger/dist/dist.py index e4b76bdbab4..7fe473c3abd 100644 --- a/src/third_party/wiredtiger/dist/dist.py +++ b/src/third_party/wiredtiger/dist/dist.py @@ -21,6 +21,8 @@ def all_c_files(): yield line for line in glob.iglob('../test/*/*.[ci]'): yield line + for line in glob.iglob('../test/*/*/*.[ci]'): + yield line # all_h_files -- # Return list of all WiredTiger C include file names. diff --git a/src/third_party/wiredtiger/dist/function.py b/src/third_party/wiredtiger/dist/function.py index 0e36a539cc4..69ebd4748dc 100644 --- a/src/third_party/wiredtiger/dist/function.py +++ b/src/third_party/wiredtiger/dist/function.py @@ -44,6 +44,7 @@ types = [ 'struct', 'union', 'enum', + 'TEST_', 'WT_', 'wt_', 'double', diff --git a/src/third_party/wiredtiger/dist/s_function b/src/third_party/wiredtiger/dist/s_function index 3259e215d0c..314a8c5bb06 100755 --- a/src/third_party/wiredtiger/dist/s_function +++ b/src/third_party/wiredtiger/dist/s_function @@ -73,10 +73,11 @@ done for f in `find bench examples ext src test -name '*.[ci]'`; do sed -n -e '/API_CALL.*;$/,/API_END.*;/{=;p;}' \ -e '/LSM_.*ENTER*;$/,/LSM_.*LEAVE*;/{=;p;}' \ + -e '/WT_TRACK_OP_INIT/,/WT_TRACK_OP_END/{=;p;}' \ -e '/va_start/,/va_end/{=;p;}' $f | \ sed 'N;s/\n/:/' | \ - egrep -w 'return|WT_RET' | \ - sed -e "s,^,$f:," -e 's/$/ [return skips API_END call]/' + egrep -w 'return;|return \(|WT_RET' | \ + sed -e "s,^,$f:," -e 's/$/ [return skips matching end call]/' done diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index 6cd3f219b4a..20f3c72b2b7 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -493,7 +493,7 @@ connection_stats = [ TxnStat('txn_checkpoint_time_total', 'transaction checkpoint total time (msecs)', 'no_clear,no_scale'), TxnStat('txn_commit', 'transactions committed'), TxnStat('txn_commit_queue_empty', 'commit timestamp queue insert to empty'), - TxnStat('txn_commit_queue_head', 'commit timestamp queue inserts to head'), + TxnStat('txn_commit_queue_tail', 'commit timestamp queue inserts to tail'), TxnStat('txn_commit_queue_inserts', 'commit timestamp queue inserts total'), TxnStat('txn_commit_queue_len', 'commit timestamp queue length'), TxnStat('txn_fail_cache', 'transaction failures due to cache overflow'), diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c index 7091a0a76d6..8e40b9f0b6c 100644 --- a/src/third_party/wiredtiger/examples/c/ex_all.c +++ b/src/third_party/wiredtiger/examples/c/ex_all.c @@ -900,6 +900,21 @@ transaction_ops(WT_SESSION *session_arg) error_check(session->commit_transaction(session, NULL)); /*! [transaction isolation] */ + /*! [transaction prepare] */ + /* + * Prepare a transaction which guarantees a subsequent commit will + * succeed. Only commit and rollback are allowed on a transaction after + * it has been prepared. + */ + error_check(session->open_cursor( + session, "table:mytable", NULL, NULL, &cursor)); + error_check(session->begin_transaction(session, NULL)); + cursor->set_key(cursor, "key"); + cursor->set_value(cursor, "value"); + session->prepare_transaction(session, "prepare_timestamp=2a"); + error_check(session->commit_transaction(session, NULL)); + /*! [transaction prepare] */ + /*! [session isolation configuration] */ /* Open a session configured for read-uncommitted isolation. */ error_check(conn->open_session( diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 2113eef94fe..d3d7ff86d32 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -1,5 +1,5 @@ { - "commit": "b85bcfde3b7e41a35017385fd219553e7028c427", + "commit": "bc82f0f0383af9ef41ece37a36ae9200ea4561a3", "github": "wiredtiger/wiredtiger.git", "vendor": "wiredtiger", "branch": "mongodb-3.6" diff --git a/src/third_party/wiredtiger/lang/java/java_doc.i b/src/third_party/wiredtiger/lang/java/java_doc.i index 768f06e4ec1..64dbf7850b0 100644 --- a/src/third_party/wiredtiger/lang/java/java_doc.i +++ b/src/third_party/wiredtiger/lang/java/java_doc.i @@ -48,6 +48,7 @@ COPYDOC(__wt_session, WT_SESSION, upgrade) COPYDOC(__wt_session, WT_SESSION, verify) COPYDOC(__wt_session, WT_SESSION, begin_transaction) COPYDOC(__wt_session, WT_SESSION, commit_transaction) +COPYDOC(__wt_session, WT_SESSION, prepare_transaction) COPYDOC(__wt_session, WT_SESSION, rollback_transaction) COPYDOC(__wt_session, WT_SESSION, timestamp_transaction) COPYDOC(__wt_session, WT_SESSION, checkpoint) diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c index 55df1527e98..73a3d13e307 100644 --- a/src/third_party/wiredtiger/src/block/block_ckpt.c +++ b/src/third_party/wiredtiger/src/block/block_ckpt.c @@ -221,10 +221,11 @@ __wt_block_checkpoint_start(WT_SESSION_IMPL *session, WT_BLOCK *block) case WT_CKPT_INPROGRESS: case WT_CKPT_PANIC_ON_FAILURE: case WT_CKPT_SALVAGE: - ret = __wt_block_panic(session, EINVAL, + __wt_err(session, EINVAL, "%s: an unexpected checkpoint start: the checkpoint " "has already started or was configured for salvage", block->name); + ret = __wt_block_panic(session); break; case WT_CKPT_NONE: block->ckpt_state = WT_CKPT_INPROGRESS; @@ -433,10 +434,11 @@ __ckpt_process(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_CKPT *ckptbase) break; case WT_CKPT_NONE: case WT_CKPT_PANIC_ON_FAILURE: - ret = __wt_block_panic(session, EINVAL, + __wt_err(session, EINVAL, "%s: an unexpected checkpoint attempt: the checkpoint " "was never started or has already completed", block->name); + ret = __wt_block_panic(session); break; case WT_CKPT_SALVAGE: /* Salvage doesn't use the standard checkpoint APIs. */ @@ -718,9 +720,11 @@ live_update: "list"); #endif -err: if (ret != 0 && fatal) - ret = __wt_block_panic(session, ret, +err: if (ret != 0 && fatal) { + __wt_err(session, ret, "%s: fatal checkpoint failure", block->name); + ret = __wt_block_panic(session); + } if (locked) __wt_spin_unlock(session, &block->live_lock); @@ -842,26 +846,30 @@ __wt_block_checkpoint_resolve( goto done; case WT_CKPT_NONE: case WT_CKPT_SALVAGE: - ret = __wt_block_panic(session, EINVAL, + __wt_err(session, EINVAL, "%s: an unexpected checkpoint resolution: the checkpoint " "was never started or completed, or configured for salvage", block->name); + ret = __wt_block_panic(session); break; case WT_CKPT_PANIC_ON_FAILURE: if (!failed) break; - ret = __wt_block_panic(session, EINVAL, + __wt_err(session, EINVAL, "%s: the checkpoint failed, the system must restart", block->name); + ret = __wt_block_panic(session); break; } WT_ERR(ret); if ((ret = __wt_block_extlist_merge( - session, block, &ci->ckpt_avail, &ci->avail)) != 0) - WT_ERR(__wt_block_panic(session, ret, + session, block, &ci->ckpt_avail, &ci->avail)) != 0) { + __wt_err(session, ret, "%s: fatal checkpoint failure during extent list merge", - block->name)); + block->name); + ret = __wt_block_panic(session); + } __wt_spin_unlock(session, &block->live_lock); /* Discard the lists remaining after the checkpoint call. */ diff --git a/src/third_party/wiredtiger/src/block/block_mgr.c b/src/third_party/wiredtiger/src/block/block_mgr.c index a2790863961..5fe4cb51fac 100644 --- a/src/third_party/wiredtiger/src/block/block_mgr.c +++ b/src/third_party/wiredtiger/src/block/block_mgr.c @@ -569,6 +569,7 @@ __bm_method_set(WT_BM *bm, bool readonly) bm->compact_page_skip = __bm_compact_page_skip; bm->compact_skip = __bm_compact_skip; bm->compact_start = __bm_compact_start; + bm->corrupt = __wt_bm_corrupt; bm->free = __bm_free; bm->is_mapped = __bm_is_mapped; bm->map_discard = __bm_map_discard; @@ -638,20 +639,9 @@ err: WT_TRET(bm->close(bm, session)); * Report an error, then panic the handle and the system. */ int -__wt_block_panic(WT_SESSION_IMPL *session, int error, const char *fmt, ...) +__wt_block_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_ATTRIBUTE((cold)) - WT_GCC_FUNC_ATTRIBUTE((format (printf, 3, 4))) { - va_list ap; - - /* - * Ignore error returns from underlying event handlers, we already have - * an error value to return. - */ - va_start(ap, fmt); - WT_IGNORE_RET(__wt_eventv(session, false, error, NULL, 0, fmt, ap)); - va_end(ap); - /* Switch the handle into read-only mode. */ __bm_method_set(S2BT(session)->bm, true); diff --git a/src/third_party/wiredtiger/src/block/block_read.c b/src/third_party/wiredtiger/src/block/block_read.c index cd419566e40..ec44885f56a 100644 --- a/src/third_party/wiredtiger/src/block/block_read.c +++ b/src/third_party/wiredtiger/src/block/block_read.c @@ -107,6 +107,77 @@ __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, return (0); } +/* + * __wt_bm_corrupt_dump -- + * Dump a block into the log in 1KB chunks. + */ +static int +__wt_bm_corrupt_dump(WT_SESSION_IMPL *session, + WT_ITEM *buf, wt_off_t offset, uint32_t size, uint32_t checksum) + WT_GCC_FUNC_ATTRIBUTE((cold)) +{ + WT_DECL_ITEM(tmp); + WT_DECL_RET; + size_t chunk, i, nchunks; + +#define WT_CORRUPT_FMT "{%" PRIuMAX ", %" PRIu32 ", %" PRIu32 "}" + if (buf->size == 0) { + __wt_errx(session, + WT_CORRUPT_FMT ": empty buffer, no dump available", + (uintmax_t)offset, size, checksum); + return (0); + } + + WT_RET(__wt_scr_alloc(session, 4 * 1024, &tmp)); + + nchunks = buf->size / 1024 + (buf->size % 1024 == 0 ? 0 : 1); + for (chunk = i = 0;;) { + WT_ERR(__wt_buf_catfmt( + session, tmp, "%02x ", ((uint8_t *)buf->data)[i])); + if (++i == buf->size || i % 1024 == 0) { + __wt_errx(session, + WT_CORRUPT_FMT + ": (chunk %" WT_SIZET_FMT " of %" WT_SIZET_FMT + "): %.*s", + (uintmax_t)offset, size, checksum, + ++chunk, nchunks, + (int)tmp->size, (char *)tmp->data); + if (i == buf->size) + break; + WT_ERR(__wt_buf_set(session, tmp, "", 0)); + } + } + +err: __wt_scr_free(session, &tmp); + return (ret); +} + +/* + * __wt_bm_corrupt -- + * Report a block has been corrupted, external API. + */ +int +__wt_bm_corrupt(WT_BM *bm, + WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) +{ + WT_DECL_ITEM(tmp); + WT_DECL_RET; + wt_off_t offset; + uint32_t checksum, size; + + /* Read the block. */ + WT_RET(__wt_scr_alloc(session, 0, &tmp)); + WT_ERR(__wt_bm_read(bm, session, tmp, addr, addr_size)); + + /* Crack the cookie, dump the block. */ + WT_ERR(__wt_block_buffer_to_addr( + bm->block, addr, &offset, &size, &checksum)); + WT_ERR(__wt_bm_corrupt_dump(session, tmp, offset, size, checksum)); + +err: __wt_scr_free(session, &tmp); + return (ret); +} + #ifdef HAVE_DIAGNOSTIC /* * __wt_block_read_off_blind -- @@ -221,6 +292,10 @@ __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, "of %" PRIu32, size, (uintmax_t)offset, swap.checksum, checksum); + if (!F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE)) + WT_IGNORE_RET( + __wt_bm_corrupt_dump(session, buf, offset, size, checksum)); + /* Panic if a checksum fails during an ordinary read. */ return (block->verify || F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE) ? diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c index 8c7170e6180..1372b964dbd 100644 --- a/src/third_party/wiredtiger/src/btree/bt_cursor.c +++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c @@ -59,6 +59,7 @@ __cursor_page_pinned(WT_CURSOR_BTREE *cbt) { WT_CURSOR *cursor; WT_SESSION_IMPL *session; + uint32_t current_state; cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; @@ -68,7 +69,7 @@ __cursor_page_pinned(WT_CURSOR_BTREE *cbt) * external key. */ if (!F_ISSET(cbt, WT_CBT_ACTIVE)) { - WT_ASSERT((WT_SESSION_IMPL *)cursor->session, + WT_ASSERT(session, cbt->ref == NULL && !F_ISSET(cursor, WT_CURSTD_KEY_INT)); return (false); } @@ -89,12 +90,16 @@ __cursor_page_pinned(WT_CURSOR_BTREE *cbt) return (false); /* - * If we are doing an update, we need a page with history. Release the - * page so we get it again with history if required. + * If we are doing an update, we need a page with history, release the + * page so we get it again with history if required. Eviction may be + * locking the page, wait until we see a "normal" state and then test + * against that state (eviction may have already locked the page again). */ - if (F_ISSET(&session->txn, WT_TXN_UPDATE) && - cbt->ref->state != WT_REF_MEM) - return (false); + if (F_ISSET(&session->txn, WT_TXN_UPDATE)) { + while ((current_state = cbt->ref->state) == WT_REF_LOCKED) + __wt_yield(); + return (current_state == WT_REF_MEM); + } return (true); } @@ -654,6 +659,12 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp) } else if ((ret = __wt_btcur_next(cbt, false)) != WT_NOTFOUND) exact = 1; else { + /* + * The cursor next call may have overwritten our caller's key, + * restore it to its original value. + */ + __cursor_state_restore(cursor, &state); + WT_ERR(__cursor_func_init(cbt, true)); WT_ERR(btree->type == BTREE_ROW ? __cursor_row_search(session, cbt, NULL, true) : @@ -811,9 +822,9 @@ err: if (ret == WT_RESTART) { goto retry; } -done: /* Insert doesn't maintain a position across calls, clear resources. */ + /* Insert doesn't maintain a position across calls, clear resources. */ if (ret == 0) { - F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); +done: F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); if (append_key) F_SET(cursor, WT_CURSTD_KEY_EXT); } @@ -921,10 +932,12 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt) WT_CURSOR *cursor; WT_DECL_RET; WT_SESSION_IMPL *session; + bool iterating; btree = cbt->btree; cursor = &cbt->iface; session = (WT_SESSION_IMPL *)cursor->session; + iterating = F_ISSET(cbt, WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV); WT_STAT_CONN_INCR(session, cursor_remove); WT_STAT_DATA_INCR(session, cursor_remove); @@ -991,35 +1004,23 @@ __wt_btcur_remove(WT_CURSOR_BTREE *cbt) __cursor_col_modify(session, cbt, WT_UPDATE_TOMBSTONE); if (ret == 0) goto done; - - /* - * The pinned page goes away if we fail for any reason, get a - * local copy of any pinned key and discard any value (remove - * discards any previous value on success or failure). (Restart - * could still use the pinned page, but that's an unlikely - * path.) Re-save the cursor state: we may retry but eventually - * fail. - */ - WT_TRET(__cursor_localkey(cursor)); - F_CLR(cursor, WT_CURSTD_VALUE_SET); - __cursor_state_save(cursor, &state); goto err; } - if (positioned == POSITIONED) - positioned = SEARCH_POSITION; - /* - * The pinned page goes away if we do a search, get a local copy of any - * pinned key and discard any value (remove discards any previous - * value on success or failure). Re-save the cursor state: we may retry - * but eventually fail. + * The pinned page goes away if we do a search, including as a result of + * a restart. Get a local copy of any pinned key and re-save the cursor + * state: we may retry but eventually fail. + * + * Note these steps must be repeatable, we'll continue to take this path + * as long as we encounter WT_RESTART. */ +retry: if (positioned == POSITIONED) + positioned = SEARCH_POSITION; WT_ERR(__cursor_localkey(cursor)); - F_CLR(cursor, WT_CURSTD_VALUE_SET); __cursor_state_save(cursor, &state); -retry: WT_ERR(__cursor_func_init(cbt, true)); + WT_ERR(__cursor_func_init(cbt, true)); if (btree->type == BTREE_ROW) { WT_ERR(__cursor_row_search(session, cbt, NULL, false)); @@ -1067,9 +1068,8 @@ err: if (ret == WT_RESTART) { goto retry; } -done: if (ret == 0) { - F_CLR(cursor, WT_CURSTD_VALUE_SET); - switch (positioned) { + if (ret == 0) { +done: switch (positioned) { case NO_POSITION: /* * Never positioned and we leave it that way, clear any @@ -1099,19 +1099,35 @@ done: if (ret == 0) { __cursor_state_restore(cursor, &state); /* - * If the cursor is configured to overwrite and the record isn't - * found, that is exactly what we want, return success. Note we - * set clear the return value after everything else, the clause + * If the record isn't found and the cursor is configured for + * overwrite, that is what we want, try to return success. + * + * We set the return to 0 after testing for success, the clause * above dealing with the cursor position is only correct if we * were successful. If search failed after positioned is set to * SEARCH_POSITION, we cannot return a key. The only action to * take is to set the cursor to its original key, which we just * did. + * + * Finally, if an iterating or positioned cursor was forced to + * give up its pinned page and then a search failed, we've + * lost our cursor position. Since no subsequent iteration can + * succeed, we cannot return success. */ - if (F_ISSET(cursor, WT_CURSTD_OVERWRITE) && ret == WT_NOTFOUND) + if (ret == WT_NOTFOUND && + F_ISSET(cursor, WT_CURSTD_OVERWRITE) && + !iterating && positioned == NO_POSITION) ret = 0; } + /* + * Upper level cursor removes don't expect the cursor value to be set + * after a successful remove (and check in diagnostic mode). Error + * handling may have converted failure to a success, do a final check. + */ + if (ret == 0) + F_CLR(cursor, WT_CURSTD_VALUE_SET); + return (ret); } @@ -1230,8 +1246,8 @@ err: if (ret == WT_RESTART) { * To make this work, we add a field to the btree cursor to pass back a * pointer to the modify function's allocated update structure. */ -done: if (ret == 0) - switch (modify_type) { + if (ret == 0) { +done: switch (modify_type) { case WT_UPDATE_STANDARD: /* * WT_CURSOR.update returns a key and a value. @@ -1258,6 +1274,7 @@ done: if (ret == 0) WT_TRET(__wt_illegal_value(session, NULL)); break; } + } if (ret != 0) { WT_TRET(__cursor_reset(cbt)); diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c index 325aec853e5..6575080c858 100644 --- a/src/third_party/wiredtiger/src/btree/bt_debug.c +++ b/src/third_party/wiredtiger/src/btree/bt_debug.c @@ -511,9 +511,8 @@ __debug_dsk_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk) * Pretty-print information about a page. */ static char * -__debug_tree_shape_info(WT_PAGE *page) +__debug_tree_shape_info(WT_PAGE *page, char *buf, size_t len) { - static char buf[128]; uint64_t v; const char *unit; @@ -532,7 +531,7 @@ __debug_tree_shape_info(WT_PAGE *page) unit = "B"; } - (void)__wt_snprintf(buf, sizeof(buf), "(%p, %" PRIu64 + (void)__wt_snprintf(buf, len, "(%p, %" PRIu64 "%s, evict gen %" PRIu64 ", create gen %" PRIu64 ")", (void *)page, v, unit, page->evict_pass_gen, page->cache_create_gen); @@ -548,12 +547,14 @@ __debug_tree_shape_worker(WT_DBG *ds, WT_PAGE *page, int level) { WT_REF *ref; WT_SESSION_IMPL *session; + char buf[128]; session = ds->session; if (WT_PAGE_IS_INTERNAL(page)) { WT_RET(ds->f(ds, "%*s" "I" "%d %s\n", - level * 3, " ", level, __debug_tree_shape_info(page))); + level * 3, " ", level, + __debug_tree_shape_info(page, buf, sizeof(buf)))); WT_INTL_FOREACH_BEGIN(session, page, ref) { if (ref->state == WT_REF_MEM) WT_RET(__debug_tree_shape_worker( @@ -561,7 +562,8 @@ __debug_tree_shape_worker(WT_DBG *ds, WT_PAGE *page, int level) } WT_INTL_FOREACH_END; } else WT_RET(ds->f(ds, "%*s" "L" " %s\n", - level * 3, " ", __debug_tree_shape_info(page))); + level * 3, " ", + __debug_tree_shape_info(page, buf, sizeof(buf)))); return (0); } diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index 370e81673d8..e6f8bad8e31 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -398,32 +398,18 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt) /* Debugging information */ WT_RET(__wt_config_gets(session, cfg, "assert.commit_timestamp", &cval)); - if (WT_STRING_MATCH("always", cval.str, cval.len)) { + btree->assert_flags = 0; + if (WT_STRING_MATCH("always", cval.str, cval.len)) FLD_SET(btree->assert_flags, WT_ASSERT_COMMIT_TS_ALWAYS); - FLD_CLR(btree->assert_flags, - WT_ASSERT_COMMIT_TS_KEYS | WT_ASSERT_COMMIT_TS_NEVER); - } else if (WT_STRING_MATCH("key_consistent", cval.str, cval.len)) { + else if (WT_STRING_MATCH("key_consistent", cval.str, cval.len)) FLD_SET(btree->assert_flags, WT_ASSERT_COMMIT_TS_KEYS); - FLD_CLR(btree->assert_flags, - WT_ASSERT_COMMIT_TS_ALWAYS | WT_ASSERT_COMMIT_TS_NEVER); - } else if (WT_STRING_MATCH("never", cval.str, cval.len)) { + else if (WT_STRING_MATCH("never", cval.str, cval.len)) FLD_SET(btree->assert_flags, WT_ASSERT_COMMIT_TS_NEVER); - FLD_CLR(btree->assert_flags, - WT_ASSERT_COMMIT_TS_ALWAYS | WT_ASSERT_COMMIT_TS_KEYS); - } else - FLD_CLR(btree->assert_flags, - WT_ASSERT_COMMIT_TS_ALWAYS | - WT_ASSERT_COMMIT_TS_KEYS | WT_ASSERT_COMMIT_TS_NEVER); WT_RET(__wt_config_gets(session, cfg, "assert.read_timestamp", &cval)); - if (WT_STRING_MATCH("always", cval.str, cval.len)) { + if (WT_STRING_MATCH("always", cval.str, cval.len)) FLD_SET(btree->assert_flags, WT_ASSERT_READ_TS_ALWAYS); - FLD_CLR(btree->assert_flags, WT_ASSERT_READ_TS_NEVER); - } else if (WT_STRING_MATCH("never", cval.str, cval.len)) { + else if (WT_STRING_MATCH("never", cval.str, cval.len)) FLD_SET(btree->assert_flags, WT_ASSERT_READ_TS_NEVER); - FLD_CLR(btree->assert_flags, WT_ASSERT_READ_TS_ALWAYS); - } else - FLD_CLR(btree->assert_flags, - WT_ASSERT_READ_TS_ALWAYS | WT_ASSERT_READ_TS_NEVER); /* Huffman encoding */ WT_RET(__wt_btree_huffman_open(session)); diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c index 007513fd581..4c108114438 100644 --- a/src/third_party/wiredtiger/src/btree/bt_io.c +++ b/src/third_party/wiredtiger/src/btree/bt_io.c @@ -154,7 +154,9 @@ corrupt: if (ret == 0) if (!F_ISSET(btree, WT_BTREE_VERIFY) && !F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE)) { __wt_err(session, ret, "%s", fail_msg); - ret = __wt_illegal_value(session, btree->dhandle->name); + WT_TRET(bm->corrupt(bm, session, addr, addr_size)); + WT_TRET( + __wt_illegal_value(session, btree->dhandle->name)); } } diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c index afaf6c82aa5..20e6c8c7b4d 100644 --- a/src/third_party/wiredtiger/src/btree/bt_read.c +++ b/src/third_party/wiredtiger/src/btree/bt_read.c @@ -209,6 +209,7 @@ __las_page_instantiate_verbose(WT_SESSION_IMPL *session, uint64_t las_pageid) static int __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id) { + WT_CACHE *cache; WT_CURSOR *cursor; WT_CURSOR_BTREE cbt; WT_DECL_ITEM(current_key); @@ -221,15 +222,18 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id) uint32_t las_id, session_flags; const uint8_t *p; uint8_t upd_type; + bool locked; cursor = NULL; page = ref->page; first_upd = last_upd = upd = NULL; + locked = false; total_incr = 0; current_recno = recno = WT_RECNO_OOB; session_flags = 0; /* [-Werror=maybe-uninitialized] */ WT_CLEAR(las_key); + cache = S2C(session)->cache; __las_page_instantiate_verbose(session, ref->page_las->las_pageid); WT_STAT_CONN_INCR(session, cache_read_lookaside); WT_STAT_DATA_INCR(session, cache_read_lookaside); @@ -251,6 +255,8 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id) */ ret = __wt_las_cursor_position( cursor, btree_id, ref->page_las->las_pageid); + __wt_readlock(session, &cache->las_sweepwalk_lock); + locked = true; for (; ret == 0; ret = cursor->next(cursor)) { WT_ERR(cursor->get_key(cursor, &las_pageid, &las_id, &las_counter, &las_key)); @@ -317,6 +323,8 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id) } upd = NULL; } + __wt_readunlock(session, &cache->las_sweepwalk_lock); + locked = false; WT_ERR_NOTFOUND_OK(ret); /* Insert the last set of updates, if any. */ @@ -369,7 +377,9 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id) } } -err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags)); +err: if (locked) + __wt_readunlock(session, &cache->las_sweepwalk_lock); + WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags)); WT_TRET(__wt_btcur_close(&cbt, true)); /* @@ -498,17 +508,9 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) final_state = WT_REF_MEM; - /* - * If we already have the page image, just instantiate the history. - * - * We need exclusive access because other threads could be reading the - * page without history and we can't change the state underneath them. - */ - if (previous_state == WT_REF_LIMBO) { - if (__wt_hazard_check(session, ref) != NULL) - goto err; + /* If we already have the page image, just instantiate the history. */ + if (previous_state == WT_REF_LIMBO) goto skip_read; - } /* * Get the address: if there is no address, the page was deleted or had @@ -608,7 +610,7 @@ skip_read: * Don't free WT_REF.page_las, there may be concurrent readers. */ WT_TRET(__wt_las_remove_block( - session, NULL, btree->id, ref->page_las->las_pageid)); + session, btree->id, ref->page_las->las_pageid)); ref->page_las->eviction_to_lookaside = false; break; diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c index 5fa46cb7fb2..1f0b9c4b285 100644 --- a/src/third_party/wiredtiger/src/cache/cache_las.c +++ b/src/third_party/wiredtiger/src/cache/cache_las.c @@ -19,6 +19,29 @@ WT_SESSION_NO_RECONCILE) /* + * __las_set_isolation -- + * Switch to read-uncommitted. + */ +static void +__las_set_isolation( + WT_SESSION_IMPL *session, WT_TXN_ISOLATION *saved_isolationp) +{ + *saved_isolationp = session->txn.isolation; + session->txn.isolation = WT_ISO_READ_UNCOMMITTED; +} + +/* + * __las_restore_isolation -- + * Restore isolation. + */ +static void +__las_restore_isolation( + WT_SESSION_IMPL *session, WT_TXN_ISOLATION saved_isolation) +{ + session->txn.isolation = saved_isolation; +} + +/* * __wt_las_nonempty -- * Return when there are entries in the lookaside table. */ @@ -56,11 +79,15 @@ __wt_las_stats_update(WT_SESSION_IMPL *session) if (!F_ISSET(conn, WT_CONN_LOOKASIDE_OPEN)) return; + /* Set the connection-wide statistics. */ + cstats = conn->stats; + WT_STAT_SET( + session, cstats, cache_lookaside_entries, cache->las_entry_count); + /* * We have a cursor, and we need the underlying data handle; we can get * to it by way of the underlying btree handle, but it's a little ugly. */ - cstats = conn->stats; dstats = ((WT_CURSOR_BTREE *) cache->las_session[0]->las_cursor)->btree->dhandle->stats; @@ -68,6 +95,7 @@ __wt_las_stats_update(WT_SESSION_IMPL *session) WT_STAT_SET(session, cstats, cache_lookaside_insert, v); v = WT_STAT_READ(dstats, cursor_remove); WT_STAT_SET(session, cstats, cache_lookaside_remove, v); + /* * If we're clearing stats we need to clear the cursor values we just * read. This does not clear the rest of the statistics in the @@ -326,6 +354,51 @@ __wt_las_cursor_close( } /* + * __las_remove_block -- + * Remove all records for a given page from the lookaside store. + */ +static int +__las_remove_block(WT_SESSION_IMPL *session, + WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid, uint64_t *decrp) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_ITEM las_key; + uint64_t las_counter, las_pageid; + uint32_t las_id; + + *decrp = 0; + + conn = S2C(session); + + __wt_writelock(session, &conn->cache->las_sweepwalk_lock); + + /* + * Search for the block's unique btree ID and page ID prefix and step + * through all matching records, removing them. + */ + for (ret = __wt_las_cursor_position(cursor, btree_id, pageid); + ret == 0; ret = cursor->next(cursor)) { + WT_ERR(cursor->get_key(cursor, + &las_pageid, &las_id, &las_counter, &las_key)); + + /* + * Confirm the record matches; if not a match, we're done + * searching for records for this page. + */ + if (las_pageid != pageid || las_id != btree_id) + break; + + WT_ERR(cursor->remove(cursor)); + ++*decrp; + } + WT_ERR_NOTFOUND_OK(ret); + +err: __wt_writeunlock(session, &conn->cache->las_sweepwalk_lock); + return (ret); +} + +/* * __las_insert_block_verbose -- * Display a verbose message once per checkpoint with details about the * cache state when performing a lookaside table write. @@ -335,14 +408,13 @@ __las_insert_block_verbose(WT_SESSION_IMPL *session, WT_MULTI *multi) { WT_CACHE *cache; WT_CONNECTION_IMPL *conn; + double pct_dirty, pct_full; + uint64_t ckpt_gen_current, ckpt_gen_last; + uint32_t btree_id; #ifdef HAVE_TIMESTAMPS char hex_timestamp[2 * WT_TIMESTAMP_SIZE + 1]; -#else - char hex_timestamp[9]; /* Enough for disabled string */ #endif - uint64_t ckpt_gen_current, ckpt_gen_last; - uint32_t btree_id; - double pct_dirty, pct_full; + const char *ts; btree_id = S2BT(session)->id; @@ -370,9 +442,9 @@ __las_insert_block_verbose(WT_SESSION_IMPL *session, WT_MULTI *multi) #ifdef HAVE_TIMESTAMPS WT_RET(__wt_timestamp_to_hex_string( session, hex_timestamp, &multi->page_las.min_timestamp)); + ts = hex_timestamp; #else - WT_RET(__wt_snprintf( - hex_timestamp, sizeof(hex_timestamp), "disabled")); + ts = "disabled"; #endif __wt_verbose(session, WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY, @@ -384,7 +456,7 @@ __las_insert_block_verbose(WT_SESSION_IMPL *session, WT_MULTI *multi) "cache use: %2.3f%%", btree_id, multi->page_las.las_pageid, multi->page_las.las_max_txn, - hex_timestamp, + ts, multi->page_las.las_skew_newest ? "newest" : "oldest", WT_STAT_READ(conn->stats, cache_lookaside_entries), pct_dirty, pct_full); @@ -398,44 +470,52 @@ __las_insert_block_verbose(WT_SESSION_IMPL *session, WT_MULTI *multi) /* * __wt_las_insert_block -- - * Copy one set of saved updates into the database's lookaside buffer. + * Copy one set of saved updates into the database's lookaside table. */ int __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_PAGE *page, WT_MULTI *multi, WT_ITEM *key) { WT_BTREE *btree; + WT_CONNECTION_IMPL *conn; WT_DECL_RET; WT_ITEM las_timestamp, las_value; WT_SAVE_UPD *list; WT_SESSION_IMPL *las_session; + WT_TXN_ISOLATION saved_isolation; WT_UPDATE *upd; - uint64_t insert_cnt, las_counter, las_pageid; + uint64_t decrement_cnt, insert_cnt, insert_estimate; + uint64_t las_counter, las_pageid; uint32_t btree_id, i, slot; uint8_t *p; + bool local_txn; + btree = S2BT(session); + conn = S2C(session); WT_CLEAR(las_timestamp); WT_CLEAR(las_value); - insert_cnt = 0; - - btree = S2BT(session); + decrement_cnt = insert_cnt = insert_estimate = 0; btree_id = btree->id; + local_txn = false; + las_pageid = multi->page_las.las_pageid = - __wt_atomic_add64(&S2C(session)->cache->las_pageid, 1); + __wt_atomic_add64(&conn->cache->las_pageid, 1); if (!btree->lookaside_entries) btree->lookaside_entries = true; /* Wrap all the updates in a transaction. */ las_session = (WT_SESSION_IMPL *)cursor->session; - WT_RET(__wt_txn_begin(las_session, NULL)); - las_session->txn.isolation = WT_ISO_READ_UNCOMMITTED; + __las_set_isolation(las_session, &saved_isolation); + WT_ERR(__wt_txn_begin(las_session, NULL)); + local_txn = true; /* * Make sure there are no leftover entries (e.g., from a handle * reopen). */ - WT_ERR(__wt_las_remove_block(session, cursor, btree_id, las_pageid)); + WT_ERR(__las_remove_block( + session, cursor, btree_id, las_pageid, &decrement_cnt)); /* Enter each update in the boundary's list into the lookaside store. */ for (las_counter = 0, i = 0, @@ -531,6 +611,18 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, upd->type, &las_value); /* + * If remove is running concurrently, it's possible for + * records to be removed before the insert transaction + * commit (remove is configured read-uncommitted). Make + * sure increments stay ahead of decrements. + */ + if (insert_estimate <= insert_cnt) { + insert_estimate += 100; + (void)__wt_atomic_add64( + &conn->cache->las_entry_count, 100); + } + + /* * Using update looks a little strange because the keys * are guaranteed to not exist, but since we're * appending, we want the cursor to stay positioned in @@ -541,21 +633,38 @@ __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, } while ((upd = upd->next) != NULL); } +err: /* Resolve the transaction. */ + if (local_txn) { + if (ret == 0) + ret = __wt_txn_commit(las_session, NULL); + else + WT_TRET(__wt_txn_rollback(las_session, NULL)); + } + + __las_restore_isolation(las_session, saved_isolation); + + /* + * If the transaction successfully committed and we inserted records, + * adjust the final entry count. We may have also deleted records, + * but we must have intended to insert records to be in this function + * at all, checking the insert count is sufficient. + */ if (insert_cnt > 0) { - WT_STAT_CONN_INCRV( - session, cache_lookaside_entries, insert_cnt); - (void)__wt_atomic_add64( - &S2C(session)->cache->las_entry_count, insert_cnt); - WT_ERR(__las_insert_block_verbose(session, multi)); + if (ret == 0) { + (void)__wt_atomic_add64( + &conn->cache->las_entry_count, + insert_estimate - insert_cnt); + __wt_cache_decr_check_uint64(session, + &conn->cache->las_entry_count, + decrement_cnt, "lookaside entry count"); + + ret = __las_insert_block_verbose(session, multi); + } else + __wt_cache_decr_check_uint64(session, + &conn->cache->las_entry_count, + insert_estimate, "lookaside entry count"); } -err: /* Resolve the transaction. */ - if (ret == 0) - ret = __wt_txn_commit(las_session, NULL); - else - WT_TRET(__wt_txn_rollback(las_session, NULL)); - __wt_free(session, multi->supd); - multi->supd_entries = 0; return (ret); } @@ -621,74 +730,49 @@ __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) /* * __wt_las_remove_block -- - * Remove all records for a given page from the lookaside store. + * Remove all records for a given page from the lookaside table. */ int -__wt_las_remove_block(WT_SESSION_IMPL *session, - WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) +__wt_las_remove_block( + WT_SESSION_IMPL *session, uint32_t btree_id, uint64_t pageid) { + WT_CONNECTION_IMPL *conn; + WT_CURSOR *cursor; WT_DECL_RET; - WT_ITEM las_key; WT_SESSION_IMPL *las_session; - uint64_t las_counter, las_pageid, remove_cnt; - uint32_t las_id, session_flags; - bool local_cursor, local_txn; + WT_TXN_ISOLATION saved_isolation; + uint64_t decrement_cnt; + uint32_t session_flags; - remove_cnt = 0; + conn = S2C(session); session_flags = 0; /* [-Wconditional-uninitialized] */ - local_cursor = local_txn = false; - if (cursor == NULL) { - __wt_las_cursor(session, &cursor, &session_flags); - local_cursor = true; - } - las_session = (WT_SESSION_IMPL *)cursor->session; - /* - * Wrap all of the removes in a transaction, unless this remove is part - * of a larger operation. + * This is an external API for removing records from the lookaside + * table, first acquiring a lookaside table cursor and enclosing + * transaction, then calling an underlying function to do the work. */ - if (local_cursor) { - WT_ERR(__wt_txn_begin(las_session, NULL)); - las_session->txn.isolation = WT_ISO_READ_UNCOMMITTED; - local_txn = true; - } + __wt_las_cursor(session, &cursor, &session_flags); - /* - * Search for the block's unique prefix and step through all matching - * records, removing them. - */ - ret = __wt_las_cursor_position(cursor, btree_id, pageid); - for (; ret == 0; ret = cursor->next(cursor)) { - WT_ERR(cursor->get_key(cursor, - &las_pageid, &las_id, &las_counter, &las_key)); + las_session = (WT_SESSION_IMPL *)cursor->session; + __las_set_isolation(las_session, &saved_isolation); - /* - * Confirm the search using the unique prefix; if not a match, - * we're done searching for records for this page. - */ - if (las_pageid != pageid || las_id != btree_id) - break; + WT_ERR(__wt_txn_begin(las_session, NULL)); - WT_ERR(cursor->remove(cursor)); - ++remove_cnt; - } - WT_ERR_NOTFOUND_OK(ret); + ret = __las_remove_block( + las_session, cursor, btree_id, pageid, &decrement_cnt); + if (ret == 0) + ret = __wt_txn_commit(las_session, NULL); + else + WT_TRET(__wt_txn_rollback(las_session, NULL)); + if (ret == 0) + __wt_cache_decr_check_uint64(session, + &conn->cache->las_entry_count, + decrement_cnt, "lookaside entry count"); + +err: __las_restore_isolation(las_session, saved_isolation); + WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags)); -err: if (local_txn) { - if (ret == 0) - ret = __wt_txn_commit(las_session, NULL); - else - WT_TRET(__wt_txn_rollback(las_session, NULL)); - } - if (local_cursor) - WT_TRET(__wt_las_cursor_close( - session, &cursor, session_flags)); - - WT_STAT_CONN_DECRV(session, cache_lookaside_entries, remove_cnt); - __wt_cache_decr_check_uint64(session, - &S2C(session)->cache->las_entry_count, remove_cnt, - "lookaside entry count"); return (ret); } @@ -715,6 +799,34 @@ err: __wt_spin_unlock(session, &cache->las_sweep_lock); } /* + * __las_sweep_count -- + * Calculate how many records to examine per sweep step. + */ +static inline uint64_t +__las_sweep_count(WT_CACHE *cache) +{ + /* + * The sweep server wakes up every 10 seconds (by default), it's a slow + * moving thread. Try to review the entire lookaside table once every 5 + * minutes, or every 30 calls. + * + * The reason is because the lookaside table exists because we're seeing + * cache/eviction pressure (it allows us to trade performance and disk + * space for cache space), and it's likely lookaside blocks are being + * evicted, and reading them back in doesn't help things. A trickier, + * but possibly better, alternative might be to review all lookaside + * blocks in the cache in order to get rid of them, and slowly review + * lookaside blocks that have already been evicted. + * + * Put upper and lower bounds on the calculation: since reads of pages + * with lookaside entries are blocked during sweep, make sure we do + * some work but don't block reads for too long. + */ + return ((uint64_t)WT_MAX(100, WT_MIN(10 * WT_THOUSAND, + cache->las_entry_count / 30))); +} + +/* * __las_sweep_init -- * Prepare to start a lookaside sweep. */ @@ -726,11 +838,19 @@ __las_sweep_init(WT_SESSION_IMPL *session) u_int i; cache = S2C(session)->cache; + cache->las_sweep_cnt = __las_sweep_count(cache); __wt_spin_lock(session, &cache->las_sweep_lock); - /* If no files have been dropped, there's nothing to do. */ - if (cache->las_dropped_next == 0) - WT_ERR(WT_NOTFOUND); + + /* + * If no files have been dropped and the lookaside file is empty, + * there's nothing to do. + */ + if (cache->las_dropped_next == 0) { + if (cache->las_entry_count == 0) + ret = WT_NOTFOUND; + goto err; + } /* Scan the btree IDs to find min/max. */ cache->las_sweep_dropmin = UINT32_MAX; @@ -767,22 +887,43 @@ __wt_las_sweep(WT_SESSION_IMPL *session) { WT_CACHE *cache; WT_CURSOR *cursor; + WT_DECL_ITEM(saved_key); WT_DECL_RET; - WT_ITEM *key, las_key; - uint64_t cnt, las_counter, las_pageid, remove_cnt; + WT_ITEM las_key, las_timestamp, las_value; + WT_ITEM *sweep_key; + WT_TXN_ISOLATION saved_isolation; +#ifdef HAVE_TIMESTAMPS + wt_timestamp_t timestamp, *val_ts; +#else + wt_timestamp_t *val_ts; +#endif + uint64_t cnt, decrement_cnt, las_counter, las_pageid, txnid; uint32_t las_id, session_flags; + uint8_t upd_type; int notused; + bool local_txn, locked; cache = S2C(session)->cache; cursor = NULL; - key = &cache->las_sweep_key; - remove_cnt = 0; + sweep_key = &cache->las_sweep_key; + decrement_cnt = 0; session_flags = 0; /* [-Werror=maybe-uninitialized] */ + local_txn = locked = false; - __wt_las_cursor(session, &cursor, &session_flags); + WT_RET(__wt_scr_alloc(session, 0, &saved_key)); - /* We should have our own session. */ + /* + * Allocate a cursor and wrap all the updates in a transaction. + * We should have our own lookaside cursor. + */ + __wt_las_cursor(session, &cursor, &session_flags); WT_ASSERT(session, cursor->session == &session->iface); + __las_set_isolation(session, &saved_isolation); + WT_ERR(__wt_txn_begin(session, NULL)); + local_txn = true; + + __wt_writelock(session, &cache->las_sweepwalk_lock); + locked = true; /* * When continuing a sweep, position the cursor using the key from the @@ -792,8 +933,8 @@ __wt_las_sweep(WT_SESSION_IMPL *session) * Otherwise, we're starting a new sweep, gather the list of trees to * sweep. */ - if (key->size != 0) { - __wt_cursor_set_raw_key(cursor, key); + if (sweep_key->size != 0) { + __wt_cursor_set_raw_key(cursor, sweep_key); ret = cursor->search_near(cursor, ¬used); /* @@ -803,51 +944,41 @@ __wt_las_sweep(WT_SESSION_IMPL *session) * table. Searching for the same key could leave us stuck at * the end of the table, repeatedly checking the same rows. */ - key->size = 0; + sweep_key->size = 0; } else ret = __las_sweep_init(session); - if (ret != 0) goto srch_notfound; /* - * The sweep server wakes up every 10 seconds (by default), it's a slow - * moving thread. Try to review the entire lookaside table once every 5 - * minutes, or every 30 calls. - * - * The reason is because the lookaside table exists because we're seeing - * cache/eviction pressure (it allows us to trade performance and disk - * space for cache space), and it's likely lookaside blocks are being - * evicted, and reading them back in doesn't help things. A trickier, - * but possibly better, alternative might be to review all lookaside - * blocks in the cache in order to get rid of them, and slowly review - * lookaside blocks that have already been evicted. + * Walk at least the number we calculated at the beginning of the + * sweep, or more if there have been additional records inserted in the + * meantime. Don't just repeat the calculation here since sweep + * removes entries and that would cause sweep to do less and less work + * rather than driving the lookaside table to empty. */ - cnt = (uint64_t)WT_MAX(100, cache->las_entry_count / 30); + cnt = __las_sweep_count(cache); + if (cnt < cache->las_sweep_cnt) + cnt = cache->las_sweep_cnt; /* Walk the file. */ - for (; cnt > 0 && (ret = cursor->next(cursor)) == 0; --cnt) { + while ((ret = cursor->next(cursor)) == 0) { /* - * Give up if the cache is stuck: we are ignoring the cache - * size while scanning the lookaside table, so we're making - * things worse. + * Stop if the cache is stuck: we are ignoring the cache size + * while scanning the lookaside table, so we're making things + * worse. */ if (__wt_cache_stuck(session)) - cnt = 1; + cnt = 0; /* - * If the loop terminates after completing a work unit, we will - * continue the table sweep next time. Get a local copy of the - * sweep key, we're going to reset the cursor; do so before - * calling cursor.remove, cursor.remove can discard our hazard - * pointer and the page could be evicted from underneath us. + * If we have processed enough entries and we are between + * blocks, give up. */ - if (cnt == 1) { - WT_ERR(__wt_cursor_get_raw_key(cursor, key)); - if (!WT_DATA_IN_ITEM(key)) - WT_ERR(__wt_buf_set( - session, key, key->data, key->size)); - } + if (cnt > 0) + --cnt; + else if (saved_key->size == 0) + break; WT_ERR(cursor->get_key(cursor, &las_pageid, &las_id, &las_counter, &las_key)); @@ -859,34 +990,102 @@ __wt_las_sweep(WT_SESSION_IMPL *session) * should another thread remove the record before we do (not * expected for dropped trees), and the cursor remains * positioned in that case. - * - * TODO it would also be good to remove entries in lookaside - * from live files that have aged out. If we track for each - * entry whether it was the on-page value chosen by - * reconciliation, we can safely remove entries from that point - * on (for the given key) that are visible to all readers. */ if (las_id >= cache->las_sweep_dropmin && las_id <= cache->las_sweep_dropmax && __bit_test(cache->las_sweep_dropmap, las_id - cache->las_sweep_dropmin)) { WT_ERR(cursor->remove(cursor)); - ++remove_cnt; + ++decrement_cnt; + saved_key->size = 0; + continue; } + + /* + * Remove entries from the lookaside that have aged out and are + * now no longer needed. + */ + WT_ERR(cursor->get_value(cursor, + &txnid, &las_timestamp, &upd_type, &las_value)); +#ifdef HAVE_TIMESTAMPS + WT_ASSERT(session, las_timestamp.size == WT_TIMESTAMP_SIZE); + memcpy(×tamp, las_timestamp.data, las_timestamp.size); + val_ts = ×tamp; +#else + val_ts = NULL; +#endif + + /* + * If this entry isn't globally visible we cannot remove it. + * If it is visible then perform additional checks to see + * whether it has aged out of a live file. + */ + if (!__wt_txn_visible_all(session, txnid, val_ts)) { + saved_key->size = 0; + continue; + } + + /* + * Save our key for comparing with older entries if we + * don't have one or it is different. + */ + if (saved_key->size != las_key.size || + memcmp(saved_key->data, las_key.data, las_key.size) != 0) { + /* If we have processed enough entries, give up. */ + if (cnt == 0) + break; + + WT_ERR(__wt_buf_set(session, saved_key, + las_key.data, las_key.size)); + + if (upd_type != WT_UPDATE_BIRTHMARK) + continue; + } + + WT_ERR(cursor->remove(cursor)); + ++decrement_cnt; + } + + __wt_writeunlock(session, &cache->las_sweepwalk_lock); + locked = false; + + /* + * If the loop terminates after completing a work unit, we will + * continue the table sweep next time. Get a local copy of the + * sweep key, we're going to reset the cursor; do so before + * calling cursor.remove, cursor.remove can discard our hazard + * pointer and the page could be evicted from underneath us. + */ + if (ret == 0) { + WT_ERR(__wt_cursor_get_raw_key(cursor, sweep_key)); + if (!WT_DATA_IN_ITEM(sweep_key)) + WT_ERR(__wt_buf_set(session, sweep_key, + sweep_key->data, sweep_key->size)); } srch_notfound: WT_ERR_NOTFOUND_OK(ret); if (0) { -err: __wt_buf_free(session, key); +err: __wt_buf_free(session, sweep_key); + } + if (local_txn) { + if (ret == 0) + ret = __wt_txn_commit(session, NULL); + else + WT_TRET(__wt_txn_rollback(session, NULL)); + if (ret == 0) + __wt_cache_decr_check_uint64(session, + &S2C(session)->cache->las_entry_count, + decrement_cnt, "lookaside entry count"); } + if (locked) + __wt_writeunlock(session, &cache->las_sweepwalk_lock); WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags)); + __las_restore_isolation(session, saved_isolation); - __wt_cache_decr_check_uint64(session, - &S2C(session)->cache->las_entry_count, remove_cnt, - "lookaside entry count"); + __wt_scr_free(session, &saved_key); return (ret); } diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index cc3ea7c9d52..98f3ca6a633 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -247,6 +247,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_alter[] = { }; static const WT_CONFIG_CHECK confchk_WT_SESSION_begin_transaction[] = { + { "ignore_prepare", "boolean", NULL, NULL, NULL, 0 }, { "isolation", "string", NULL, "choices=[\"read-uncommitted\",\"read-committed\"," "\"snapshot\"]", @@ -443,6 +444,11 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = { { NULL, NULL, NULL, NULL, NULL, 0 } }; +static const WT_CONFIG_CHECK confchk_WT_SESSION_prepare_transaction[] = { + { "prepare_timestamp", "string", NULL, NULL, NULL, 0 }, + { NULL, NULL, NULL, NULL, NULL, 0 } +}; + static const WT_CONFIG_CHECK confchk_WT_SESSION_reconfigure[] = { { "ignore_cache_size", "boolean", NULL, NULL, NULL, 0 }, { "isolation", "string", @@ -1258,9 +1264,9 @@ static const WT_CONFIG_ENTRY config_entries[] = { confchk_WT_SESSION_alter, 4 }, { "WT_SESSION.begin_transaction", - "isolation=,name=,priority=0,read_timestamp=," - "round_to_oldest=false,snapshot=,sync=", - confchk_WT_SESSION_begin_transaction, 7 + "ignore_prepare=false,isolation=,name=,priority=0,read_timestamp=" + ",round_to_oldest=false,snapshot=,sync=", + confchk_WT_SESSION_begin_transaction, 8 }, { "WT_SESSION.checkpoint", "drop=,force=false,name=,target=,use_timestamp=true", @@ -1325,6 +1331,10 @@ static const WT_CONFIG_ENTRY config_entries[] = { "target=", confchk_WT_SESSION_open_cursor, 13 }, + { "WT_SESSION.prepare_transaction", + "prepare_timestamp=", + confchk_WT_SESSION_prepare_transaction, 1 + }, { "WT_SESSION.rebalance", "", NULL, 0 diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c index 871190380f7..00de16e6c21 100644 --- a/src/third_party/wiredtiger/src/conn/conn_cache.c +++ b/src/third_party/wiredtiger/src/conn/conn_cache.c @@ -252,6 +252,7 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_RET_MSG(NULL, ret, "Failed to create session for eviction walks"); + WT_RET(__wt_rwlock_init(session, &cache->las_sweepwalk_lock)); WT_RET(__wt_spin_init(session, &cache->las_lock, "lookaside table")); WT_RET(__wt_spin_init( session, &cache->las_sweep_lock, "lookaside sweep")); @@ -400,6 +401,7 @@ __wt_cache_destroy(WT_SESSION_IMPL *session) __wt_spin_destroy(session, &cache->evict_walk_lock); __wt_spin_destroy(session, &cache->las_lock); __wt_spin_destroy(session, &cache->las_sweep_lock); + __wt_rwlock_destroy(session, &cache->las_sweepwalk_lock); wt_session = &cache->walk_session->iface; if (wt_session != NULL) WT_TRET(wt_session->close(wt_session, NULL)); diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index 39c84764070..71410a5a731 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -2274,7 +2274,7 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server) WT_TRACK_OP_INIT(session); - WT_RET(__evict_get_ref(session, is_server, &btree, &ref)); + WT_RET_TRACK(__evict_get_ref(session, is_server, &btree, &ref)); WT_ASSERT(session, ref->state == WT_REF_LOCKED); app_timer = false; @@ -2443,7 +2443,6 @@ err: if (timer) { done: WT_TRACK_OP_END(session); return (ret); - /* NOTREACHED */ } /* diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h index 01a7617789a..1e8fc5f34c9 100644 --- a/src/third_party/wiredtiger/src/include/block.h +++ b/src/third_party/wiredtiger/src/include/block.h @@ -174,6 +174,7 @@ struct __wt_bm { (WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t, bool *); int (*compact_skip)(WT_BM *, WT_SESSION_IMPL *, bool *); int (*compact_start)(WT_BM *, WT_SESSION_IMPL *); + int (*corrupt)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); int (*free)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); bool (*is_mapped)(WT_BM *, WT_SESSION_IMPL *); int (*map_discard)(WT_BM *, WT_SESSION_IMPL *, void *, size_t); diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 3a6413162f3..9c29b72dc67 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -178,7 +178,7 @@ static inline void __wt_cache_decr_check_size( WT_SESSION_IMPL *session, size_t *vp, size_t v, const char *fld) { - if (__wt_atomic_subsize(vp, v) < WT_EXABYTE) + if (v == 0 || __wt_atomic_subsize(vp, v) < WT_EXABYTE) return; /* @@ -202,7 +202,7 @@ static inline void __wt_cache_decr_check_uint64( WT_SESSION_IMPL *session, uint64_t *vp, uint64_t v, const char *fld) { - if (__wt_atomic_sub64(vp, v) < WT_EXABYTE) + if (v == 0 || __wt_atomic_sub64(vp, v) < WT_EXABYTE) return; /* diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h index 7a49f388826..9203e692291 100644 --- a/src/third_party/wiredtiger/src/include/cache.h +++ b/src/third_party/wiredtiger/src/include/cache.h @@ -200,7 +200,9 @@ struct __wt_cache { uint64_t las_entry_count; /* Count of entries in lookaside */ uint64_t las_pageid; /* Lookaside table page ID counter */ - WT_SPINLOCK las_sweep_lock; + uint64_t las_sweep_cnt; /* Entries to walk per sweep. */ + WT_RWLOCK las_sweepwalk_lock; + WT_SPINLOCK las_sweep_lock; WT_ITEM las_sweep_key; /* Track sweep position. */ uint32_t las_sweep_dropmin; /* Minimum btree ID in current set. */ uint8_t *las_sweep_dropmap; /* Bitmap of dropped btree IDs. */ diff --git a/src/third_party/wiredtiger/src/include/config.h b/src/third_party/wiredtiger/src/include/config.h index 4764ce0fd9f..541e811aa33 100644 --- a/src/third_party/wiredtiger/src/include/config.h +++ b/src/third_party/wiredtiger/src/include/config.h @@ -78,29 +78,30 @@ struct __wt_config_parser_impl { #define WT_CONFIG_ENTRY_WT_SESSION_log_flush 26 #define WT_CONFIG_ENTRY_WT_SESSION_log_printf 27 #define WT_CONFIG_ENTRY_WT_SESSION_open_cursor 28 -#define WT_CONFIG_ENTRY_WT_SESSION_rebalance 29 -#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 30 -#define WT_CONFIG_ENTRY_WT_SESSION_rename 31 -#define WT_CONFIG_ENTRY_WT_SESSION_reset 32 -#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 33 -#define WT_CONFIG_ENTRY_WT_SESSION_salvage 34 -#define WT_CONFIG_ENTRY_WT_SESSION_snapshot 35 -#define WT_CONFIG_ENTRY_WT_SESSION_strerror 36 -#define WT_CONFIG_ENTRY_WT_SESSION_timestamp_transaction 37 -#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 38 -#define WT_CONFIG_ENTRY_WT_SESSION_truncate 39 -#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 40 -#define WT_CONFIG_ENTRY_WT_SESSION_verify 41 -#define WT_CONFIG_ENTRY_colgroup_meta 42 -#define WT_CONFIG_ENTRY_file_config 43 -#define WT_CONFIG_ENTRY_file_meta 44 -#define WT_CONFIG_ENTRY_index_meta 45 -#define WT_CONFIG_ENTRY_lsm_meta 46 -#define WT_CONFIG_ENTRY_table_meta 47 -#define WT_CONFIG_ENTRY_wiredtiger_open 48 -#define WT_CONFIG_ENTRY_wiredtiger_open_all 49 -#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 50 -#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 51 +#define WT_CONFIG_ENTRY_WT_SESSION_prepare_transaction 29 +#define WT_CONFIG_ENTRY_WT_SESSION_rebalance 30 +#define WT_CONFIG_ENTRY_WT_SESSION_reconfigure 31 +#define WT_CONFIG_ENTRY_WT_SESSION_rename 32 +#define WT_CONFIG_ENTRY_WT_SESSION_reset 33 +#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 34 +#define WT_CONFIG_ENTRY_WT_SESSION_salvage 35 +#define WT_CONFIG_ENTRY_WT_SESSION_snapshot 36 +#define WT_CONFIG_ENTRY_WT_SESSION_strerror 37 +#define WT_CONFIG_ENTRY_WT_SESSION_timestamp_transaction 38 +#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 39 +#define WT_CONFIG_ENTRY_WT_SESSION_truncate 40 +#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 41 +#define WT_CONFIG_ENTRY_WT_SESSION_verify 42 +#define WT_CONFIG_ENTRY_colgroup_meta 43 +#define WT_CONFIG_ENTRY_file_config 44 +#define WT_CONFIG_ENTRY_file_meta 45 +#define WT_CONFIG_ENTRY_index_meta 46 +#define WT_CONFIG_ENTRY_lsm_meta 47 +#define WT_CONFIG_ENTRY_table_meta 48 +#define WT_CONFIG_ENTRY_wiredtiger_open 49 +#define WT_CONFIG_ENTRY_wiredtiger_open_all 50 +#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 51 +#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 52 /* * configuration section: END * DO NOT EDIT: automatically built by dist/flags.py. diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index aef6b2d6777..be21fcb6456 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -193,7 +193,6 @@ struct __wt_connection_impl { WT_FH *optrack_map_fh; /* Name to id translation file. */ WT_SPINLOCK optrack_map_spinlock; /* Translation file spinlock. */ uintmax_t optrack_pid; /* Cache the process ID. */ - uint16_t optrack_uid; /* Unique function ID */ void **foc; /* Free-on-close array */ size_t foc_cnt; /* Array entries */ diff --git a/src/third_party/wiredtiger/src/include/error.h b/src/third_party/wiredtiger/src/include/error.h index a4ef4757ec9..95edf7ed659 100644 --- a/src/third_party/wiredtiger/src/include/error.h +++ b/src/third_party/wiredtiger/src/include/error.h @@ -46,6 +46,13 @@ if ((__ret = (a)) != 0) \ return (__ret); \ } while (0) +#define WT_RET_TRACK(a) do { \ + int __ret; \ + if ((__ret = (a)) != 0) { \ + WT_TRACK_OP_END(session); \ + return (__ret); \ + } \ +} while (0) #define WT_RET_MSG(session, v, ...) do { \ int __ret = (v); \ __wt_err(session, __ret, __VA_ARGS__); \ diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 3674d9218da..a293b1ac516 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -45,7 +45,7 @@ extern void __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el); extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, size_t *lengthp, void *mapped_cookiep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, size_t length, void *mapped_cookie) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_block_panic(WT_SESSION_IMPL *session, int error, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_block_panic(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_manager_drop( WT_SESSION_IMPL *session, const char *filename, bool durable) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_manager_create( WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_block_configure_first_fit(WT_BLOCK *block, bool on); @@ -57,6 +57,7 @@ extern int __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t extern int __wt_block_manager_named_size( WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_bm_preload( WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_bm_corrupt(WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_read_off_blind( WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset, uint32_t size, uint32_t checksum) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_block_ext_alloc(WT_SESSION_IMPL *session, WT_EXT **extp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -208,7 +209,7 @@ extern void __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_PAGE *page, WT_MULTI *multi, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); -extern int __wt_las_remove_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_las_remove_block( WT_SESSION_IMPL *session, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_las_save_dropped(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_las_sweep(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern uint32_t __wt_checksum_sw(const void *chunk, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); @@ -661,7 +662,6 @@ extern int __wt_decrypt(WT_SESSION_IMPL *session, WT_ENCRYPTOR *encryptor, size_ extern int __wt_encrypt(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t skip, WT_ITEM *in, WT_ITEM *out) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_encrypt_size(WT_SESSION_IMPL *session, WT_KEYED_ENCRYPTOR *kencryptor, size_t incoming_size, size_t *sizep); extern void __wt_event_handler_set(WT_SESSION_IMPL *session, WT_EVENT_HANDLER *handler); -extern int __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, const char *file_name, int line_number, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_err(WT_SESSION_IMPL *session, int error, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))); extern void __wt_errx(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3))); extern int __wt_ext_err_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); @@ -793,6 +793,7 @@ extern int __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_F extern int __wt_txn_reconfigure(WT_SESSION_IMPL *session, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern void __wt_txn_release(WT_SESSION_IMPL *session); extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_rollback_required(WT_SESSION_IMPL *session, const char *reason) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_txn_init(WT_SESSION_IMPL *session, WT_SESSION_IMPL *session_ret) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index 8b8c3a55a6c..e5cfb534db5 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -611,7 +611,7 @@ struct __wt_connection_stats { int64_t child_modify_blocked_page; int64_t tree_descend_blocked; int64_t txn_commit_queue_empty; - int64_t txn_commit_queue_head; + int64_t txn_commit_queue_tail; int64_t txn_commit_queue_inserts; int64_t txn_commit_queue_len; int64_t txn_snapshots_created; diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h index 3f48368b303..19eaf87cbd3 100644 --- a/src/third_party/wiredtiger/src/include/txn.h +++ b/src/third_party/wiredtiger/src/include/txn.h @@ -241,6 +241,7 @@ struct __wt_txn { TAILQ_ENTRY(__wt_txn) commit_timestampq; TAILQ_ENTRY(__wt_txn) read_timestampq; + bool clear_ts_queue; /* Set if we need to clear from the queue */ /* Array of modifications by this transaction. */ WT_TXN_OP *mod; @@ -262,22 +263,23 @@ struct __wt_txn { const char *rollback_reason; /* If rollback, the reason */ /* AUTOMATIC FLAG VALUE GENERATION START */ -#define WT_TXN_AUTOCOMMIT 0x0001u -#define WT_TXN_ERROR 0x0002u -#define WT_TXN_HAS_ID 0x0004u -#define WT_TXN_HAS_SNAPSHOT 0x0008u -#define WT_TXN_HAS_TS_COMMIT 0x0010u -#define WT_TXN_HAS_TS_READ 0x0020u -#define WT_TXN_NAMED_SNAPSHOT 0x0040u -#define WT_TXN_PUBLIC_TS_COMMIT 0x0080u -#define WT_TXN_PUBLIC_TS_READ 0x0100u -#define WT_TXN_READONLY 0x0200u -#define WT_TXN_RUNNING 0x0400u -#define WT_TXN_SYNC_SET 0x0800u -#define WT_TXN_TS_COMMIT_ALWAYS 0x1000u -#define WT_TXN_TS_COMMIT_KEYS 0x2000u -#define WT_TXN_TS_COMMIT_NEVER 0x4000u -#define WT_TXN_UPDATE 0x8000u +#define WT_TXN_AUTOCOMMIT 0x00001u +#define WT_TXN_ERROR 0x00002u +#define WT_TXN_HAS_ID 0x00004u +#define WT_TXN_HAS_SNAPSHOT 0x00008u +#define WT_TXN_HAS_TS_COMMIT 0x00010u +#define WT_TXN_HAS_TS_READ 0x00020u +#define WT_TXN_IGNORE_PREPARE 0x00040u +#define WT_TXN_NAMED_SNAPSHOT 0x00080u +#define WT_TXN_PUBLIC_TS_COMMIT 0x00100u +#define WT_TXN_PUBLIC_TS_READ 0x00200u +#define WT_TXN_READONLY 0x00400u +#define WT_TXN_RUNNING 0x00800u +#define WT_TXN_SYNC_SET 0x01000u +#define WT_TXN_TS_COMMIT_ALWAYS 0x02000u +#define WT_TXN_TS_COMMIT_KEYS 0x04000u +#define WT_TXN_TS_COMMIT_NEVER 0x08000u +#define WT_TXN_UPDATE 0x10000u /* AUTOMATIC FLAG VALUE GENERATION STOP */ uint32_t flags; }; diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index 53067bf44ab..fef1a935983 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -1717,6 +1717,9 @@ struct __wt_session { * * @param session the session handle * @configstart{WT_SESSION.begin_transaction, see dist/api_data.py} + * @config{ignore_prepare, whether to ignore the updates by other + * prepared transactions as part of read operations of this + * transaction., a boolean flag; default \c false.} * @config{isolation, the isolation level for this transaction; defaults * to the session's isolation level., a string\, chosen from the * following options: \c "read-uncommitted"\, \c "read-committed"\, \c @@ -1776,6 +1779,31 @@ struct __wt_session { int __F(commit_transaction)(WT_SESSION *session, const char *config); /*! + * Prepare the current transaction. + * + * A transaction must be in progress when this method is called. + * Preparing a transaction will guarantee subsequent commit will + * succeed. Only commit and rollback are allowed on a transaction after + * it has been prepared. At the moment, prepare transaction is designed + * to support MongoDB exclusively. + * + * @requires_transaction + * + * @snippet ex_all.c transaction prepare + * + * @param session the session handle + * @configstart{WT_SESSION.prepare_transaction, see dist/api_data.py} + * @config{prepare_timestamp, set the prepare timestamp for the updates + * of the current transaction. The supplied value should not be older + * than any active read timestamps. This configuration option is + * mandatory. See @ref transaction_timestamps., a string; default + * empty.} + * @configend + * @errors + */ + int __F(prepare_transaction)(WT_SESSION *session, const char *config); + + /*! * Roll back the current transaction. * * A transaction must be in progress when this method is called. @@ -5455,8 +5483,8 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_TREE_DESCEND_BLOCKED 1296 /*! transaction: commit timestamp queue insert to empty */ #define WT_STAT_CONN_TXN_COMMIT_QUEUE_EMPTY 1297 -/*! transaction: commit timestamp queue inserts to head */ -#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1298 +/*! transaction: commit timestamp queue inserts to tail */ +#define WT_STAT_CONN_TXN_COMMIT_QUEUE_TAIL 1298 /*! transaction: commit timestamp queue inserts total */ #define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1299 /*! transaction: commit timestamp queue length */ diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c index c5a3935813a..d737c09c391 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c @@ -865,6 +865,45 @@ err: API_END_RET(session, ret); } /* + * __clsm_position_chunk -- + * Position a chunk cursor. + */ +static int +__clsm_position_chunk( + WT_CURSOR_LSM *clsm, WT_CURSOR *c, bool forward, int *cmpp) +{ + WT_CURSOR *cursor; + WT_SESSION_IMPL *session; + + cursor = &clsm->iface; + session = (WT_SESSION_IMPL *)cursor->session; + + c->set_key(c, &cursor->key); + WT_RET(c->search_near(c, cmpp)); + + while (forward ? *cmpp < 0 : *cmpp > 0) { + WT_RET(forward ? c->next(c) : c->prev(c)); + + /* + * With higher isolation levels, where we have stable reads, + * we're done: the cursor is now positioned as expected. + * + * With read-uncommitted isolation, a new record could have + * appeared in between the search and stepping forward / back. + * In that case, keep going until we see a key in the expected + * range. + */ + if (session->txn.isolation != WT_ISO_READ_UNCOMMITTED) + return (0); + + WT_RET(WT_LSM_CURCMP(session, + clsm->lsm_tree, c, cursor, *cmpp)); + } + + return (0); +} + +/* * __clsm_next -- * WT_CURSOR->next method for the LSM cursor type. */ @@ -877,7 +916,7 @@ __clsm_next(WT_CURSOR *cursor) WT_SESSION_IMPL *session; u_int i; int cmp; - bool check, deleted; + bool deleted; clsm = (WT_CURSOR_LSM *)cursor; @@ -887,29 +926,17 @@ __clsm_next(WT_CURSOR *cursor) /* If we aren't positioned for a forward scan, get started. */ if (clsm->current == NULL || !F_ISSET(clsm, WT_CLSM_ITERATE_NEXT)) { - F_CLR(clsm, WT_CLSM_MULTIPLE); WT_FORALL_CURSORS(clsm, c, i) { if (!F_ISSET(cursor, WT_CURSTD_KEY_SET)) { WT_ERR(c->reset(c)); ret = c->next(c); - } else if (c != clsm->current) { - c->set_key(c, &cursor->key); - if ((ret = c->search_near(c, &cmp)) == 0) { - if (cmp < 0) - ret = c->next(c); - else if (cmp == 0) { - if (clsm->current == NULL) - clsm->current = c; - else - F_SET(clsm, - WT_CLSM_MULTIPLE); - } - } else - F_CLR(c, WT_CURSTD_KEY_SET); - } + } else if (c != clsm->current && (ret = + __clsm_position_chunk(clsm, c, true, &cmp)) == 0 && + cmp == 0 && clsm->current == NULL) + clsm->current = c; WT_ERR_NOTFOUND_OK(ret); } - F_SET(clsm, WT_CLSM_ITERATE_NEXT); + F_SET(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_MULTIPLE); F_CLR(clsm, WT_CLSM_ITERATE_PREV); /* We just positioned *at* the key, now move. */ @@ -921,19 +948,16 @@ retry: /* * forward. */ if (F_ISSET(clsm, WT_CLSM_MULTIPLE)) { - check = false; WT_FORALL_CURSORS(clsm, c, i) { if (!F_ISSET(c, WT_CURSTD_KEY_INT)) continue; - if (check) { + if (c != clsm->current) { WT_ERR(WT_LSM_CURCMP(session, clsm->lsm_tree, c, clsm->current, cmp)); if (cmp == 0) WT_ERR_NOTFOUND_OK(c->next(c)); } - if (c == clsm->current) - check = true; } } @@ -1055,7 +1079,7 @@ __clsm_prev(WT_CURSOR *cursor) WT_SESSION_IMPL *session; u_int i; int cmp; - bool check, deleted; + bool deleted; clsm = (WT_CURSOR_LSM *)cursor; @@ -1065,28 +1089,17 @@ __clsm_prev(WT_CURSOR *cursor) /* If we aren't positioned for a reverse scan, get started. */ if (clsm->current == NULL || !F_ISSET(clsm, WT_CLSM_ITERATE_PREV)) { - F_CLR(clsm, WT_CLSM_MULTIPLE); WT_FORALL_CURSORS(clsm, c, i) { if (!F_ISSET(cursor, WT_CURSTD_KEY_SET)) { WT_ERR(c->reset(c)); ret = c->prev(c); - } else if (c != clsm->current) { - c->set_key(c, &cursor->key); - if ((ret = c->search_near(c, &cmp)) == 0) { - if (cmp > 0) - ret = c->prev(c); - else if (cmp == 0) { - if (clsm->current == NULL) - clsm->current = c; - else - F_SET(clsm, - WT_CLSM_MULTIPLE); - } - } - } + } else if (c != clsm->current && (ret = + __clsm_position_chunk(clsm, c, false, &cmp)) == 0 && + cmp == 0 && clsm->current == NULL) + clsm->current = c; WT_ERR_NOTFOUND_OK(ret); } - F_SET(clsm, WT_CLSM_ITERATE_PREV); + F_SET(clsm, WT_CLSM_ITERATE_PREV | WT_CLSM_MULTIPLE); F_CLR(clsm, WT_CLSM_ITERATE_NEXT); /* We just positioned *at* the key, now move. */ @@ -1098,23 +1111,20 @@ retry: /* * backwards. */ if (F_ISSET(clsm, WT_CLSM_MULTIPLE)) { - check = false; WT_FORALL_CURSORS(clsm, c, i) { if (!F_ISSET(c, WT_CURSTD_KEY_INT)) continue; - if (check) { + if (c != clsm->current) { WT_ERR(WT_LSM_CURCMP(session, clsm->lsm_tree, c, clsm->current, cmp)); if (cmp == 0) WT_ERR_NOTFOUND_OK(c->prev(c)); } - if (c == clsm->current) - check = true; } } - /* Move the smallest cursor backwards. */ + /* Move the largest cursor backwards. */ c = clsm->current; WT_ERR_NOTFOUND_OK(c->prev(c)); } @@ -1279,6 +1289,7 @@ __clsm_search(WT_CURSOR *cursor) WT_ERR(__cursor_needkey(cursor)); __cursor_novalue(cursor); WT_ERR(__clsm_enter(clsm, true, false)); + F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV); ret = __clsm_lookup(clsm, &cursor->value); diff --git a/src/third_party/wiredtiger/src/optrack/optrack.c b/src/third_party/wiredtiger/src/optrack/optrack.c index 8258a715927..dd630785cd5 100644 --- a/src/third_party/wiredtiger/src/optrack/optrack.c +++ b/src/third_party/wiredtiger/src/optrack/optrack.c @@ -10,12 +10,13 @@ /* * __wt_optrack_record_funcid -- - * Record optrack function id + * Allocate and record optrack function ID. */ void __wt_optrack_record_funcid( WT_SESSION_IMPL *session, const char *func, uint16_t *func_idp) { + static uint16_t optrack_uid = 0; /* Unique for the process lifetime. */ WT_CONNECTION_IMPL *conn; WT_DECL_ITEM(tmp); WT_DECL_RET; @@ -27,7 +28,7 @@ __wt_optrack_record_funcid( __wt_spin_lock(session, &conn->optrack_map_spinlock); if (*func_idp == 0) { - *func_idp = ++conn->optrack_uid; + *func_idp = ++optrack_uid; WT_ERR(__wt_buf_fmt( session, tmp, "%" PRIu16 " %s\n", *func_idp, func)); diff --git a/src/third_party/wiredtiger/src/os_common/os_abort.c b/src/third_party/wiredtiger/src/os_common/os_abort.c index a725ad9151d..85dcc741855 100644 --- a/src/third_party/wiredtiger/src/os_common/os_abort.c +++ b/src/third_party/wiredtiger/src/os_common/os_abort.c @@ -24,7 +24,7 @@ __wt_abort(WT_SESSION_IMPL *session) /* Sleep forever, the debugger will interrupt us when it attaches. */ for (i = 0; i < WT_MILLION; ++i) - __wt_sleep(10, 0); + __wt_sleep(100, 0); #else __wt_errx(session, "aborting WiredTiger library"); #endif diff --git a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c index daaf55d65d2..b944bbda520 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c +++ b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c @@ -66,8 +66,10 @@ __wt_cond_wait_signal(WT_SESSION_IMPL *session, WT_CONDVAR *cond, /* Fast path if already signalled. */ *signalled = true; - if (__wt_atomic_addi32(&cond->waiters, 1) == 0) + if (__wt_atomic_addi32(&cond->waiters, 1) == 0) { + WT_TRACK_OP_END(session); return; + } __wt_verbose(session, WT_VERB_MUTEX, "wait %s", cond->name); WT_STAT_CONN_INCR(session, cond_wait); @@ -138,10 +140,10 @@ err: (void)__wt_atomic_subi32(&cond->waiters, 1); if (locked) WT_TRET(pthread_mutex_unlock(&cond->mtx)); - if (ret == 0) { - WT_TRACK_OP_END(session); + + WT_TRACK_OP_END(session); + if (ret == 0) return; - } WT_PANIC_MSG(session, ret, "pthread_cond_wait: %s", cond->name); } diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c index d9b415a76cd..044d31b8fbd 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_write.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c @@ -6155,10 +6155,14 @@ __rec_las_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r) __wt_las_cursor(session, &cursor, &session_flags); for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i) - if (multi->supd != NULL) + if (multi->supd != NULL) { WT_ERR(__wt_las_insert_block( session, cursor, r->page, multi, key)); + __wt_free(session, multi->supd); + multi->supd_entries = 0; + } + err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags)); __wt_scr_free(session, &key); @@ -6184,7 +6188,7 @@ __rec_las_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r) */ for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i) if (multi->supd != NULL && multi->page_las.las_pageid != 0) - WT_TRET(__wt_las_remove_block(session, NULL, + WT_TRET(__wt_las_remove_block(session, btree_id, multi->page_las.las_pageid)); return (ret); diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c index fd091cb5b13..be3a5d93473 100644 --- a/src/third_party/wiredtiger/src/session/session_api.c +++ b/src/third_party/wiredtiger/src/session/session_api.c @@ -109,6 +109,37 @@ __wt_session_release_resources(WT_SESSION_IMPL *session) } /* + * __session_clear_commit_queue -- + * We're about to clear the session and overwrite the txn structure. + * Remove ourselves from the commit timestamp queue if we're on it. + */ +static void +__session_clear_commit_queue(WT_SESSION_IMPL *session) +{ + WT_TXN *txn; + WT_TXN_GLOBAL *txn_global; + + txn = &session->txn; + txn_global = &S2C(session)->txn_global; + + if (!txn->clear_ts_queue) + return; + + __wt_writelock(session, &txn_global->commit_timestamp_rwlock); + /* + * Recheck after acquiring the lock. + */ + if (txn->clear_ts_queue) { + TAILQ_REMOVE( + &txn_global->commit_timestamph, txn, commit_timestampq); + --txn_global->commit_timestampq_len; + txn->clear_ts_queue = false; + } + __wt_writeunlock(session, &txn_global->commit_timestamp_rwlock); + +} + +/* * __session_clear -- * Clear a session structure. */ @@ -127,6 +158,7 @@ __session_clear(WT_SESSION_IMPL *session) * * For these reasons, be careful when clearing the session structure. */ + __session_clear_commit_queue(session); memset(session, 0, WT_SESSION_CLEAR_SIZE); WT_INIT_LSN(&session->bg_sync_lsn); @@ -1482,6 +1514,31 @@ err: API_END_RET(session, ret); } /* + * __session_prepare_transaction -- + * WT_SESSION->prepare_transaction method. + */ +static int +__session_prepare_transaction(WT_SESSION *wt_session, const char *config) +{ + WT_DECL_RET; + WT_SESSION_IMPL *session; + + session = (WT_SESSION_IMPL *)wt_session; + SESSION_API_CALL(session, prepare_transaction, config, cfg); + + WT_ERR(__wt_txn_context_check(session, true)); + + WT_TRET(__wt_txn_prepare(session, cfg)); + + /* + * Below code to be corrected as part of prepare functionality + * implementation, coded as below to avoid setting error to transaction. + */ + +err: API_END_RET_NO_TXN_ERROR(session, ret); +} + +/* * __session_rollback_transaction -- * WT_SESSION->rollback_transaction method. */ @@ -1825,6 +1882,7 @@ __open_session(WT_CONNECTION_IMPL *conn, __session_verify, __session_begin_transaction, __session_commit_transaction, + __session_prepare_transaction, __session_rollback_transaction, __session_timestamp_transaction, __session_checkpoint, @@ -1855,6 +1913,7 @@ __open_session(WT_CONNECTION_IMPL *conn, __session_verify, __session_begin_transaction, __session_commit_transaction, + __session_prepare_transaction, __session_rollback_transaction, __session_timestamp_transaction, __session_checkpoint_readonly, diff --git a/src/third_party/wiredtiger/src/support/err.c b/src/third_party/wiredtiger/src/support/err.c index c7cfa0c3932..0569d0545e6 100644 --- a/src/third_party/wiredtiger/src/support/err.c +++ b/src/third_party/wiredtiger/src/support/err.c @@ -169,11 +169,11 @@ __wt_event_handler_set(WT_SESSION_IMPL *session, WT_EVENT_HANDLER *handler) } while (0) /* - * __wt_eventv -- + * __eventv -- * Report a message to an event handler. */ -int -__wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, +static int +__eventv(WT_SESSION_IMPL *session, bool msg_event, int error, const char *file_name, int line_number, const char *fmt, va_list ap) WT_GCC_FUNC_ATTRIBUTE((cold)) { @@ -193,7 +193,7 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, * SECURITY: * Buffer placed at the end of the stack in case snprintf overflows. */ - char s[2048]; + char s[4 * 1024]; p = s; remain = sizeof(s); @@ -279,6 +279,17 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error, __handler_failure(session, ret, "error", true); } + /* + * The buffer is fixed sized, complain if we overflow. (The test is for + * no more bytes remaining in the buffer, so technically we might have + * filled it exactly.) Be cautious changing this code, it's a recursive + * call. + */ + if (ret == 0 && remain == 0) + __wt_err(session, ENOMEM, + "error or message truncated: internal WiredTiger buffer " + "too small"); + if (ret != 0) { err: if (fprintf(stderr, "WiredTiger Error%s%s: ", @@ -314,7 +325,7 @@ __wt_err(WT_SESSION_IMPL *session, int error, const char *fmt, ...) * an error value to return. */ va_start(ap, fmt); - WT_IGNORE_RET(__wt_eventv(session, false, error, NULL, 0, fmt, ap)); + WT_IGNORE_RET(__eventv(session, false, error, NULL, 0, fmt, ap)); va_end(ap); } @@ -334,7 +345,7 @@ __wt_errx(WT_SESSION_IMPL *session, const char *fmt, ...) * an error value to return. */ va_start(ap, fmt); - WT_IGNORE_RET(__wt_eventv(session, false, 0, NULL, 0, fmt, ap)); + WT_IGNORE_RET(__eventv(session, false, 0, NULL, 0, fmt, ap)); va_end(ap); } @@ -355,7 +366,7 @@ __wt_ext_err_printf( session = ((WT_CONNECTION_IMPL *)wt_api->conn)->default_session; va_start(ap, fmt); - ret = __wt_eventv(session, false, 0, NULL, 0, fmt, ap); + ret = __eventv(session, false, 0, NULL, 0, fmt, ap); va_end(ap); return (ret); } @@ -372,7 +383,7 @@ __wt_verbose_worker(WT_SESSION_IMPL *session, const char *fmt, ...) va_list ap; va_start(ap, fmt); - WT_IGNORE_RET(__wt_eventv(session, true, 0, NULL, 0, fmt, ap)); + WT_IGNORE_RET(__eventv(session, true, 0, NULL, 0, fmt, ap)); va_end(ap); } @@ -493,7 +504,7 @@ __wt_assert(WT_SESSION_IMPL *session, va_list ap; va_start(ap, fmt); - WT_IGNORE_RET(__wt_eventv( + WT_IGNORE_RET(__eventv( session, false, error, file_name, line_number, fmt, ap)); va_end(ap); diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index 926176d6024..97c22cd5031 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -1023,7 +1023,7 @@ static const char * const __stats_connection_desc[] = { "thread-yield: page reconciliation yielded due to child modification", "thread-yield: tree descend one level yielded for split page index update", "transaction: commit timestamp queue insert to empty", - "transaction: commit timestamp queue inserts to head", + "transaction: commit timestamp queue inserts to tail", "transaction: commit timestamp queue inserts total", "transaction: commit timestamp queue length", "transaction: number of named snapshots created", @@ -1403,7 +1403,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->child_modify_blocked_page = 0; stats->tree_descend_blocked = 0; stats->txn_commit_queue_empty = 0; - stats->txn_commit_queue_head = 0; + stats->txn_commit_queue_tail = 0; stats->txn_commit_queue_inserts = 0; stats->txn_commit_queue_len = 0; stats->txn_snapshots_created = 0; @@ -1909,8 +1909,8 @@ __wt_stat_connection_aggregate( to->tree_descend_blocked += WT_STAT_READ(from, tree_descend_blocked); to->txn_commit_queue_empty += WT_STAT_READ(from, txn_commit_queue_empty); - to->txn_commit_queue_head += - WT_STAT_READ(from, txn_commit_queue_head); + to->txn_commit_queue_tail += + WT_STAT_READ(from, txn_commit_queue_tail); to->txn_commit_queue_inserts += WT_STAT_READ(from, txn_commit_queue_inserts); to->txn_commit_queue_len += WT_STAT_READ(from, txn_commit_queue_len); diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index 5d70acb90f0..627bafa7483 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -449,6 +449,14 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_RET(__wt_txn_parse_timestamp(session, "read", &ts, &cval)); /* + * Prepare transactions are supported only in timestamp build. + */ + WT_RET(__wt_config_gets_def(session, + cfg, "ignore_prepare", 0, &cval)); + if (cval.val) + F_SET(txn, WT_TXN_IGNORE_PREPARE); + + /* * Read the configuration here to reduce the span of the * critical section. */ @@ -608,11 +616,12 @@ __wt_txn_release(WT_SESSION_IMPL *session) static inline int __txn_commit_timestamp_validate(WT_SESSION_IMPL *session) { + WT_DECL_TIMESTAMP(op_timestamp) WT_TXN *txn; WT_TXN_OP *op; WT_UPDATE *upd; u_int i; - bool op_used_ts, upd_used_ts; + bool op_zero_ts, upd_zero_ts; txn = &session->txn; @@ -644,10 +653,12 @@ __txn_commit_timestamp_validate(WT_SESSION_IMPL *session) if (op->type == WT_TXN_OP_BASIC_TS || op->type == WT_TXN_OP_BASIC) { /* - * Skip over any aborted update structures. + * Skip over any aborted update structures or ones + * from our own transaction. */ upd = op->u.upd->next; - while (upd != NULL && upd->txnid == WT_TXN_ABORTED) + while (upd != NULL && (upd->txnid == WT_TXN_ABORTED || + upd->txnid == txn->id)) upd = upd->next; /* @@ -660,16 +671,31 @@ __txn_commit_timestamp_validate(WT_SESSION_IMPL *session) /* * Check for consistent per-key timestamp usage. * If timestamps are or are not used originally then - * they should be used the same way always. Check - * timestamps are used in order. + * they should be used the same way always. For this + * transaction, timestamps are in use anytime the + * commit timestamp is set. + * Check timestamps are used in order. */ - op_used_ts = - __wt_timestamp_iszero(&op->u.upd->timestamp); - upd_used_ts = __wt_timestamp_iszero(&upd->timestamp); - if (op_used_ts != upd_used_ts) + op_zero_ts = !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT); + upd_zero_ts = __wt_timestamp_iszero(&upd->timestamp); + if (op_zero_ts != upd_zero_ts) WT_RET_MSG(session, EINVAL, "per-key timestamps used inconsistently"); - if (__wt_timestamp_cmp(&op->u.upd->timestamp, + /* + * If we aren't using timestamps for this transaction + * then we are done checking. Don't check the timestamp + * because the one in the transaction is not cleared. + */ + if (op_zero_ts) + continue; + op_timestamp = op->u.upd->timestamp; + /* + * Only if the update structure doesn't have a timestamp + * then use the one in the transaction structure. + */ + if (__wt_timestamp_iszero(&op->u.upd->timestamp)) + op_timestamp = txn->commit_timestamp; + if (__wt_timestamp_cmp(&op_timestamp, &upd->timestamp) < 0) WT_RET_MSG(session, EINVAL, "out of order timestamps"); @@ -941,6 +967,22 @@ err: /* } /* + * __wt_txn_prepare -- + * Prepare the current transaction. + */ +int +__wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[]) +{ + WT_UNUSED(cfg); + +#ifdef HAVE_TIMESTAMPS + WT_RET_MSG(session, ENOTSUP, "prepare_transaction is not supported"); +#else + WT_RET_MSG(session, ENOTSUP, "prepare_transaction requires a version " + "of WiredTiger built with timestamp support"); +#endif +} +/* * __wt_txn_rollback -- * Roll back the current transaction. */ diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index 0af70c4090d..d2d07b9e6d7 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -22,13 +22,13 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session) WT_DECL_TIMESTAMP(rollback_timestamp) WT_ITEM las_key, las_timestamp, las_value; WT_TXN_GLOBAL *txn_global; - uint64_t las_counter, las_pageid, las_total, las_txnid; + uint64_t las_counter, las_pageid, las_total, las_txnid, remove_cnt; uint32_t las_id, session_flags; uint8_t upd_type; conn = S2C(session); cursor = NULL; - las_total = 0; + las_total = remove_cnt = 0; session_flags = 0; /* [-Werror=maybe-uninitialized] */ WT_CLEAR(las_timestamp); @@ -49,6 +49,7 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session) F_SET(session, WT_SESSION_READ_WONT_NEED); /* Walk the file. */ + __wt_writelock(session, &conn->cache->las_sweepwalk_lock); while ((ret = cursor->next(cursor)) == 0) { WT_ERR(cursor->get_key(cursor, &las_pageid, &las_id, &las_counter, &las_key)); @@ -70,13 +71,17 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session) * be removed. */ if (__wt_timestamp_cmp( - &rollback_timestamp, las_timestamp.data) < 0) + &rollback_timestamp, las_timestamp.data) < 0) { WT_ERR(cursor->remove(cursor)); - else + ++remove_cnt; + } else ++las_total; } WT_ERR_NOTFOUND_OK(ret); -err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags)); +err: __wt_writeunlock(session, &conn->cache->las_sweepwalk_lock); + WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags)); + __wt_cache_decr_check_uint64(session, + &conn->cache->las_entry_count, remove_cnt, "lookaside entry count"); WT_STAT_CONN_SET(session, cache_lookaside_entries, las_total); F_CLR(session, WT_SESSION_READ_WONT_NEED); @@ -424,8 +429,14 @@ __wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[]) * Mark that a rollback operation is in progress and wait for eviction * to drain. This is necessary because lookaside eviction uses * transactions and causes the check for a quiescent system to fail. + * + * Configuring lookaside eviction off isn't atomic, safe because the + * flag is only otherwise set when closing down the database. Assert + * to avoid confusion in the future. */ + WT_ASSERT(session, !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE)); F_SET(conn, WT_CONN_EVICTION_NO_LOOKASIDE); + WT_ERR(__wt_conn_btree_apply(session, NULL, __txn_rollback_eviction_drain, NULL, cfg)); diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c index 41ac970f14e..d07bfecd47c 100644 --- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c +++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c @@ -212,11 +212,20 @@ __txn_global_query_timestamp( /* Compare with the oldest running transaction. */ __wt_readlock(session, &txn_global->commit_timestamp_rwlock); - txn = TAILQ_FIRST(&txn_global->commit_timestamph); - if (txn != NULL && - __wt_timestamp_cmp(&txn->first_commit_timestamp, &ts) < 0) { - __wt_timestamp_set(&ts, &txn->first_commit_timestamp); - WT_ASSERT(session, !__wt_timestamp_iszero(&ts)); + TAILQ_FOREACH(txn, &txn_global->commit_timestamph, + commit_timestampq) { + if (txn->clear_ts_queue) + continue; + /* + * Compare on the first real running transaction. + */ + if (__wt_timestamp_cmp( + &txn->first_commit_timestamp, &ts) < 0) { + __wt_timestamp_set( + &ts, &txn->first_commit_timestamp); + WT_ASSERT(session, !__wt_timestamp_iszero(&ts)); + } + break; } __wt_readunlock(session, &txn_global->commit_timestamp_rwlock); } else if (WT_STRING_MATCH("oldest", cval.str, cval.len)) { @@ -652,7 +661,7 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) void __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session) { - WT_TXN *prev, *txn; + WT_TXN *qtxn, *txn, *txn_tmp; WT_TXN_GLOBAL *txn_global; wt_timestamp_t ts; @@ -668,27 +677,65 @@ __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session) * fixed. */ __wt_timestamp_set(&ts, &txn->commit_timestamp); - __wt_timestamp_set(&txn->first_commit_timestamp, &ts); __wt_writelock(session, &txn_global->commit_timestamp_rwlock); - prev = TAILQ_LAST(&txn_global->commit_timestamph, __wt_txn_cts_qh); - if (prev == NULL) - WT_STAT_CONN_INCR(session, txn_commit_queue_empty); - for (; prev != NULL && - __wt_timestamp_cmp(&prev->first_commit_timestamp, &ts) > 0; - prev = TAILQ_PREV(prev, __wt_txn_cts_qh, commit_timestampq)) - ; - if (prev == NULL) { + /* + * If our transaction is on the queue remove it first. The timestamp + * may move earlier so we otherwise might not remove ourselves before + * finding where to insert ourselves (which would result in a list + * loop) and we don't want to walk more of the list than needed. + */ + if (txn->clear_ts_queue) { + TAILQ_REMOVE(&txn_global->commit_timestamph, + txn, commit_timestampq); + WT_PUBLISH(txn->clear_ts_queue, false); + --txn_global->commit_timestampq_len; + } + /* + * Walk the list to look for where to insert our own transaction + * and remove any transactions that are not active. We stop when + * we get to the location where we want to insert. + */ + if (TAILQ_EMPTY(&txn_global->commit_timestamph)) { TAILQ_INSERT_HEAD( &txn_global->commit_timestamph, txn, commit_timestampq); - WT_STAT_CONN_INCR(session, txn_commit_queue_head); - } else - TAILQ_INSERT_AFTER(&txn_global->commit_timestamph, - prev, txn, commit_timestampq); + WT_STAT_CONN_INCR(session, txn_commit_queue_empty); + } else { + TAILQ_FOREACH_SAFE(qtxn, &txn_global->commit_timestamph, + commit_timestampq, txn_tmp) { + if (qtxn->clear_ts_queue) { + TAILQ_REMOVE(&txn_global->commit_timestamph, + qtxn, commit_timestampq); + WT_PUBLISH(qtxn->clear_ts_queue, false); + --txn_global->commit_timestampq_len; + continue; + } + /* + * Only walk the list up until we get to the place where + * we want to insert our timestamp. Some other thread + * will remove any later transactions. + */ + if (__wt_timestamp_cmp( + &qtxn->first_commit_timestamp, &ts) > 0) + break; + } + /* + * If we got to the end, then our timestamp is larger than + * the last element's timestamp. Insert at the end. + */ + if (qtxn == NULL) { + TAILQ_INSERT_TAIL(&txn_global->commit_timestamph, + txn, commit_timestampq); + WT_STAT_CONN_INCR(session, txn_commit_queue_tail); + } else + TAILQ_INSERT_BEFORE(qtxn, txn, commit_timestampq); + } + __wt_timestamp_set(&txn->first_commit_timestamp, &ts); ++txn_global->commit_timestampq_len; WT_STAT_CONN_INCR(session, txn_commit_queue_inserts); - __wt_writeunlock(session, &txn_global->commit_timestamp_rwlock); + txn->clear_ts_queue = false; F_SET(txn, WT_TXN_HAS_TS_COMMIT | WT_TXN_PUBLIC_TS_COMMIT); + __wt_writeunlock(session, &txn_global->commit_timestamp_rwlock); } /* @@ -699,19 +746,22 @@ void __wt_txn_clear_commit_timestamp(WT_SESSION_IMPL *session) { WT_TXN *txn; - WT_TXN_GLOBAL *txn_global; + uint32_t flags; txn = &session->txn; - txn_global = &S2C(session)->txn_global; if (!F_ISSET(txn, WT_TXN_PUBLIC_TS_COMMIT)) return; + flags = txn->flags; + LF_CLR(WT_TXN_PUBLIC_TS_COMMIT); - __wt_writelock(session, &txn_global->commit_timestamp_rwlock); - TAILQ_REMOVE(&txn_global->commit_timestamph, txn, commit_timestampq); - --txn_global->commit_timestampq_len; - __wt_writeunlock(session, &txn_global->commit_timestamp_rwlock); - F_CLR(txn, WT_TXN_PUBLIC_TS_COMMIT); + /* + * Notify other threads that our transaction is inactive and can be + * cleaned up safely from the commit timestamp queue whenever the next + * thread walks the queue. We do not need to remove it now. + */ + WT_PUBLISH(txn->clear_ts_queue, true); + WT_PUBLISH(txn->flags, flags); } /* diff --git a/src/third_party/wiredtiger/test/csuite/rwlock/main.c b/src/third_party/wiredtiger/test/csuite/rwlock/main.c index 2b4e9144fe4..e1d00344ee2 100644 --- a/src/third_party/wiredtiger/test/csuite/rwlock/main.c +++ b/src/third_party/wiredtiger/test/csuite/rwlock/main.c @@ -50,8 +50,8 @@ void *thread_dump(void *); int main(int argc, char *argv[]) { - TEST_OPTS *opts, _opts; struct timespec te, ts; + TEST_OPTS *opts, _opts; pthread_t dump_id, id[MAX_THREADS]; int i; diff --git a/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c b/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c index 85a581d73c9..3e64b86599d 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2447_join_main_table/main.c @@ -58,8 +58,8 @@ get_stat_total(WT_SESSION *session, WT_CURSOR *jcursor, const char *descmatch, WT_CURSOR *statcursor; uint64_t val; int ret; - bool match; char *desc, *valstr; + bool match; match = false; *pval = 0; @@ -91,8 +91,8 @@ main(int argc, char *argv[]) WT_SESSION *session; uint64_t maincount; int half, i, j; - const char *tablename; char bloom_cfg[128], index1uri[256], index2uri[256], joinuri[256]; + const char *tablename; opts = &_opts; memset(opts, 0, sizeof(*opts)); diff --git a/src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c b/src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c index 8a130f8d958..39c0cbdbce8 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2695_checksum/main.c @@ -48,8 +48,8 @@ main(int argc, char *argv[]) WT_RAND_STATE rnd; size_t len; uint32_t hw, sw; - u_int i, j; uint8_t *data; + u_int i, j; opts = &_opts; memset(opts, 0, sizeof(*opts)); diff --git a/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c b/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c index de05c459e10..58df56b50b1 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2834_join_bloom_fix/main.c @@ -48,16 +48,16 @@ int main(int argc, char *argv[]) { TEST_OPTS *opts, _opts; - WT_CURSOR *maincur; WT_CURSOR *balancecur, *flagcur, *joincur, *postcur; + WT_CURSOR *maincur; WT_SESSION *session; int balance, count, flag, key, key2, post, ret; - char cfg[128]; - const char *tablename; - char posturi[256]; char balanceuri[256]; + char cfg[128]; char flaguri[256]; char joinuri[256]; + char posturi[256]; + const char *tablename; /* Ignore unless requested */ if (!testutil_is_flag_set("TESTUTIL_ENABLE_LONG_TESTS")) @@ -177,10 +177,10 @@ void populate(TEST_OPTS *opts) { WT_CURSOR *maincur; + WT_RAND_STATE rnd; WT_SESSION *session; uint32_t key; int balance, i, flag, post; - WT_RAND_STATE rnd; __wt_random_init_seed(NULL, &rnd); diff --git a/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c b/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c index fd713e50ba0..6c463297e93 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2909_checkpoint_integrity/main.c @@ -477,10 +477,10 @@ static WT_EVENT_HANDLER event_handler = { static void subtest_main(int argc, char *argv[], bool close_test) { + struct rlimit rlim; TEST_OPTS *opts, _opts; WT_SESSION *session; char config[1024], filename[1024]; - struct rlimit rlim; opts = &_opts; memset(opts, 0, sizeof(*opts)); diff --git a/src/third_party/wiredtiger/test/csuite/wt2999_join_extractor/main.c b/src/third_party/wiredtiger/test/csuite/wt2999_join_extractor/main.c index 502c0d05a31..cf6b931c027 100644 --- a/src/third_party/wiredtiger/test/csuite/wt2999_join_extractor/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt2999_join_extractor/main.c @@ -90,8 +90,8 @@ main(int argc, char *argv[]) WT_CURSOR *cursor1, *cursor2, *jcursor; WT_ITEM k, v; WT_SESSION *session; - int i, ret; int32_t key, val[2]; + int i, ret; opts = &_opts; memset(opts, 0, sizeof(*opts)); diff --git a/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c b/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c index 82d8cae5d04..4a541c31e3a 100644 --- a/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt3120_filesys/main.c @@ -44,8 +44,8 @@ main(int argc, char *argv[]) TEST_OPTS *opts, _opts; WT_CURSOR *cursor; WT_SESSION *session; - char *kstr, *vstr; char buf[1024]; + char *kstr, *vstr; opts = &_opts; memset(opts, 0, sizeof(*opts)); diff --git a/src/third_party/wiredtiger/test/csuite/wt3874_pad_byte_collator/main.c b/src/third_party/wiredtiger/test/csuite/wt3874_pad_byte_collator/main.c index c985b1f8f32..cb8e1de5d6e 100644 --- a/src/third_party/wiredtiger/test/csuite/wt3874_pad_byte_collator/main.c +++ b/src/third_party/wiredtiger/test/csuite/wt3874_pad_byte_collator/main.c @@ -38,18 +38,11 @@ * account the collator. */ -#include <assert.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -#include <wiredtiger.h> - #define KEY_SIZE 20 static int my_compare(WT_COLLATOR *collator, WT_SESSION *session, - const WT_ITEM *v1, const WT_ITEM *v2, int *cmp) + const WT_ITEM *v1, const WT_ITEM *v2, int *cmp) { (void)collator; (void)session; @@ -67,10 +60,10 @@ main(int argc, char *argv[]) { TEST_OPTS *opts, _opts; WT_CONNECTION *conn; - WT_SESSION *session; WT_CURSOR *cursor; - char buf[KEY_SIZE]; WT_ITEM key; + WT_SESSION *session; + char buf[KEY_SIZE]; opts = &_opts; memset(opts, 0, sizeof(*opts)); diff --git a/src/third_party/wiredtiger/test/format/bdb.c b/src/third_party/wiredtiger/test/format/bdb.c index a3a9ad3a62f..adf32713cd2 100644 --- a/src/third_party/wiredtiger/test/format/bdb.c +++ b/src/third_party/wiredtiger/test/format/bdb.c @@ -116,7 +116,7 @@ bdb_insert( } void -bdb_np(int next, +bdb_np(bool next, void *keyp, size_t *keysizep, void *valuep, size_t *valuesizep, int *notfoundp) { diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h index 9191a73a134..a80c7de5c92 100644 --- a/src/third_party/wiredtiger/test/format/format.h +++ b/src/third_party/wiredtiger/test/format/format.h @@ -278,6 +278,10 @@ typedef struct { uint64_t remove; uint64_t ops; + uint64_t keyno; /* current key, value */ + WT_ITEM *key, _key; + WT_ITEM *value, _value; + #define TINFO_RUNNING 1 /* Running */ #define TINFO_COMPLETE 2 /* Finished */ #define TINFO_JOINED 3 /* Resolved */ @@ -287,7 +291,7 @@ typedef struct { #ifdef HAVE_BERKELEY_DB void bdb_close(void); void bdb_insert(const void *, size_t, const void *, size_t); -void bdb_np(int, void *, size_t *, void *, size_t *, int *); +void bdb_np(bool, void *, size_t *, void *, size_t *, int *); void bdb_open(void); void bdb_read(uint64_t, void *, size_t *, int *); void bdb_remove(uint64_t, int *); @@ -314,7 +318,7 @@ WT_THREAD_RET lrt(void *); void path_setup(const char *); void print_item(const char *, WT_ITEM *); void print_item_data(const char *, const uint8_t *, size_t); -int read_row(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t); +int read_row_worker(WT_CURSOR *, uint64_t, WT_ITEM *, WT_ITEM *, bool); uint32_t rng(WT_RAND_STATE *); WT_THREAD_RET timestamp(void *); void track(const char *, uint64_t, TINFO *); diff --git a/src/third_party/wiredtiger/test/format/lrt.c b/src/third_party/wiredtiger/test/format/lrt.c index fdf91508dd6..9d99933ef64 100644 --- a/src/third_party/wiredtiger/test/format/lrt.c +++ b/src/third_party/wiredtiger/test/format/lrt.c @@ -71,12 +71,12 @@ lrt(void *arg) for (pinned = 0;;) { if (pinned) { /* Re-read the record at the end of the table. */ - while ((ret = read_row( - cursor, &key, &value, saved_keyno)) == WT_ROLLBACK) + while ((ret = read_row_worker(cursor, + saved_keyno, &key, &value, false)) == WT_ROLLBACK) ; if (ret != 0) testutil_die(ret, - "read_row %" PRIu64, saved_keyno); + "read_row_worker %" PRIu64, saved_keyno); /* Compare the previous value with the current one. */ if (g.type == FIX) { @@ -131,13 +131,14 @@ lrt(void *arg) saved_keyno = mmrand(NULL, (u_int)(g.key_cnt - g.key_cnt / 10), (u_int)g.key_cnt); - while ((ret = read_row(cursor, - &key, &value, saved_keyno)) == WT_ROLLBACK) + while ((ret = read_row_worker(cursor, + saved_keyno, + &key, &value, false)) == WT_ROLLBACK) ; } while (ret == WT_NOTFOUND); if (ret != 0) testutil_die(ret, - "read_row %" PRIu64, saved_keyno); + "read_row_worker %" PRIu64, saved_keyno); /* Copy the cursor's value. */ if (g.type == FIX) { @@ -160,12 +161,13 @@ lrt(void *arg) */ do { keyno = mmrand(NULL, 1, (u_int)g.key_cnt / 5); - while ((ret = read_row(cursor, - &key, &value, keyno)) == WT_ROLLBACK) + while ((ret = read_row_worker(cursor, + keyno, &key, &value, false)) == WT_ROLLBACK) ; } while (ret == WT_NOTFOUND); if (ret != 0) - testutil_die(ret, "read_row %" PRIu64, keyno); + testutil_die(ret, + "read_row_worker %" PRIu64, keyno); pinned = 1; } diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c index 6ac2f10af95..bc11c2ba8f8 100644 --- a/src/third_party/wiredtiger/test/format/ops.c +++ b/src/third_party/wiredtiger/test/format/ops.c @@ -28,23 +28,19 @@ #include "format.h" -static int col_insert(TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *); -static int col_modify( - TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); -static int col_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool); -static int col_reserve(WT_CURSOR *, uint64_t, bool); -static int col_update( - TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); -static int nextprev(WT_CURSOR *, int); +static int col_insert(TINFO *, WT_CURSOR *); +static int col_modify(TINFO *, WT_CURSOR *, bool); +static int col_remove(TINFO *, WT_CURSOR *, bool); +static int col_reserve(TINFO *, WT_CURSOR *, bool); +static int col_update(TINFO *, WT_CURSOR *, bool); +static int nextprev(TINFO *, WT_CURSOR *, bool); static WT_THREAD_RET ops(void *); -static int row_insert( - TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); -static int row_modify( - TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); -static int row_remove(WT_CURSOR *, WT_ITEM *, uint64_t, bool); -static int row_reserve(WT_CURSOR *, WT_ITEM *, uint64_t, bool); -static int row_update( - TINFO *, WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, bool); +static int read_row(TINFO *, WT_CURSOR *); +static int row_insert(TINFO *, WT_CURSOR *, bool); +static int row_modify(TINFO *, WT_CURSOR *, bool); +static int row_remove(TINFO *, WT_CURSOR *, bool); +static int row_reserve(TINFO *, WT_CURSOR *, bool); +static int row_update(TINFO *, WT_CURSOR *, bool); static void table_append_init(void); #ifdef HAVE_BERKELEY_DB @@ -273,8 +269,10 @@ wts_ops(int lastrun) free(tinfo_list); } +typedef enum { INSERT, MODIFY, READ, REMOVE, UPDATE } thread_op; typedef struct { - uint64_t keyno; /* Row number */ + thread_op op; /* Operation */ + uint64_t keyno; /* Row number */ void *kdata; /* If an insert, the generated key */ size_t ksize; @@ -283,37 +281,36 @@ typedef struct { void *vdata; /* If not a delete, the value */ size_t vsize; size_t vmemsize; - - bool deleted; /* Delete operation */ - bool insert; /* Insert operation */ } SNAP_OPS; -#define SNAP_TRACK \ - (snap != NULL && (size_t)(snap - snap_list) < WT_ELEMENTS(snap_list)) +#define SNAP_TRACK(op, keyno, key, value) do { \ + if (snap != NULL && \ + (size_t)(snap - snap_list) < WT_ELEMENTS(snap_list)) \ + snap_track(snap++, op, keyno, key, value); \ +} while (0) /* * snap_track -- * Add a single snapshot isolation returned value to the list. */ static void -snap_track(SNAP_OPS *snap, uint64_t keyno, WT_ITEM *key, WT_ITEM *value) +snap_track( + SNAP_OPS *snap, thread_op op, uint64_t keyno, WT_ITEM *key, WT_ITEM *value) { + snap->op = op; snap->keyno = keyno; - if (key == NULL) - snap->insert = false; - else { - snap->insert = true; + testutil_assert(key == NULL || (op == INSERT && g.type == ROW)); + if (key != NULL) { if (snap->kmemsize < key->size) { snap->kdata = drealloc(snap->kdata, key->size); snap->kmemsize = key->size; } memcpy(snap->kdata, key->data, snap->ksize = key->size); } - if (value == NULL) - snap->deleted = true; - else { - snap->deleted = false; + + testutil_assert(value != NULL || op == REMOVE); + if (value != NULL) { if (snap->vmemsize < value->size) { snap->vdata = drealloc(snap->vdata, value->size); snap->vmemsize = value->size; @@ -346,7 +343,11 @@ snap_check(WT_CURSOR *cursor, * unique generated key we saved, else generate the key from the * key number. */ - if (start->insert == 0) { + if (start->op == INSERT && g.type == ROW) { + key->data = start->kdata; + key->size = start->ksize; + cursor->set_key(cursor, key); + } else { switch (g.type) { case FIX: case VAR: @@ -357,10 +358,6 @@ snap_check(WT_CURSOR *cursor, cursor->set_key(cursor, key); break; } - } else { - key->data = start->kdata; - key->size = start->ksize; - cursor->set_key(cursor, key); } if ((ret = cursor->search(cursor)) == 0) { if (g.type == FIX) { @@ -376,11 +373,11 @@ snap_check(WT_CURSOR *cursor, return (ret); /* Check for simple matches. */ - if (ret == 0 && !start->deleted && + if (ret == 0 && start->op != REMOVE && value->size == start->vsize && memcmp(value->data, start->vdata, value->size) == 0) continue; - if (ret == WT_NOTFOUND && start->deleted) + if (ret == WT_NOTFOUND && start->op == REMOVE) continue; /* @@ -392,7 +389,7 @@ snap_check(WT_CURSOR *cursor, if (ret == WT_NOTFOUND && start->vsize == 1 && *(uint8_t *)start->vdata == 0) continue; - if (start->deleted && + if (start->op == REMOVE && value->size == 1 && *(uint8_t *)value->data == 0) continue; } @@ -404,7 +401,7 @@ snap_check(WT_CURSOR *cursor, "snapshot-isolation: %" PRIu64 " search: " "expected {0x%02x}, found {0x%02x}", start->keyno, - start->deleted ? 0 : *(uint8_t *)start->vdata, + start->op == REMOVE ? 0 : *(uint8_t *)start->vdata, ret == WT_NOTFOUND ? 0 : *(uint8_t *)value->data); /* NOTREACHED */ case ROW: @@ -412,7 +409,7 @@ snap_check(WT_CURSOR *cursor, "snapshot-isolation %.*s search mismatch\n", (int)key->size, (const char *)key->data); - if (start->deleted) + if (start->op == REMOVE) fprintf(stderr, "expected {deleted}\n"); else print_item_data( @@ -432,7 +429,7 @@ snap_check(WT_CURSOR *cursor, "snapshot-isolation %" PRIu64 " search mismatch\n", start->keyno); - if (start->deleted) + if (start->op == REMOVE) fprintf(stderr, "expected {deleted}\n"); else print_item_data( @@ -547,19 +544,17 @@ commit_transaction(TINFO *tinfo, WT_SESSION *session) static WT_THREAD_RET ops(void *arg) { - enum { INSERT, MODIFY, READ, REMOVE, UPDATE } op; - SNAP_OPS *snap, snap_list[64]; + SNAP_OPS *snap, snap_list[128]; TINFO *tinfo; WT_CONNECTION *conn; WT_CURSOR *cursor; WT_DECL_RET; - WT_ITEM *key, _key, *value, _value; WT_SESSION *session; - uint64_t keyno, reset_op, session_op; + thread_op op; + uint64_t reset_op, session_op; uint32_t rnd; - u_int i, iso_config; - int dir; - bool intxn, positioned, readonly; + u_int i, j, iso_config; + bool intxn, next, positioned, readonly; tinfo = arg; @@ -572,10 +567,10 @@ ops(void *arg) memset(snap_list, 0, sizeof(snap_list)); /* Set up the default key and value buffers. */ - key = &_key; - key_gen_init(key); - value = &_value; - val_gen_init(value); + tinfo->key = &tinfo->_key; + key_gen_init(tinfo->key); + tinfo->value = &tinfo->_value; + val_gen_init(tinfo->value); /* Set the first operation where we'll create sessions and cursors. */ cursor = NULL; @@ -682,8 +677,7 @@ ops(void *arg) } /* Select a row. */ - keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows); - positioned = false; + tinfo->keyno = mmrand(&tinfo->rnd, 1, (u_int)g.rows); /* Select an operation. */ op = READ; @@ -710,13 +704,12 @@ ops(void *arg) positioned = false; if (op != READ && mmrand(&tinfo->rnd, 1, 5) == 1) { ++tinfo->search; - ret = read_row(cursor, key, value, keyno); + ret = read_row(tinfo, cursor); if (ret == 0) { positioned = true; - if (SNAP_TRACK) - snap_track(snap++, keyno, NULL, value); + SNAP_TRACK( + READ, tinfo->keyno, NULL, tinfo->value); } else { - positioned = false; if (ret == WT_ROLLBACK && intxn) goto deadlock; testutil_assert(ret == WT_NOTFOUND); @@ -727,12 +720,11 @@ ops(void *arg) if (!readonly && intxn && mmrand(&tinfo->rnd, 0, 20) == 1) { switch (g.type) { case ROW: - ret = - row_reserve(cursor, key, keyno, positioned); + ret = row_reserve(tinfo, cursor, positioned); break; case FIX: case VAR: - ret = col_reserve(cursor, keyno, positioned); + ret = col_reserve(tinfo, cursor, positioned); break; } if (ret == 0) { @@ -751,8 +743,7 @@ ops(void *arg) case INSERT: switch (g.type) { case ROW: - ret = row_insert(tinfo, - cursor, key, value, keyno, positioned); + ret = row_insert(tinfo, cursor, positioned); break; case FIX: case VAR: @@ -764,8 +755,7 @@ ops(void *arg) if (g.append_cnt >= g.append_max) goto update_instead_of_chosen_op; - ret = col_insert( - tinfo, cursor, key, value, &keyno); + ret = col_insert(tinfo, cursor); break; } @@ -773,9 +763,9 @@ ops(void *arg) positioned = false; if (ret == 0) { ++tinfo->insert; - if (SNAP_TRACK) - snap_track(snap++, keyno, - g.type == ROW ? key : NULL, value); + SNAP_TRACK(INSERT, tinfo->keyno, + g.type == ROW ? tinfo->key : NULL, + tinfo->value); } else { if (ret == WT_ROLLBACK && intxn) goto deadlock; @@ -793,18 +783,16 @@ ops(void *arg) ++tinfo->update; switch (g.type) { case ROW: - ret = row_modify(tinfo, cursor, - key, value, keyno, positioned); + ret = row_modify(tinfo, cursor, positioned); break; case VAR: - ret = col_modify(tinfo, cursor, - key, value, keyno, positioned); + ret = col_modify(tinfo, cursor, positioned); break; } if (ret == 0) { positioned = true; - if (SNAP_TRACK) - snap_track(snap++, keyno, NULL, value); + SNAP_TRACK( + MODIFY, tinfo->keyno, NULL, tinfo->value); } else { positioned = false; if (ret == WT_ROLLBACK && intxn) @@ -815,11 +803,11 @@ ops(void *arg) break; case READ: ++tinfo->search; - ret = read_row(cursor, key, value, keyno); + ret = read_row(tinfo, cursor); if (ret == 0) { positioned = true; - if (SNAP_TRACK) - snap_track(snap++, keyno, NULL, value); + SNAP_TRACK( + READ, tinfo->keyno, NULL, tinfo->value); } else { positioned = false; if (ret == WT_ROLLBACK && intxn) @@ -831,12 +819,12 @@ ops(void *arg) switch (g.type) { case ROW: ret = - row_remove(cursor, key, keyno, positioned); + row_remove(tinfo, cursor, positioned); break; case FIX: case VAR: ret = - col_remove(cursor, key, keyno, positioned); + col_remove(tinfo, cursor, positioned); break; } if (ret == 0) { @@ -845,8 +833,7 @@ ops(void *arg) * Don't set positioned: it's unchanged from the * previous state, but not necessarily set. */ - if (SNAP_TRACK) - snap_track(snap++, keyno, NULL, NULL); + SNAP_TRACK(REMOVE, tinfo->keyno, NULL, NULL); } else { positioned = false; if (ret == WT_ROLLBACK && intxn) @@ -859,19 +846,17 @@ update_instead_of_chosen_op: ++tinfo->update; switch (g.type) { case ROW: - ret = row_update(tinfo, cursor, - key, value, keyno, positioned); + ret = row_update(tinfo, cursor, positioned); break; case FIX: case VAR: - ret = col_update(tinfo, cursor, - key, value, keyno, positioned); + ret = col_update(tinfo, cursor, positioned); break; } if (ret == 0) { positioned = true; - if (SNAP_TRACK) - snap_track(snap++, keyno, NULL, value); + SNAP_TRACK( + UPDATE, tinfo->keyno, NULL, tinfo->value); } else { positioned = false; if (ret == WT_ROLLBACK && intxn) @@ -887,9 +872,10 @@ update_instead_of_chosen_op: * a random direction. */ if (positioned) { - dir = (int)mmrand(&tinfo->rnd, 0, 1); - for (i = 0; i < mmrand(&tinfo->rnd, 1, 100); ++i) { - if ((ret = nextprev(cursor, dir)) == 0) + next = mmrand(&tinfo->rnd, 0, 1) == 1; + j = mmrand(&tinfo->rnd, 1, 100); + for (i = 0; i < j; ++i) { + if ((ret = nextprev(tinfo, cursor, next)) == 0) continue; if (ret == WT_ROLLBACK && intxn) goto deadlock; @@ -912,9 +898,13 @@ update_instead_of_chosen_op: * Ending the transaction. If in snapshot isolation, repeat the * operations and confirm they're unchanged. */ - if (snap != NULL && (ret = snap_check( - cursor, snap_list, snap, key, value)) == WT_ROLLBACK) - goto deadlock; + if (snap != NULL) { + ret = snap_check( + cursor, snap_list, snap, tinfo->key, tinfo->value); + testutil_assert(ret == 0 || ret == WT_ROLLBACK); + if (ret == WT_ROLLBACK) + goto deadlock; + } /* * If we're in a transaction, commit 40% of the time and @@ -945,8 +935,8 @@ deadlock: ++tinfo->deadlock; free(snap_list[i].kdata); free(snap_list[i].vdata); } - key_gen_teardown(key); - val_gen_teardown(value); + key_gen_teardown(tinfo->key); + val_gen_teardown(tinfo->value); tinfo->state = TINFO_COMPLETE; return (WT_THREAD_RET_VALUE); @@ -993,7 +983,8 @@ wts_read_scan(void) last_keyno = keyno; } - switch (ret = read_row(cursor, &key, &value, keyno)) { + switch (ret = read_row_worker( + cursor, keyno, &key, &value, false)) { case 0: case WT_NOTFOUND: case WT_ROLLBACK: @@ -1011,13 +1002,13 @@ wts_read_scan(void) } /* - * read_row -- + * read_row_worker -- * Read and verify a single element in a row- or column-store file. */ int -read_row(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) +read_row_worker( + WT_CURSOR *cursor, uint64_t keyno, WT_ITEM *key, WT_ITEM *value, bool sn) { - static int sn = 0; WT_SESSION *session; uint8_t bitfield; int exact, ret; @@ -1045,11 +1036,8 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) ret = cursor->search_near(cursor, &exact); if (ret == 0 && exact != 0) ret = WT_NOTFOUND; - sn = 0; - } else { + } else ret = cursor->search(cursor); - sn = 1; - } switch (ret) { case 0: if (g.type == FIX) { @@ -1062,13 +1050,14 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) case WT_NOTFOUND: /* * In fixed length stores, zero values at the end of the key - * space are returned as not found. Treat this the same as + * space are returned as not-found. Treat this the same as * a zero value in the key space, to match BDB's behavior. + * The WiredTiger cursor has lost its position though, so + * we return not-found, the cursor movement can't continue. */ if (g.type == FIX) { *(uint8_t *)(value->data) = 0; value->size = 1; - ret = 0; } break; case WT_ROLLBACK: @@ -1107,20 +1096,34 @@ read_row(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno) } /* + * read_row -- + * Read and verify a single element in a row- or column-store file. + */ +static int +read_row(TINFO *tinfo, WT_CURSOR *cursor) +{ + /* 25% of the time we call search-near. */ + return (read_row_worker(cursor, tinfo->keyno, + tinfo->key, tinfo->value, mmrand(&tinfo->rnd, 0, 3) == 1)); +} + +/* * nextprev -- * Read and verify the next/prev element in a row- or column-store file. */ static int -nextprev(WT_CURSOR *cursor, int next) +nextprev(TINFO *tinfo, WT_CURSOR *cursor, bool next) { WT_DECL_RET; WT_ITEM key, value; uint64_t keyno; uint8_t bitfield; + int cmp; const char *which; + bool incrementing; keyno = 0; - which = next ? "next" : "prev"; + which = next ? "WT_CURSOR.next" : "WT_CURSOR.prev"; switch (ret = (next ? cursor->next(cursor) : cursor->prev(cursor))) { case 0: @@ -1143,6 +1146,57 @@ nextprev(WT_CURSOR *cursor, int next) } if (ret != 0) testutil_die(ret, "nextprev: get_key/get_value"); + + /* Check that keys are never returned out-of-order. */ + /* + * XXX + * WT-3889 + * LSM has a bug that prevents cursor order checks from + * working, skip the test for now. + */ + if (DATASOURCE("lsm")) + break; + + switch (g.type) { + case FIX: + case VAR: + testutil_assertfmt( + !next || tinfo->keyno < keyno, + "%s returned %" PRIu64 " then %" PRIu64, + which, tinfo->keyno, keyno); + testutil_assertfmt( + next || tinfo->keyno > keyno, + "%s returned %" PRIu64 " then %" PRIu64, + which, tinfo->keyno, keyno); + + tinfo->keyno = keyno; + break; + case ROW: + cmp = memcmp(tinfo->key->data, key.data, + WT_MIN(tinfo->key->size, key.size)); + incrementing = + (next && !g.c_reverse) || (!next && g.c_reverse); + testutil_assertfmt( + !incrementing || + cmp < 0 || + (cmp == 0 && tinfo->key->size < key.size), + "%s returned {%.*s} then {%.*s}", + which, + (int)tinfo->key->size, tinfo->key->data, + (int)key.size, key.data); + testutil_assertfmt( + incrementing || + cmp > 0 || + (cmp == 0 && tinfo->key->size > key.size), + "%s returned {%.*s} then {%.*s}", + which, + (int)tinfo->key->size, tinfo->key->data, + (int)key.size, key.data); + + testutil_check(__wt_buf_set((WT_SESSION_IMPL *) + cursor->session, tinfo->key, key.data, key.size)); + break; + } break; case WT_NOTFOUND: break; @@ -1225,18 +1279,19 @@ mismatch: if (g.type == ROW) { * Reserve a row in a row-store file. */ static int -row_reserve(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) +row_reserve(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) { WT_DECL_RET; if (!positioned) { - key_gen(key, keyno); - cursor->set_key(cursor, key); + key_gen(tinfo->key, tinfo->keyno); + cursor->set_key(cursor, tinfo->key); } if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, cursor->session, - "%-10s{%.*s}", "reserve", (int)key->size, key->data); + "%-10s{%.*s}", "reserve", + (int)tinfo->key->size, tinfo->key->data); switch (ret = cursor->reserve(cursor)) { case 0: @@ -1248,7 +1303,8 @@ row_reserve(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) return (WT_NOTFOUND); default: testutil_die(ret, - "row_reserve: reserve row %" PRIu64 " by key", keyno); + "row_reserve: reserve row %" PRIu64 " by key", + tinfo->keyno); } return (0); } @@ -1258,16 +1314,16 @@ row_reserve(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) * Reserve a row in a column-store file. */ static int -col_reserve(WT_CURSOR *cursor, uint64_t keyno, bool positioned) +col_reserve(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) { WT_DECL_RET; if (!positioned) - cursor->set_key(cursor, keyno); + cursor->set_key(cursor, tinfo->keyno); if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, cursor->session, - "%-10s%" PRIu64, "reserve", keyno); + "%-10s%" PRIu64, "reserve", tinfo->keyno); switch (ret = cursor->reserve(cursor)) { case 0: @@ -1278,7 +1334,7 @@ col_reserve(WT_CURSOR *cursor, uint64_t keyno, bool positioned) case WT_NOTFOUND: return (WT_NOTFOUND); default: - testutil_die(ret, "col_reserve: %" PRIu64, keyno); + testutil_die(ret, "col_reserve: %" PRIu64, tinfo->keyno); } return (0); } @@ -1314,22 +1370,21 @@ modify_build(TINFO *tinfo, WT_MODIFY *entries, int *nentriesp) * Modify a row in a row-store file. */ static int -row_modify(TINFO *tinfo, WT_CURSOR *cursor, - WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) +row_modify(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) { WT_DECL_RET; WT_MODIFY entries[MAX_MODIFY_ENTRIES]; int nentries; if (!positioned) { - key_gen(key, keyno); - cursor->set_key(cursor, key); + key_gen(tinfo->key, tinfo->keyno); + cursor->set_key(cursor, tinfo->key); } modify_build(tinfo, entries, &nentries); switch (ret = cursor->modify(cursor, entries, nentries)) { case 0: - testutil_check(cursor->get_value(cursor, value)); + testutil_check(cursor->get_value(cursor, tinfo->value)); break; case WT_CACHE_FULL: case WT_ROLLBACK: @@ -1338,20 +1393,23 @@ row_modify(TINFO *tinfo, WT_CURSOR *cursor, return (WT_NOTFOUND); default: testutil_die(ret, - "row_modify: modify row %" PRIu64 " by key", keyno); + "row_modify: modify row %" PRIu64 " by key", tinfo->keyno); } if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s{%.*s}, {%.*s}", "modify", - (int)key->size, key->data, (int)value->size, value->data); + (int)tinfo->key->size, tinfo->key->data, + (int)tinfo->value->size, tinfo->value->data); #ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) return (0); - bdb_update(key->data, key->size, value->data, value->size); + bdb_update( + tinfo->key->data, tinfo->key->size, + tinfo->value->data, tinfo->value->size); #endif return (0); } @@ -1361,20 +1419,19 @@ row_modify(TINFO *tinfo, WT_CURSOR *cursor, * Modify a row in a column-store file. */ static int -col_modify(TINFO *tinfo, WT_CURSOR *cursor, - WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) +col_modify(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) { WT_DECL_RET; WT_MODIFY entries[MAX_MODIFY_ENTRIES]; int nentries; if (!positioned) - cursor->set_key(cursor, keyno); + cursor->set_key(cursor, tinfo->keyno); modify_build(tinfo, entries, &nentries); switch (ret = cursor->modify(cursor, entries, nentries)) { case 0: - testutil_check(cursor->get_value(cursor, value)); + testutil_check(cursor->get_value(cursor, tinfo->value)); break; case WT_CACHE_FULL: case WT_ROLLBACK: @@ -1382,23 +1439,25 @@ col_modify(TINFO *tinfo, WT_CURSOR *cursor, case WT_NOTFOUND: return (WT_NOTFOUND); default: - testutil_die(ret, "col_modify: modify row %" PRIu64, keyno); + testutil_die(ret, + "col_modify: modify row %" PRIu64, tinfo->keyno); } if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s{%.*s}, {%.*s}", "modify", - (int)key->size, key->data, (int)value->size, value->data); + (int)tinfo->key->size, tinfo->key->data, + (int)tinfo->value->size, tinfo->value->data); #ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) return (0); - key_gen(key, keyno); - bdb_update(key->data, key->size, value->data, value->size); -#else - (void)key; /* [-Wunused-variable] */ + key_gen(tinfo->key, tinfo->keyno); + bdb_update( + tinfo->key->data, tinfo->key->size, + tinfo->value->data, tinfo->value->size); #endif return (0); } @@ -1408,23 +1467,23 @@ col_modify(TINFO *tinfo, WT_CURSOR *cursor, * Update a row in a row-store file. */ static int -row_update(TINFO *tinfo, WT_CURSOR *cursor, - WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) +row_update(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) { WT_DECL_RET; if (!positioned) { - key_gen(key, keyno); - cursor->set_key(cursor, key); + key_gen(tinfo->key, tinfo->keyno); + cursor->set_key(cursor, tinfo->key); } - val_gen(&tinfo->rnd, value, keyno); - cursor->set_value(cursor, value); + val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno); + cursor->set_value(cursor, tinfo->value); if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s{%.*s}, {%.*s}", "put", - (int)key->size, key->data, (int)value->size, value->data); + (int)tinfo->key->size, tinfo->key->data, + (int)tinfo->value->size, tinfo->value->data); switch (ret = cursor->update(cursor)) { case 0: @@ -1434,14 +1493,16 @@ row_update(TINFO *tinfo, WT_CURSOR *cursor, return (WT_ROLLBACK); default: testutil_die(ret, - "row_update: update row %" PRIu64 " by key", keyno); + "row_update: update row %" PRIu64 " by key", tinfo->keyno); } #ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) return (0); - bdb_update(key->data, key->size, value->data, value->size); + bdb_update( + tinfo->key->data, tinfo->key->size, + tinfo->value->data, tinfo->value->size); #endif return (0); } @@ -1451,30 +1512,30 @@ row_update(TINFO *tinfo, WT_CURSOR *cursor, * Update a row in a column-store file. */ static int -col_update(TINFO *tinfo, WT_CURSOR *cursor, - WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) +col_update(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) { WT_DECL_RET; if (!positioned) - cursor->set_key(cursor, keyno); - val_gen(&tinfo->rnd, value, keyno); + cursor->set_key(cursor, tinfo->keyno); + val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno); if (g.type == FIX) - cursor->set_value(cursor, *(uint8_t *)value->data); + cursor->set_value(cursor, *(uint8_t *)tinfo->value->data); else - cursor->set_value(cursor, value); + cursor->set_value(cursor, tinfo->value); if (g.logging == LOG_OPS) { if (g.type == FIX) (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}", - "update", keyno, - ((uint8_t *)value->data)[0]); + "update", tinfo->keyno, + ((uint8_t *)tinfo->value->data)[0]); else (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s%" PRIu64 " {%.*s}", - "update", keyno, - (int)value->size, (char *)value->data); + "update", tinfo->keyno, + (int)tinfo->value->size, + (char *)tinfo->value->data); } switch (ret = cursor->update(cursor)) { @@ -1484,17 +1545,17 @@ col_update(TINFO *tinfo, WT_CURSOR *cursor, case WT_ROLLBACK: return (WT_ROLLBACK); default: - testutil_die(ret, "col_update: %" PRIu64, keyno); + testutil_die(ret, "col_update: %" PRIu64, tinfo->keyno); } #ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) return (0); - key_gen(key, keyno); - bdb_update(key->data, key->size, value->data, value->size); -#else - (void)key; /* [-Wunused-variable] */ + key_gen(tinfo->key, tinfo->keyno); + bdb_update( + tinfo->key->data, tinfo->key->size, + tinfo->value->data, tinfo->value->size); #endif return (0); } @@ -1603,8 +1664,7 @@ table_append(uint64_t keyno) * Insert a row in a row-store file. */ static int -row_insert(TINFO *tinfo, WT_CURSOR *cursor, - WT_ITEM *key, WT_ITEM *value, uint64_t keyno, bool positioned) +row_insert(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) { WT_DECL_RET; @@ -1613,18 +1673,19 @@ row_insert(TINFO *tinfo, WT_CURSOR *cursor, * the insert method. Otherwise, generate a unique key and insert. */ if (!positioned) { - key_gen_insert(&tinfo->rnd, key, keyno); - cursor->set_key(cursor, key); + key_gen_insert(&tinfo->rnd, tinfo->key, tinfo->keyno); + cursor->set_key(cursor, tinfo->key); } - val_gen(&tinfo->rnd, value, keyno); - cursor->set_value(cursor, value); + val_gen(&tinfo->rnd, tinfo->value, tinfo->keyno); + cursor->set_value(cursor, tinfo->value); /* Log the operation */ if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s{%.*s}, {%.*s}", "insert", - (int)key->size, key->data, (int)value->size, value->data); + (int)tinfo->key->size, tinfo->key->data, + (int)tinfo->value->size, tinfo->value->data); switch (ret = cursor->insert(cursor)) { case 0: @@ -1634,14 +1695,16 @@ row_insert(TINFO *tinfo, WT_CURSOR *cursor, return (WT_ROLLBACK); default: testutil_die(ret, - "row_insert: insert row %" PRIu64 " by key", keyno); + "row_insert: insert row %" PRIu64 " by key", tinfo->keyno); } #ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) return (0); - bdb_update(key->data, key->size, value->data, value->size); + bdb_update( + tinfo->key->data, tinfo->key->size, + tinfo->value->data, tinfo->value->size); #endif return (0); } @@ -1651,17 +1714,15 @@ row_insert(TINFO *tinfo, WT_CURSOR *cursor, * Insert an element in a column-store file. */ static int -col_insert(TINFO *tinfo, - WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop) +col_insert(TINFO *tinfo, WT_CURSOR *cursor) { WT_DECL_RET; - uint64_t keyno; - val_gen(&tinfo->rnd, value, g.rows + 1); + val_gen(&tinfo->rnd, tinfo->value, g.rows + 1); if (g.type == FIX) - cursor->set_value(cursor, *(uint8_t *)value->data); + cursor->set_value(cursor, *(uint8_t *)tinfo->value->data); else - cursor->set_value(cursor, value); + cursor->set_value(cursor, tinfo->value); switch (ret = cursor->insert(cursor)) { case 0: break; @@ -1671,32 +1732,32 @@ col_insert(TINFO *tinfo, default: testutil_die(ret, "cursor.insert"); } - testutil_check(cursor->get_key(cursor, &keyno)); - *keynop = (uint32_t)keyno; + testutil_check(cursor->get_key(cursor, &tinfo->keyno)); - table_append(keyno); /* Extend the object. */ + table_append(tinfo->keyno); /* Extend the object. */ if (g.logging == LOG_OPS) { if (g.type == FIX) (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s%" PRIu64 " {0x%02" PRIx8 "}", - "insert", keyno, - ((uint8_t *)value->data)[0]); + "insert", tinfo->keyno, + ((uint8_t *)tinfo->value->data)[0]); else (void)g.wt_api->msg_printf(g.wt_api, cursor->session, "%-10s%" PRIu64 " {%.*s}", - "insert", keyno, - (int)value->size, (char *)value->data); + "insert", tinfo->keyno, + (int)tinfo->value->size, + (char *)tinfo->value->data); } #ifdef HAVE_BERKELEY_DB if (!SINGLETHREADED) return (0); - key_gen(key, keyno); - bdb_update(key->data, key->size, value->data, value->size); -#else - (void)key; /* [-Wunused-variable] */ + key_gen(tinfo->key, tinfo->keyno); + bdb_update( + tinfo->key->data, tinfo->key->size, + tinfo->value->data, tinfo->value->size); #endif return (0); } @@ -1706,18 +1767,18 @@ col_insert(TINFO *tinfo, * Remove an row from a row-store file. */ static int -row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) +row_remove(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) { WT_DECL_RET; if (!positioned) { - key_gen(key, keyno); - cursor->set_key(cursor, key); + key_gen(tinfo->key, tinfo->keyno); + cursor->set_key(cursor, tinfo->key); } if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, - cursor->session, "%-10s%" PRIu64, "remove", keyno); + cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno); /* We use the cursor in overwrite mode, check for existence. */ if ((ret = cursor->search(cursor)) == 0) @@ -1730,7 +1791,7 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) return (WT_ROLLBACK); default: testutil_die(ret, - "row_remove: remove %" PRIu64 " by key", keyno); + "row_remove: remove %" PRIu64 " by key", tinfo->keyno); } #ifdef HAVE_BERKELEY_DB @@ -1740,11 +1801,9 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) { int notfound; - bdb_remove(keyno, ¬found); - (void)notfound_chk("row_remove", ret, notfound, keyno); + bdb_remove(tinfo->keyno, ¬found); + (void)notfound_chk("row_remove", ret, notfound, tinfo->keyno); } -#else - (void)key; /* [-Wunused-variable] */ #endif return (ret); } @@ -1754,16 +1813,16 @@ row_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) * Remove a row from a column-store file. */ static int -col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) +col_remove(TINFO *tinfo, WT_CURSOR *cursor, bool positioned) { WT_DECL_RET; if (!positioned) - cursor->set_key(cursor, keyno); + cursor->set_key(cursor, tinfo->keyno); if (g.logging == LOG_OPS) (void)g.wt_api->msg_printf(g.wt_api, - cursor->session, "%-10s%" PRIu64, "remove", keyno); + cursor->session, "%-10s%" PRIu64, "remove", tinfo->keyno); /* We use the cursor in overwrite mode, check for existence. */ if ((ret = cursor->search(cursor)) == 0) @@ -1776,7 +1835,7 @@ col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) return (WT_ROLLBACK); default: testutil_die(ret, - "col_remove: remove %" PRIu64 " by key", keyno); + "col_remove: remove %" PRIu64 " by key", tinfo->keyno); } #ifdef HAVE_BERKELEY_DB @@ -1788,16 +1847,14 @@ col_remove(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, bool positioned) * do the same thing for the BDB store. */ if (g.type == FIX) { - key_gen(key, keyno); - bdb_update(key->data, key->size, "", 1); + key_gen(tinfo->key, tinfo->keyno); + bdb_update(tinfo->key->data, tinfo->key->size, "", 1); } else { int notfound; - bdb_remove(keyno, ¬found); - (void)notfound_chk("col_remove", ret, notfound, keyno); + bdb_remove(tinfo->keyno, ¬found); + (void)notfound_chk("col_remove", ret, notfound, tinfo->keyno); } -#else - (void)key; /* [-Wunused-variable] */ #endif return (ret); } diff --git a/src/third_party/wiredtiger/test/suite/test_assert04.py b/src/third_party/wiredtiger/test/suite/test_assert04.py index d582535fd0f..520a7a6a16d 100644 --- a/src/third_party/wiredtiger/test/suite/test_assert04.py +++ b/src/third_party/wiredtiger/test/suite/test_assert04.py @@ -94,7 +94,7 @@ class test_assert04(wttest.WiredTigerTestCase, suite_subprocess): c.close() # We must move the oldest timestamp forward in order to alter. - # Otherwise alter's closing of the file will fail with EBUSY. + # Otherwise alter closing the file will fail with EBUSY. self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(2)) # Now alter the setting and make sure we detect incorrect usage. @@ -345,5 +345,51 @@ class test_assert04(wttest.WiredTigerTestCase, suite_subprocess): self.assertEquals(c['key_nots'], 'value_nots1') c.close() + # Confirm it is okay to set the timestamp in the middle or end of the + # transaction. That should set the timestamp for the whole thing. + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts5'] = 'value_notsyet' + self.session.timestamp_transaction( + 'commit_timestamp=' + timestamp_str(20)) + c['key_ts5'] = 'value20' + self.session.commit_transaction() + c.close() + + c = self.session.open_cursor(uri) + self.assertEquals(c['key_ts5'], 'value20') + c.close() + + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts6'] = 'value_notsyet' + c['key_ts6'] = 'value21_after' + self.session.timestamp_transaction( + 'commit_timestamp=' + timestamp_str(21)) + self.session.commit_transaction() + c.close() + + c = self.session.open_cursor(uri) + self.assertEquals(c['key_ts6'], 'value21_after') + c.close() + + # Confirm it is okay to set the timestamp on the commit call. + # That should set the timestamp for the whole thing. + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_ts6'] = 'value_committs1' + c['key_ts6'] = 'value22' + self.session.commit_transaction('commit_timestamp=' + + timestamp_str(22)) + c.close() + + c = self.session.open_cursor(uri) + self.session.begin_transaction() + c['key_nots'] = 'value23' + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.commit_transaction( + 'commit_timestamp=' + timestamp_str(23)), msg_usage) + c.close() + if __name__ == '__main__': wttest.run() diff --git a/src/third_party/wiredtiger/test/suite/test_prepare01.py b/src/third_party/wiredtiger/test/suite/test_prepare01.py new file mode 100644 index 00000000000..f4ef7248228 --- /dev/null +++ b/src/third_party/wiredtiger/test/suite/test_prepare01.py @@ -0,0 +1,179 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2018 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest +from wtscenario import make_scenarios + +# test_prepare01.py +# Transactions: basic functionality with prepare +class test_prepare01(wttest.WiredTigerTestCase): + nentries = 1000 + scenarios = make_scenarios([ + ('col-f', dict(uri='file:text_txn01',key_format='r',value_format='S')), + ('col-t', dict(uri='table:text_txn01',key_format='r',value_format='S')), + ('fix-f', dict(uri='file:text_txn01',key_format='r',value_format='8t')), + ('fix-t', dict(uri='table:text_txn01',key_format='r',value_format='8t')), + ('row-f', dict(uri='file:text_txn01',key_format='S',value_format='S')), + ('row-t', dict(uri='table:text_txn01',key_format='S',value_format='S')), + ]) + + # Return the number of records visible to the cursor. + def cursor_count(self, cursor): + count = 0 + # Column-store appends result in phantoms, ignore records unless they + # have our flag value. + for r in cursor: + if self.value_format == 'S' or cursor.get_value() == 0xab: + count += 1 + return count + + # Checkpoint the database and assert the number of records visible to the + # checkpoint matches the expected value. + def check_checkpoint(self, expected): + s = self.conn.open_session() + s.checkpoint("name=test") + cursor = s.open_cursor(self.uri, None, "checkpoint=test") + self.assertEqual(self.cursor_count(cursor), expected) + s.close() + + # Open a cursor with specified isolation level, and assert the number of + # records visible to the cursor matches the expected value. + def check_txn_cursor(self, level, expected): + s = self.conn.open_session() + cursor = s.open_cursor(self.uri, None) + s.begin_transaction(level) + self.assertEqual(self.cursor_count(cursor), expected) + s.close() + + # Open a session with specified isolation level, and assert the number of + # records visible to the cursor matches the expected value. + def check_txn_session(self, level, expected): + s = self.conn.open_session(level) + cursor = s.open_cursor(self.uri, None) + # Currently ignore_prepare is not realized yet, hence no effect. + s.begin_transaction("ignore_prepare=true") + self.assertEqual(self.cursor_count(cursor), expected) + s.close() + + def check(self, cursor, committed, total): + # The cursor itself should see all of the records. + if cursor != None: + cursor.reset() + self.assertEqual(self.cursor_count(cursor), total) + + # Read-uncommitted should see all of the records. + # Snapshot and read-committed should see only committed records. + self.check_txn_cursor('isolation=read-uncommitted', total) + self.check_txn_session('isolation=read-uncommitted', total) + + self.check_txn_cursor('isolation=snapshot', committed) + self.check_txn_session('isolation=snapshot', committed) + + self.check_txn_cursor('isolation=read-committed', committed) + self.check_txn_session('isolation=read-committed', committed) + + # Checkpoints should only write committed items. + self.check_checkpoint(committed) + + # Loop through a set of inserts, periodically committing; before each + # commit, verify the number of visible records matches the expected value. + def test_visibility(self): + self.session.create(self.uri, + 'key_format=' + self.key_format + + ',value_format=' + self.value_format) + + committed = 0 + cursor = self.session.open_cursor(self.uri, None) + self.check(cursor, 0, 0) + msg = "/prepare_transaction is not supported/" + # Currently ignore_prepare is not realized yet, hence no effect. + self.session.begin_transaction("ignore_prepare=false") + for i in xrange(self.nentries): + if i > 0 and i % (self.nentries / 37) == 0: + self.check(cursor, committed, i) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.prepare_transaction(), msg) + self.session.commit_transaction() + committed = i + self.session.begin_transaction() + + if self.key_format == 'S': + cursor.set_key("key: %06d" % i) + else: + cursor.set_key(i + 1) + if self.value_format == 'S': + cursor.set_value("value: %06d" % i) + else: + cursor.set_value(0xab) + cursor.insert() + + self.check(cursor, committed, self.nentries) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.prepare_transaction(), msg) + self.session.commit_transaction() + self.check(cursor, self.nentries, self.nentries) + +# Test that read-committed is the default isolation level. +class test_read_committed_default(wttest.WiredTigerTestCase): + uri = 'table:test_prepare' + + # Return the number of records visible to the cursor. + def cursor_count(self, cursor): + count = 0 + for r in cursor: + count += 1 + return count + + def test_read_committed_default(self): + self.session.create(self.uri, 'key_format=S,value_format=S') + cursor = self.session.open_cursor(self.uri, None) + self.session.begin_transaction() + cursor['key: aaa'] = 'value: aaa' + msg = "/prepare_transaction is not supported/" + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.prepare_transaction(), msg) + self.session.commit_transaction() + self.session.begin_transaction() + cursor['key: bbb'] = 'value: bbb' + + s = self.conn.open_session() + cursor = s.open_cursor(self.uri, None) + s.begin_transaction("isolation=read-committed") + self.assertEqual(self.cursor_count(cursor), 1) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.prepare_transaction(), msg) + s.commit_transaction() + s.begin_transaction(None) + self.assertEqual(self.cursor_count(cursor), 1) + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, + lambda: self.session.prepare_transaction(), msg) + s.commit_transaction() + s.close() + +if __name__ == '__main__': + wttest.run() diff --git a/src/third_party/wiredtiger/test/syscall/wt2336_base/main.c b/src/third_party/wiredtiger/test/syscall/wt2336_base/main.c index 54f9999a126..32457254950 100644 --- a/src/third_party/wiredtiger/test/syscall/wt2336_base/main.c +++ b/src/third_party/wiredtiger/test/syscall/wt2336_base/main.c @@ -47,9 +47,9 @@ fail(int ret) { int main(int argc, char *argv[]) { - int ret; WT_CONNECTION *conn; WT_SESSION *session; + int ret; (void)argc; (void)argv; diff --git a/src/third_party/wiredtiger/test/utility/misc.c b/src/third_party/wiredtiger/test/utility/misc.c index a632e832f53..69ff53c290c 100644 --- a/src/third_party/wiredtiger/test/utility/misc.c +++ b/src/third_party/wiredtiger/test/utility/misc.c @@ -39,6 +39,10 @@ testutil_die(int e, const char *fmt, ...) { va_list ap; + /* Flush output to be sure it doesn't mix with fatal errors. */ + (void)fflush(stdout); + (void)fflush(stderr); + /* Allow test programs to cleanup on fatal error. */ if (custom_die != NULL) (*custom_die)(); diff --git a/src/third_party/wiredtiger/test/utility/thread.c b/src/third_party/wiredtiger/test/utility/thread.c index 1e1bd0bf575..4f70c562687 100644 --- a/src/third_party/wiredtiger/test/utility/thread.c +++ b/src/third_party/wiredtiger/test/utility/thread.c @@ -226,8 +226,8 @@ op_cursor(void *arg) { TEST_OPTS *opts; TEST_PER_THREAD_OPTS *args; - WT_SESSION *session; WT_CURSOR *cursor; + WT_SESSION *session; int ret; args = (TEST_PER_THREAD_OPTS *)arg; @@ -321,8 +321,8 @@ op_create_unique(void *arg) void op_drop(void *arg) { - TEST_PER_THREAD_OPTS *args; TEST_OPTS *opts; + TEST_PER_THREAD_OPTS *args; WT_RAND_STATE rnd; WT_SESSION *session; int ret; |