summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorMichael Cahill <michael.cahill@mongodb.com>2017-10-25 00:22:03 +1100
committerMichael Cahill <michael.cahill@mongodb.com>2017-10-25 00:22:03 +1100
commit2ff0f268538b713b457a16eb3e8f731bd2e63028 (patch)
tree2b05affc9567a0889c09b9b9b2d76d44b62a808c /src
parent979ee612682b77d9cabaafae10787fbb578cd32a (diff)
downloadmongo-2ff0f268538b713b457a16eb3e8f731bd2e63028.tar.gz
Import wiredtiger: a31e9d415a25561bd36b001f3a6e9b0a6a115236 from branch mongodb-3.6
ref: 4ecdb8f1a3..a31e9d415a for: 3.6.0 WT-3235 Fix typo in test case to cover more LSM use cases WT-3257 Need unit test for log recover setting WT-3585 Add an API to allow read timestamp to round up to oldest WT-3596 Timestamp documentation incomplete WT-3598 Open cursor should not to set transaction error on failure WT-3616 format failed to report a stuck cache WT-3629 Cache accounting underflow checks and logging WT-3640 Change bytes-read statistic WT-3643 Recovery does not run after database opened with recover=error WT-3667 Timestamp tests that take backups should pass log=(archive=false) WT-3669 Check for aborted updates during reconciliation WT-3672 Test format failure with commit timestamp older than oldest WT-3673 Opening the lookaside table can race with the statistics server WT-3674 Snapshot isolation failure after update/restore eviction
Diffstat (limited to 'src')
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py33
-rw-r--r--src/third_party/wiredtiger/dist/flags.py2
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok1
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/async/async_api.c3
-rw-r--r--src/third_party/wiredtiger/src/async/async_worker.c6
-rw-r--r--src/third_party/wiredtiger/src/block/block_ckpt.c4
-rw-r--r--src/third_party/wiredtiger/src/block/block_open.c3
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_compact.c4
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_io.c5
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_page.c4
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_read.c15
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_split.c10
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_vrfy.c3
-rw-r--r--src/third_party/wiredtiger/src/btree/row_srch.c3
-rw-r--r--src/third_party/wiredtiger/src/cache/cache_las.c36
-rw-r--r--src/third_party/wiredtiger/src/config/config.c6
-rw-r--r--src/third_party/wiredtiger/src/config/config_api.c1
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c6
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_api.c9
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_cache.c4
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_cache_pool.c1
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_sweep.c4
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_join.c1
-rw-r--r--src/third_party/wiredtiger/src/docs/transactions.dox20
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_file.c2
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_lru.c168
-rw-r--r--src/third_party/wiredtiger/src/include/api.h9
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h5
-rw-r--r--src/third_party/wiredtiger/src/include/btree.i94
-rw-r--r--src/third_party/wiredtiger/src/include/cache.h18
-rw-r--r--src/third_party/wiredtiger/src/include/cache.i11
-rw-r--r--src/third_party/wiredtiger/src/include/connection.h9
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h4
-rw-r--r--src/third_party/wiredtiger/src/include/flags.h4
-rw-r--r--src/third_party/wiredtiger/src/include/serial.i18
-rw-r--r--src/third_party/wiredtiger/src/include/txn.i3
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in39
-rw-r--r--src/third_party/wiredtiger/src/log/log.c12
-rw-r--r--src/third_party/wiredtiger/src/log/log_slot.c6
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_manager.c4
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_work_unit.c4
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_worker.c1
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_dir.c3
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c8
-rw-r--r--src/third_party/wiredtiger/src/session/session_api.c28
-rw-r--r--src/third_party/wiredtiger/src/support/thread_group.c14
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c64
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_timestamp.c19
-rw-r--r--src/third_party/wiredtiger/test/csuite/scope/main.c2
-rw-r--r--src/third_party/wiredtiger/test/fops/file.c17
-rw-r--r--src/third_party/wiredtiger/test/format/ops.c9
-rw-r--r--src/third_party/wiredtiger/test/format/util.c16
-rw-r--r--src/third_party/wiredtiger/test/suite/test_timestamp03.py23
-rw-r--r--src/third_party/wiredtiger/test/suite/test_timestamp05.py1
-rw-r--r--src/third_party/wiredtiger/test/suite/test_timestamp06.py4
-rw-r--r--src/third_party/wiredtiger/test/suite/test_timestamp07.py2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_txn12.py9
-rw-r--r--src/third_party/wiredtiger/test/suite/test_txn18.py118
59 files changed, 602 insertions, 332 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 32faec8709d..8fcf99ad3c0 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -1111,8 +1111,13 @@ methods = {
Transactions with higher values are less likely to abort''',
min='-100', max='100'),
Config('read_timestamp', '', r'''
- read using the specified timestamp, see
+ read using the specified timestamp. The supplied value should not be
+ older than the current oldest timestamp. See
@ref transaction_timestamps'''),
+ Config('round_to_oldest', 'false', r'''
+ if read timestamp is earlier than oldest timestamp,
+ read timestamp will be rounded to oldest timestamp''',
+ type='boolean'),
Config('snapshot', '', r'''
use a named, in-memory snapshot, see
@ref transaction_named_snapshots'''),
@@ -1124,7 +1129,10 @@ methods = {
'WT_SESSION.commit_transaction' : Method([
Config('commit_timestamp', '', r'''
- set the commit timestamp for the current transaction, see
+ set the commit timestamp for the current transaction. The supplied
+ value should not be older than the first commit timestamp set for the
+ current transaction. The value should also not be older than the
+ current oldest and stable timestamps. See
@ref transaction_timestamps'''),
Config('sync', '', r'''
override whether to sync log records when the transaction commits,
@@ -1139,7 +1147,10 @@ methods = {
'WT_SESSION.timestamp_transaction' : Method([
Config('commit_timestamp', '', r'''
- set the commit timestamp for the current transaction, see
+ set the commit timestamp for the current transaction. The supplied
+ value should not be older than the first commit timestamp set for the
+ current transaction. The value should also not be older than the
+ current oldest and stable timestamps. See
@ref transaction_timestamps'''),
]),
@@ -1289,17 +1300,21 @@ methods = {
timestamps greater than the specified value until the next commit moves
the tracked commit timestamp forwards. This is only intended for use
where the application is rolling back locally committed transactions.
- See @ref transaction_timestamps'''),
+ The supplied value should not be older than the current oldest and
+ stable timestamps. See @ref transaction_timestamps'''),
Config('oldest_timestamp', '', r'''
future commits and queries will be no earlier than the specified
- timestamp. Supplied values must be monotonically increasing.
- See @ref transaction_timestamps'''),
+ timestamp. Supplied values must be monotonically increasing; any
+ attempt to set a value older than the current one is silently ignored.
+ The supplied value should not be newer than the current
+ stable timestamp. See @ref transaction_timestamps'''),
Config('stable_timestamp', '', r'''
checkpoints will not include commits that are newer than the specified
timestamp in tables configured with \c log=(enabled=false). Supplied
- values must be monotonically increasing. The stable timestamp data
- stability only applies to tables that are not being logged. See @ref
- transaction_timestamps'''),
+ values must be monotonically increasing; any attempt to set a value
+ older than the current one is silently ignored. The supplied value should
+ not be older than the current oldest timestamp. See
+ @ref transaction_timestamps'''),
]),
'WT_CONNECTION.rollback_to_stable' : Method([]),
diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py
index 7ddbff62a63..70602333ad5 100644
--- a/src/third_party/wiredtiger/dist/flags.py
+++ b/src/third_party/wiredtiger/dist/flags.py
@@ -106,7 +106,7 @@ flags = {
'CONN_EVICTION_NO_LOOKASIDE',
'CONN_EVICTION_RUN',
'CONN_IN_MEMORY',
- 'CONN_LAS_OPEN',
+ 'CONN_LOOKASIDE_OPEN',
'CONN_LEAK_MEMORY',
'CONN_LSM_MERGE',
'CONN_PANIC',
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index 7977c4ef2ff..75f237f39fb 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -1234,6 +1234,7 @@ unbuffered
uncompressing
uncompresssed
undef
+underflowed
unencrypted
unesc
unescape
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 3c022218fb0..1dd15b73e56 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "4ecdb8f1a327067a178258ad025806eeefc1267b",
+ "commit": "a31e9d415a25561bd36b001f3a6e9b0a6a115236",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-3.6"
diff --git a/src/third_party/wiredtiger/src/async/async_api.c b/src/third_party/wiredtiger/src/async/async_api.c
index 7f81ad0a8af..0152238456d 100644
--- a/src/third_party/wiredtiger/src/async/async_api.c
+++ b/src/third_party/wiredtiger/src/async/async_api.c
@@ -111,10 +111,11 @@ __async_new_op_alloc(WT_SESSION_IMPL *session, const char *uri,
WT_CONNECTION_IMPL *conn;
uint32_t i, save_i, view;
+ *opp = NULL;
+
conn = S2C(session);
async = conn->async;
WT_STAT_CONN_INCR(session, async_op_alloc);
- *opp = NULL;
retry:
op = NULL;
diff --git a/src/third_party/wiredtiger/src/async/async_worker.c b/src/third_party/wiredtiger/src/async/async_worker.c
index 2626a21435f..48961da9e42 100644
--- a/src/third_party/wiredtiger/src/async/async_worker.c
+++ b/src/third_party/wiredtiger/src/async/async_worker.c
@@ -22,8 +22,9 @@ __async_op_dequeue(WT_CONNECTION_IMPL *conn, WT_SESSION_IMPL *session,
uint64_t sleep_usec;
uint32_t tries;
- async = conn->async;
*op = NULL;
+
+ async = conn->async;
/*
* Wait for work to do. Work is available when async->head moves.
* Then grab the slot containing the work. If we lose, try again.
@@ -125,8 +126,9 @@ __async_worker_cursor(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op,
WT_DECL_RET;
WT_SESSION *wt_session;
- wt_session = (WT_SESSION *)session;
*cursorp = NULL;
+
+ wt_session = (WT_SESSION *)session;
/*
* Compact doesn't need a cursor.
*/
diff --git a/src/third_party/wiredtiger/src/block/block_ckpt.c b/src/third_party/wiredtiger/src/block/block_ckpt.c
index d744e10539f..64d0283a8a3 100644
--- a/src/third_party/wiredtiger/src/block/block_ckpt.c
+++ b/src/third_party/wiredtiger/src/block/block_ckpt.c
@@ -53,8 +53,6 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block,
WT_DECL_RET;
uint8_t *endp;
- ci = NULL;
-
/*
* Sometimes we don't find a root page (we weren't given a checkpoint,
* or the checkpoint was empty). In that case we return an empty root
@@ -62,6 +60,8 @@ __wt_block_checkpoint_load(WT_SESSION_IMPL *session, WT_BLOCK *block,
*/
*root_addr_sizep = 0;
+ ci = NULL;
+
#ifdef HAVE_VERBOSE
if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) {
if (addr != NULL) {
diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c
index 6a9b2e65ac5..5f2ab947acb 100644
--- a/src/third_party/wiredtiger/src/block/block_open.c
+++ b/src/third_party/wiredtiger/src/block/block_open.c
@@ -150,10 +150,11 @@ __wt_block_open(WT_SESSION_IMPL *session,
uint64_t bucket, hash;
uint32_t flags;
+ *blockp = block = NULL;
+
__wt_verbose(session, WT_VERB_BLOCK, "open: %s", filename);
conn = S2C(session);
- *blockp = block = NULL;
hash = __wt_hash_city64(filename, strlen(filename));
bucket = hash % WT_HASH_ARRAY_SIZE;
__wt_spin_lock(session, &conn->block_lock);
diff --git a/src/third_party/wiredtiger/src/btree/bt_compact.c b/src/third_party/wiredtiger/src/btree/bt_compact.c
index 75bb46aaf89..1a39b479ae8 100644
--- a/src/third_party/wiredtiger/src/btree/bt_compact.c
+++ b/src/third_party/wiredtiger/src/btree/bt_compact.c
@@ -220,6 +220,8 @@ __wt_compact_page_skip(
u_int type;
WT_UNUSED(context);
+ *skipp = false; /* Default to reading */
+
/*
* Skip deleted pages, rewriting them doesn't seem useful; in a better
* world we'd write the parent to delete the page.
@@ -229,8 +231,6 @@ __wt_compact_page_skip(
return (0);
}
- *skipp = false; /* Default to reading */
-
/*
* If the page is in-memory, we want to look at it (it may have been
* modified and written, and the current location is the interesting
diff --git a/src/third_party/wiredtiger/src/btree/bt_io.c b/src/third_party/wiredtiger/src/btree/bt_io.c
index 6d2e69d7f9b..e443fbdc94b 100644
--- a/src/third_party/wiredtiger/src/btree/bt_io.c
+++ b/src/third_party/wiredtiger/src/btree/bt_io.c
@@ -145,6 +145,8 @@ __wt_bt_read(WT_SESSION_IMPL *session,
WT_STAT_DATA_INCR(session, compress_read);
WT_STAT_CONN_INCRV(session, cache_bytes_read, dsk->mem_size);
WT_STAT_DATA_INCRV(session, cache_bytes_read, dsk->mem_size);
+ (void)__wt_atomic_add64(
+ &S2C(session)->cache->bytes_read, dsk->mem_size);
if (0) {
corrupt: if (ret == 0)
@@ -382,9 +384,10 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
WT_STAT_CONN_INCR(session, cache_write);
WT_STAT_DATA_INCR(session, cache_write);
- S2C(session)->cache->bytes_written += dsk->mem_size;
WT_STAT_CONN_INCRV(session, cache_bytes_write, dsk->mem_size);
WT_STAT_DATA_INCRV(session, cache_bytes_write, dsk->mem_size);
+ (void)__wt_atomic_add64(
+ &S2C(session)->cache->bytes_written, dsk->mem_size);
err: __wt_scr_free(session, &ctmp);
__wt_scr_free(session, &etmp);
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index d3df9f6bf78..e2d4fa01fa7 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -114,7 +114,6 @@ err: if ((pindex = WT_INTL_INDEX_GET_SAFE(page)) != NULL) {
/* Increment the cache statistics. */
__wt_cache_page_inmem_incr(session, page, size);
- (void)__wt_atomic_add64(&cache->bytes_read, size);
(void)__wt_atomic_add64(&cache->pages_inmem, 1);
page->cache_create_gen = cache->evict_pass_gen;
@@ -312,12 +311,13 @@ __inmem_col_var_repeats(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t *np)
const WT_PAGE_HEADER *dsk;
uint32_t i;
+ *np = 0;
+
btree = S2BT(session);
dsk = page->dsk;
unpack = &_unpack;
/* Walk the page, counting entries for the repeats array. */
- *np = 0;
WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
__wt_cell_unpack(cell, unpack);
if (__wt_cell_rle(unpack) > 1)
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index 0246c1eca66..838c6845b08 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -296,7 +296,7 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
* Read a page from the file.
*/
static int
-__page_read(WT_SESSION_IMPL *session, WT_REF *ref)
+__page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
{
struct timespec start, stop;
WT_BTREE *btree;
@@ -304,7 +304,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref)
WT_ITEM tmp;
WT_PAGE *page;
size_t addr_size;
- uint32_t new_state, previous_state;
+ uint32_t page_flags, new_state, previous_state;
const uint8_t *addr;
bool timer;
@@ -372,9 +372,12 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref)
* the allocated copy of the disk image on return, the in-memory object
* steals it.
*/
- WT_ERR(__wt_page_inmem(session, ref, tmp.data,
- WT_DATA_IN_ITEM(&tmp) ?
- WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED, &page));
+ page_flags =
+ WT_DATA_IN_ITEM(&tmp) ? WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED;
+ if (LF_ISSET(WT_READ_NO_EVICT) ||
+ F_ISSET(session, WT_SESSION_NO_EVICTION))
+ FLD_SET(page_flags, WT_PAGE_READ_NO_EVICT);
+ WT_ERR(__wt_page_inmem(session, ref, tmp.data, page_flags, &page));
tmp.mem = NULL;
skip_read:
@@ -499,7 +502,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
if (!LF_ISSET(WT_READ_NO_EVICT))
WT_RET(__wt_cache_eviction_check(
session, 1, NULL));
- WT_RET(__page_read(session, ref));
+ WT_RET(__page_read(session, ref, flags));
/*
* We just read a page, don't evict it before we have a
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 1ccb27c2296..66f0478c542 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -1495,8 +1495,8 @@ __split_multi_inmem(
* tombstone away: we may need it to correctly resolve
* modifications.
*/
- if (prev_upd != NULL &&
- prev_upd->type == WT_UPDATE_DELETED)
+ if (supd->onpage_upd->type == WT_UPDATE_DELETED &&
+ prev_upd != NULL)
prev_upd = prev_upd->next;
if (prev_upd != NULL) {
__wt_update_obsolete_free(
@@ -1665,6 +1665,12 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
* overrides WT_REF_DISK.
*/
if (multi->las_pageid != 0) {
+ /*
+ * We should not have a disk image if we did lookaside
+ * eviction.
+ */
+ WT_ASSERT(session, multi->disk_image == NULL);
+
WT_RET(__wt_calloc_one(session, &ref->page_las));
ref->page_las->las_pageid = multi->las_pageid;
#ifdef HAVE_TIMESTAMPS
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index 952b90e3444..c3b5d926a8f 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -696,9 +696,10 @@ __verify_overflow_cell(
const WT_PAGE_HEADER *dsk;
uint32_t cell_num, i;
+ *found = false;
+
btree = S2BT(session);
unpack = &_unpack;
- *found = false;
/*
* If a tree is empty (just created), it won't have a disk image;
diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c
index 16081e841dc..68299a396ba 100644
--- a/src/third_party/wiredtiger/src/btree/row_srch.c
+++ b/src/third_party/wiredtiger/src/btree/row_srch.c
@@ -23,9 +23,10 @@ __search_insert_append(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
WT_ITEM key;
int cmp, i;
+ *donep = 0;
+
btree = S2BT(session);
collator = btree->collator;
- *donep = 0;
if ((ins = WT_SKIP_LAST(ins_head)) == NULL)
return (0);
diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c
index 00dafb680da..13516d80c58 100644
--- a/src/third_party/wiredtiger/src/cache/cache_las.c
+++ b/src/third_party/wiredtiger/src/cache/cache_las.c
@@ -27,7 +27,7 @@ __wt_las_stats_update(WT_SESSION_IMPL *session)
* table data-source statistics. If there's no lookaside table, values
* remain 0.
*/
- if (!F_ISSET(conn, WT_CONN_LAS_OPEN))
+ if (!F_ISSET(conn, WT_CONN_LOOKASIDE_OPEN))
return;
/*
@@ -89,24 +89,19 @@ __wt_las_create(WT_SESSION_IMPL *session)
WT_RET(__wt_session_create(session, WT_LAS_URI, WT_LAS_FORMAT));
/*
- * Flag that the lookaside table has been created (before creating the
- * connection's lookaside table session, it checks before creating a
- * lookaside table cursor.
+ * Open a shared internal session and cursor used for the lookaside
+ * table. This session should never be tapped for eviction.
*/
- F_SET(conn, WT_CONN_LAS_OPEN);
-
- /*
- * Open a shared internal session used to access the lookaside table.
- * This session should never be tapped for eviction.
- */
- session_flags = WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION;
- WT_ERR(__wt_open_internal_session(
+ session_flags = WT_SESSION_NO_EVICTION;
+ WT_RET(__wt_open_internal_session(
conn, "lookaside table", true, session_flags, &conn->las_session));
+ WT_RET(__wt_las_cursor_open(conn->las_session));
- return (0);
+ /* The statistics server is already running, make sure we don't race. */
+ WT_WRITE_BARRIER();
+ F_SET(conn, WT_CONN_LOOKASIDE_OPEN);
-err: F_CLR(conn, WT_CONN_LAS_OPEN);
- return (ret);
+ return (0);
}
/*
@@ -122,6 +117,7 @@ __wt_las_destroy(WT_SESSION_IMPL *session)
conn = S2C(session);
+ F_CLR(conn, WT_CONN_LOOKASIDE_OPEN);
if (conn->las_session == NULL)
return (0);
@@ -138,15 +134,16 @@ __wt_las_destroy(WT_SESSION_IMPL *session)
* Open a new lookaside table cursor.
*/
int
-__wt_las_cursor_open(WT_SESSION_IMPL *session, WT_CURSOR **cursorp)
+__wt_las_cursor_open(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
+ WT_CURSOR *cursor;
WT_DECL_RET;
const char *open_cursor_cfg[] = {
WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL };
WT_WITHOUT_DHANDLE(session, ret = __wt_open_cursor(
- session, WT_LAS_URI, NULL, open_cursor_cfg, cursorp));
+ session, WT_LAS_URI, NULL, open_cursor_cfg, &cursor));
WT_RET(ret);
/*
@@ -154,7 +151,7 @@ __wt_las_cursor_open(WT_SESSION_IMPL *session, WT_CURSOR **cursorp)
* we don't always switch the LAS handle in to the session before
* entering this function.
*/
- btree = ((WT_CURSOR_BTREE *)(*cursorp))->btree;
+ btree = ((WT_CURSOR_BTREE *)cursor)->btree;
/* Track the lookaside file ID. */
if (S2C(session)->las_fileid == 0)
@@ -176,6 +173,9 @@ __wt_las_cursor_open(WT_SESSION_IMPL *session, WT_CURSOR **cursorp)
if (!F_ISSET(btree, WT_BTREE_NO_LOGGING))
F_SET(btree, WT_BTREE_NO_LOGGING);
+ session->las_cursor = cursor;
+ F_SET(session, WT_SESSION_LOOKASIDE_CURSOR);
+
return (0);
}
diff --git a/src/third_party/wiredtiger/src/config/config.c b/src/third_party/wiredtiger/src/config/config.c
index dd46aa55ad1..d48c39de6b5 100644
--- a/src/third_party/wiredtiger/src/config/config.c
+++ b/src/third_party/wiredtiger/src/config/config.c
@@ -346,12 +346,12 @@ __config_next(WT_CONFIG *conf, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value)
"", 0, 1, WT_CONFIG_ITEM_BOOL
};
+ /* Keys with no value default to true. */
+ *value = true_value;
+
out = key;
utf8_remain = 0;
-
key->len = 0;
- /* Keys with no value default to true. */
- *value = true_value;
if (conf->go == NULL)
conf->go = gostruct;
diff --git a/src/third_party/wiredtiger/src/config/config_api.c b/src/third_party/wiredtiger/src/config/config_api.c
index 74da016afbc..3b37732f49b 100644
--- a/src/third_party/wiredtiger/src/config/config_api.c
+++ b/src/third_party/wiredtiger/src/config/config_api.c
@@ -73,6 +73,7 @@ wiredtiger_config_parser_open(WT_SESSION *wt_session,
WT_SESSION_IMPL *session;
*config_parserp = NULL;
+
session = (WT_SESSION_IMPL *)wt_session;
WT_RET(__wt_calloc_one(session, &config_parser));
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index d7f4f6fe148..4edd436712b 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -240,6 +240,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_begin_transaction[] = {
{ "name", "string", NULL, NULL, NULL, 0 },
{ "priority", "int", NULL, "min=-100,max=100", NULL, 0 },
{ "read_timestamp", "string", NULL, NULL, NULL, 0 },
+ { "round_to_oldest", "boolean", NULL, NULL, NULL, 0 },
{ "snapshot", "string", NULL, NULL, NULL, 0 },
{ "sync", "boolean", NULL, NULL, NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
@@ -1210,8 +1211,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {
confchk_WT_SESSION_alter, 4
},
{ "WT_SESSION.begin_transaction",
- "isolation=,name=,priority=0,read_timestamp=,snapshot=,sync=",
- confchk_WT_SESSION_begin_transaction, 6
+ "isolation=,name=,priority=0,read_timestamp=,"
+ "round_to_oldest=false,snapshot=,sync=",
+ confchk_WT_SESSION_begin_transaction, 7
},
{ "WT_SESSION.checkpoint",
"drop=,force=false,name=,target=,use_timestamp=true",
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index 4fcd1b8ede1..f7525f22787 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -2652,8 +2652,15 @@ err: /* Discard the scratch buffers. */
__wt_scr_discard(session);
__wt_scr_discard(&conn->dummy_session);
- if (ret != 0)
+ if (ret != 0) {
+ /*
+ * Set panic if we're returning the run recovery error so that
+ * we don't try to checkpoint data handles.
+ */
+ if (ret == WT_RUN_RECOVERY)
+ F_SET(conn, WT_CONN_PANIC);
WT_TRET(__wt_connection_close(conn));
+ }
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c
index da5b6bfd55f..c83fb544982 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache.c
@@ -295,11 +295,11 @@ __wt_cache_destroy(WT_SESSION_IMPL *session)
return (0);
/* The cache should be empty at this point. Complain if not. */
- if (cache->pages_inmem != cache->pages_evict)
+ if (cache->pages_inmem != cache->pages_evicted)
__wt_errx(session,
"cache server: exiting with %" PRIu64 " pages in "
"memory and %" PRIu64 " pages evicted",
- cache->pages_inmem, cache->pages_evict);
+ cache->pages_inmem, cache->pages_evicted);
if (cache->bytes_image != 0)
__wt_errx(session,
"cache server: exiting with %" PRIu64 " image bytes in "
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
index 83e122aaabe..4475b27a7b8 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
@@ -575,6 +575,7 @@ __cache_pool_adjust(WT_SESSION_IMPL *session,
bool busy, decrease_ok, grow, pool_full;
*adjustedp = false;
+
cp = __wt_process.cache_pool;
grow = false;
pool_full = cp->currently_used >= cp->size;
diff --git a/src/third_party/wiredtiger/src/conn/conn_sweep.c b/src/third_party/wiredtiger/src/conn/conn_sweep.c
index a164e34fe33..9b64c7a0f77 100644
--- a/src/third_party/wiredtiger/src/conn/conn_sweep.c
+++ b/src/third_party/wiredtiger/src/conn/conn_sweep.c
@@ -159,10 +159,10 @@ __sweep_discard_trees(WT_SESSION_IMPL *session, u_int *dead_handlesp)
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
- conn = S2C(session);
-
*dead_handlesp = 0;
+ conn = S2C(session);
+
TAILQ_FOREACH(dhandle, &conn->dhqh, q) {
if (WT_DHANDLE_CAN_DISCARD(dhandle))
++*dead_handlesp;
diff --git a/src/third_party/wiredtiger/src/cursor/cur_join.c b/src/third_party/wiredtiger/src/cursor/cur_join.c
index 24f68e3863a..e1fbb63178f 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_join.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_join.c
@@ -54,6 +54,7 @@ __curjoin_iter_init(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin,
WT_CURSOR_JOIN_ITER *iter;
*iterp = NULL;
+
WT_RET(__wt_calloc_one(session, iterp));
iter = *iterp;
iter->cjoin = cjoin;
diff --git a/src/third_party/wiredtiger/src/docs/transactions.dox b/src/third_party/wiredtiger/src/docs/transactions.dox
index 4ba6d5d2526..f66923aac3f 100644
--- a/src/third_party/wiredtiger/src/docs/transactions.dox
+++ b/src/third_party/wiredtiger/src/docs/transactions.dox
@@ -154,9 +154,9 @@ Named snapshots are not durable: they do not survive WT_CONNECTION::close.
Some applications have their own notion of time, including an expected commit
order for transactions that may be inconsistent with the order assigned by
WiredTiger. We assume that applications can represent their notion of a
-timestamp as an integral value of some size that generally increases over
-time. For example, a simple 64-bit integer could be incremented to generate
-transaction timestamps, if that is sufficient for the application.
+timestamp as an unsigned integral value of some size that generally increases
+over time. For example, a simple 64-bit integer could be incremented to
+generate transaction timestamps, if that is sufficient for the application.
The application's timestamp size is specified as a number of bytes at build
time, with <code>configure --with-timestamp-size=X</code>. The default
@@ -168,6 +168,13 @@ Applications can assign explicit commit timestamps to transactions, then read
hexadecimal encoding, so the encoded value can be twice as long as the raw
timestamp value.
+WiredTiger also provides the ability to set a different commit timestamp for
+different sets of updates in a single transaction. This can be done by calling
+WT_SESSION::timestamp_transaction repeatedly to set a new commit timestamp
+between sets of updates for the current transaction. This gives the ability to
+commit several updates with different read "as of" timestamps in a single
+transaction.
+
Setting a read timestamp in WT_SESSION::begin_transaction forces a transaction
to run at snapshot isolation and ignore any commits with a newer timestamp.
@@ -182,7 +189,12 @@ known stable location that is sufficient for durability. During a checkpoint
the state of a table will be saved only as of the stable timestamp. Newer
updates after that stable timestamp will not be included in the checkpoint.
That can be overridden in the call to WT_SESSION::checkpoint. It is expected
-that the stable timestamp is updated frequently.
+that the stable timestamp is updated frequently. Setting a stable location
+provides the ability, if needed, to roll back to this location by placing a call
+to WT_CONNECTION::rollback_to_stable. With the rollback, however, WiredTiger
+does not automatically reset the maximum commit timestamp it is tracking. The
+application should explicitly do so by setting a commit timestamp in
+WT_CONNECTION::set_timestamp.
Commit timestamps cannot be set in the past of any read timestamp that has
been used. This is enforced by assertions in diagnostic builds, if
diff --git a/src/third_party/wiredtiger/src/evict/evict_file.c b/src/third_party/wiredtiger/src/evict/evict_file.c
index bd70de8bddb..147b615c0ab 100644
--- a/src/third_party/wiredtiger/src/evict/evict_file.c
+++ b/src/third_party/wiredtiger/src/evict/evict_file.c
@@ -53,7 +53,7 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
* the tree are removed.
*/
if (F_ISSET(dhandle, WT_DHANDLE_DEAD) &&
- F_ISSET(S2C(session), WT_CONN_LAS_OPEN) &&
+ F_ISSET(S2C(session), WT_CONN_LOOKASIDE_OPEN) &&
!F_ISSET(btree, WT_BTREE_LOOKASIDE)) {
WT_ASSERT(session, !WT_IS_METADATA(dhandle));
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 02208e0f84a..0205dbb08e3 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -377,13 +377,12 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work)
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
- uint64_t orig_pages_evicted;
+
+ /* Assume there has been no progress. */
+ *did_work = false;
conn = S2C(session);
cache = conn->cache;
- WT_ASSERT(session, did_work != NULL);
- *did_work = false;
- orig_pages_evicted = cache->pages_evicted;
/* Evict pages from the cache as needed. */
WT_RET(__evict_pass(session));
@@ -411,46 +410,58 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work)
__wt_readunlock(session, &conn->dhandle_lock);
WT_RET(ret);
- cache->pages_evicted = 0;
- } else if (cache->pages_evicted != cache->pages_evict) {
- cache->pages_evicted = cache->pages_evict;
+ /* Make sure we'll notice next time we're stuck. */
+ cache->last_eviction_progress = 0;
+ return (0);
+ }
+
+ /* Track if work was done. */
+ *did_work = cache->eviction_progress != cache->last_eviction_progress;
+ cache->last_eviction_progress = cache->eviction_progress;
+
+ /* Eviction is stuck, check if we have made progress. */
+ if (*did_work) {
#if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE)
__wt_epoch(session, &cache->stuck_time);
- } else if (!F_ISSET(conn, WT_CONN_IN_MEMORY)) {
- /*
- * If we're stuck for 5 minutes in diagnostic mode, or the
- * verbose evict_stuck flag is configured, log the cache
- * and transaction state.
- *
- * If we're stuck for 5 minutes in diagnostic mode, give up.
- *
- * We don't do this check for in-memory workloads because
- * application threads are not blocked by the cache being full.
- * If the cache becomes full of clean pages, we can be
- * servicing reads while the cache appears stuck to eviction.
- */
- __wt_epoch(session, &now);
- if (WT_TIMEDIFF_SEC(now, cache->stuck_time) > 300) {
+#endif
+ return (0);
+ }
+
+#if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE)
+ /*
+ * If we're stuck for 5 minutes in diagnostic mode, or the verbose
+ * evict_stuck flag is configured, log the cache and transaction state.
+ *
+ * If we're stuck for 5 minutes in diagnostic mode, give up.
+ *
+ * We don't do this check for in-memory workloads because application
+ * threads are not blocked by the cache being full. If the cache becomes
+ * full of clean pages, we can be servicing reads while the cache
+ * appears stuck to eviction.
+ */
+ if (F_ISSET(conn, WT_CONN_IN_MEMORY))
+ return (0);
+
+ __wt_epoch(session, &now);
+ if (WT_TIMEDIFF_SEC(now, cache->stuck_time) > 300) {
#if defined(HAVE_DIAGNOSTIC)
- __wt_err(session, ETIMEDOUT,
- "Cache stuck for too long, giving up");
- ret = ETIMEDOUT;
- WT_TRET(__wt_verbose_dump_txn(session));
- WT_TRET(__wt_verbose_dump_cache(session));
- return (ret);
+ __wt_err(session, ETIMEDOUT,
+ "Cache stuck for too long, giving up");
+ ret = ETIMEDOUT;
+ WT_TRET(__wt_verbose_dump_txn(session));
+ WT_TRET(__wt_verbose_dump_cache(session));
+ return (ret);
#elif defined(HAVE_VERBOSE)
- if (WT_VERBOSE_ISSET(session, WT_VERB_EVICT_STUCK)) {
- WT_RET(__wt_verbose_dump_txn(session));
- WT_RET(__wt_verbose_dump_cache(session));
+ if (WT_VERBOSE_ISSET(session, WT_VERB_EVICT_STUCK)) {
+ WT_RET(__wt_verbose_dump_txn(session));
+ WT_RET(__wt_verbose_dump_cache(session));
- /* Reset the timer. */
- __wt_epoch(session, &cache->stuck_time);
- }
-#endif
+ /* Reset the timer. */
+ __wt_epoch(session, &cache->stuck_time);
}
#endif
}
- *did_work = cache->pages_evicted != orig_pages_evicted;
+#endif
return (0);
}
@@ -622,7 +633,7 @@ __evict_pass(WT_SESSION_IMPL *session)
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_TXN_GLOBAL *txn_global;
- uint64_t oldest_id, pages_evicted, prev_oldest_id;
+ uint64_t eviction_progress, oldest_id, prev_oldest_id;
u_int loop;
conn = S2C(session);
@@ -630,7 +641,7 @@ __evict_pass(WT_SESSION_IMPL *session)
txn_global = &conn->txn_global;
/* Track whether pages are being evicted and progress is made. */
- pages_evicted = cache->pages_evict;
+ eviction_progress = cache->eviction_progress;
prev_oldest_id = txn_global->oldest_id;
WT_CLEAR(prev);
@@ -705,7 +716,7 @@ __evict_pass(WT_SESSION_IMPL *session)
* treat the cache as stuck and start rolling back
* transactions and writing updates to the lookaside table.
*/
- if (pages_evicted == cache->pages_evict) {
+ if (eviction_progress == cache->eviction_progress) {
if (WT_TIMEDIFF_MS(now, prev) >= 20 &&
F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD |
WT_CACHE_EVICT_DIRTY_HARD)) {
@@ -757,7 +768,7 @@ __evict_pass(WT_SESSION_IMPL *session)
cache->evict_aggressive_score);
}
loop = 0;
- pages_evicted = cache->pages_evict;
+ eviction_progress = cache->eviction_progress;
}
}
return (0);
@@ -959,7 +970,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session)
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
uint64_t delta_msec, delta_pages;
- uint64_t pgs_evicted_cur, pgs_evicted_persec_cur, time_diff;
+ uint64_t eviction_progress, eviction_progress_rate, time_diff;
int32_t cur_threads, i, target_threads, thread_surplus;
conn = S2C(session);
@@ -972,16 +983,16 @@ __evict_tune_workers(WT_SESSION_IMPL *session)
if (conn->evict_threads_max == conn->evict_threads_min)
return;
- pgs_evicted_cur = 0;
+ eviction_progress_rate = 0;
__wt_epoch(session, &current_time);
- time_diff = WT_TIMEDIFF_MS(current_time, conn->evict_tune_last_time);
+ time_diff = WT_TIMEDIFF_MS(current_time, cache->evict_tune_last_time);
/*
* If we have reached the stable state and have not run long enough to
* surpass the forced re-tuning threshold, return.
*/
- if (conn->evict_tune_stable) {
+ if (cache->evict_tune_stable) {
if (time_diff < EVICT_FORCE_RETUNE)
return;
@@ -989,11 +1000,11 @@ __evict_tune_workers(WT_SESSION_IMPL *session)
* Stable state was reached a long time ago. Let's re-tune.
* Reset all the state.
*/
- conn->evict_tune_stable = false;
- conn->evict_tune_last_action_time.tv_sec = 0;
- conn->evict_tune_pgs_last = 0;
- conn->evict_tune_num_points = 0;
- conn->evict_tune_pg_sec_max = 0;
+ cache->evict_tune_stable = false;
+ cache->evict_tune_last_action_time.tv_sec = 0;
+ cache->evict_tune_progress_last = 0;
+ cache->evict_tune_num_points = 0;
+ cache->evict_tune_progress_rate_max = 0;
/* Reduce the number of eviction workers by one */
thread_surplus =
@@ -1017,10 +1028,10 @@ __evict_tune_workers(WT_SESSION_IMPL *session)
return;
/*
- * Measure the number of evicted pages so far. Eviction rate correlates
- * to performance, so this is our metric of success.
+ * Measure the eviction progress so far. Eviction rate correlates to
+ * performance, so this is our metric of success.
*/
- pgs_evicted_cur = cache->pages_evict;
+ eviction_progress = cache->eviction_progress;
/*
* If we have recorded the number of pages evicted at the end of
@@ -1029,21 +1040,21 @@ __evict_tune_workers(WT_SESSION_IMPL *session)
* measurement interval.
* Otherwise, we just record the number of evicted pages and return.
*/
- if (conn->evict_tune_pgs_last == 0)
+ if (cache->evict_tune_progress_last == 0)
goto done;
- delta_msec = WT_TIMEDIFF_MS(current_time, conn->evict_tune_last_time);
- delta_pages = pgs_evicted_cur - conn->evict_tune_pgs_last;
- pgs_evicted_persec_cur = (delta_pages * WT_THOUSAND) / delta_msec;
- conn->evict_tune_num_points++;
+ delta_msec = WT_TIMEDIFF_MS(current_time, cache->evict_tune_last_time);
+ delta_pages = eviction_progress - cache->evict_tune_progress_last;
+ eviction_progress_rate = (delta_pages * WT_THOUSAND) / delta_msec;
+ cache->evict_tune_num_points++;
/*
* Keep track of the maximum eviction throughput seen and the number
* of workers corresponding to that throughput.
*/
- if (pgs_evicted_persec_cur > conn->evict_tune_pg_sec_max) {
- conn->evict_tune_pg_sec_max = pgs_evicted_persec_cur;
- conn->evict_tune_workers_best =
+ if (eviction_progress_rate > cache->evict_tune_progress_rate_max) {
+ cache->evict_tune_progress_rate_max = eviction_progress_rate;
+ cache->evict_tune_workers_best =
conn->evict_threads.current_threads;
}
@@ -1057,8 +1068,8 @@ __evict_tune_workers(WT_SESSION_IMPL *session)
* we will go back to the best observed number of workers and
* settle into a stable state.
*/
- if (conn->evict_tune_num_points >= conn->evict_tune_datapts_needed) {
- if (conn->evict_tune_workers_best ==
+ if (cache->evict_tune_num_points >= cache->evict_tune_datapts_needed) {
+ if (cache->evict_tune_workers_best ==
conn->evict_threads.current_threads &&
conn->evict_threads.current_threads <
conn->evict_threads_max) {
@@ -1066,7 +1077,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session)
* Keep adding workers. We will check again
* at the next check point.
*/
- conn->evict_tune_datapts_needed += WT_MIN(
+ cache->evict_tune_datapts_needed += WT_MIN(
EVICT_TUNE_DATAPT_MIN,
(conn->evict_threads_max -
conn->evict_threads.current_threads) /
@@ -1079,7 +1090,7 @@ __evict_tune_workers(WT_SESSION_IMPL *session)
*/
thread_surplus =
(int32_t)conn->evict_threads.current_threads -
- (int32_t)conn->evict_tune_workers_best;
+ (int32_t)cache->evict_tune_workers_best;
for (i = 0; i < thread_surplus; i++) {
__wt_thread_group_stop_one(
@@ -1089,8 +1100,8 @@ __evict_tune_workers(WT_SESSION_IMPL *session)
}
WT_STAT_CONN_SET(session,
cache_eviction_stable_state_workers,
- conn->evict_tune_workers_best);
- conn->evict_tune_stable = true;
+ cache->evict_tune_workers_best);
+ cache->evict_tune_stable = true;
WT_STAT_CONN_SET(session, cache_eviction_active_workers,
conn->evict_threads.current_threads);
goto done;
@@ -1103,8 +1114,8 @@ __evict_tune_workers(WT_SESSION_IMPL *session)
* we must accumulate before deciding if we should keep adding workers
* or settle on a previously tried stable number of workers.
*/
- if (conn->evict_tune_last_action_time.tv_sec == 0)
- conn->evict_tune_datapts_needed = EVICT_TUNE_DATAPT_MIN;
+ if (cache->evict_tune_last_action_time.tv_sec == 0)
+ cache->evict_tune_datapts_needed = EVICT_TUNE_DATAPT_MIN;
if (F_ISSET(cache, WT_CACHE_EVICT_ALL)) {
cur_threads = (int32_t)conn->evict_threads.current_threads;
@@ -1121,14 +1132,14 @@ __evict_tune_workers(WT_SESSION_IMPL *session)
__wt_verbose(session,
WT_VERB_EVICTSERVER, "%s", "added worker thread");
}
- conn->evict_tune_last_action_time = current_time;
+ cache->evict_tune_last_action_time = current_time;
}
WT_STAT_CONN_SET(session, cache_eviction_active_workers,
conn->evict_threads.current_threads);
-done: conn->evict_tune_last_time = current_time;
- conn->evict_tune_pgs_last = pgs_evicted_cur;
+done: cache->evict_tune_last_time = current_time;
+ cache->evict_tune_progress_last = eviction_progress;
}
/*
@@ -2022,6 +2033,9 @@ __evict_get_ref(
uint32_t candidates;
bool is_app, server_only, urgent_ok;
+ *btreep = NULL;
+ *refp = NULL;
+
cache = S2C(session)->cache;
is_app = !F_ISSET(session, WT_SESSION_INTERNAL);
server_only = is_server && !WT_EVICT_HAS_WORKERS(session);
@@ -2029,8 +2043,6 @@ __evict_get_ref(
!WT_EVICT_HAS_WORKERS(session) ||
(is_app && __wt_cache_aggressive(session));
urgent_queue = cache->evict_urgent_queue;
- *btreep = NULL;
- *refp = NULL;
WT_STAT_CONN_INCR(session, cache_eviction_get_ref);
@@ -2255,7 +2267,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
WT_DECL_RET;
WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *txn_state;
- uint64_t init_evict_count, max_pages_evicted;
+ uint64_t initial_progress, max_progress;
bool timer;
conn = S2C(session);
@@ -2282,7 +2294,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
if (timer)
__wt_epoch(session, &enter);
- for (init_evict_count = cache->pages_evict;; ret = 0) {
+ for (initial_progress = cache->eviction_progress;; ret = 0) {
/*
* A pathological case: if we're the oldest transaction in the
* system and the eviction server is stuck trying to find space,
@@ -2307,12 +2319,12 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
if (!busy && txn_state->pinned_id != WT_TXN_NONE &&
txn_global->current != txn_global->oldest_id)
busy = true;
- max_pages_evicted = busy ? 5 : 20;
+ max_progress = busy ? 5 : 20;
/* See if eviction is still needed. */
if (!__wt_eviction_needed(session, busy, &pct_full) ||
- (pct_full < 100 &&
- cache->pages_evict > init_evict_count + max_pages_evicted))
+ (pct_full < 100 && cache->eviction_progress >
+ initial_progress + max_progress))
break;
/*
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h
index 3eb951f81ac..e1c3ba9ff3c 100644
--- a/src/third_party/wiredtiger/src/include/api.h
+++ b/src/third_party/wiredtiger/src/include/api.h
@@ -101,6 +101,15 @@
API_END(s, ret); \
return ((ret) == WT_NOTFOUND ? ENOENT : (ret))
+/*
+ * Used in cases where transaction error should not be set, but the error is
+ * returned from the API. Success is passed to the API_END macro. If the
+ * method is about to return WT_NOTFOUND map it to ENOENT.
+ */
+#define API_END_RET_NO_TXN_ERROR(s, ret) \
+ API_END(s, 0); \
+ return ((ret) == WT_NOTFOUND ? ENOENT : (ret))
+
#define CONNECTION_API_CALL(conn, s, n, config, cfg) \
s = (conn)->default_session; \
API_CALL(s, WT_CONNECTION, n, NULL, config, cfg)
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 8ba6a240ace..158fcf87d29 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -589,8 +589,9 @@ struct __wt_page {
#define WT_PAGE_DISK_MAPPED 0x04 /* Disk image in mapped memory */
#define WT_PAGE_EVICT_LRU 0x08 /* Page is on the LRU queue */
#define WT_PAGE_OVERFLOW_KEYS 0x10 /* Page has overflow keys */
-#define WT_PAGE_SPLIT_INSERT 0x20 /* A leaf page was split for append */
-#define WT_PAGE_UPDATE_IGNORE 0x40 /* Ignore updates on page discard */
+#define WT_PAGE_READ_NO_EVICT 0x20 /* Page read with eviction disabled */
+#define WT_PAGE_SPLIT_INSERT 0x40 /* A leaf page was split for append */
+#define WT_PAGE_UPDATE_IGNORE 0x80 /* Ignore updates on page discard */
uint8_t flags_atomic; /* Atomic flags, use F_*_ATOMIC */
uint8_t unused[2]; /* Unused padding */
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 8803f3b907d..f2948bfc90f 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -170,20 +170,16 @@ __wt_cache_decr_check_size(
if (__wt_atomic_subsize(vp, v) < WT_EXABYTE)
return;
-#ifdef HAVE_DIAGNOSTIC
- (void)__wt_atomic_addsize(vp, v);
-
- {
- static bool first = true;
+ /*
+ * It's a bug if this accounting underflowed but allow the application
+ * to proceed - the consequence is we use more cache than configured.
+ */
+ *vp = 0;
+ __wt_errx(session,
+ "%s went negative with decrement of %" WT_SIZET_FMT, fld, v);
- if (!first)
- return;
- __wt_errx(session, "%s underflow: decrementing %" WT_SIZET_FMT, fld, v);
- first = false;
- }
-#else
- WT_UNUSED(fld);
- WT_UNUSED(session);
+#ifdef HAVE_DIAGNOSTIC
+ __wt_abort(session);
#endif
}
@@ -198,40 +194,20 @@ __wt_cache_decr_check_uint64(
if (__wt_atomic_sub64(vp, v) < WT_EXABYTE)
return;
-#ifdef HAVE_DIAGNOSTIC
- (void)__wt_atomic_add64(vp, v);
-
- {
- static bool first = true;
+ /*
+ * It's a bug if this accounting underflowed but allow the application
+ * to proceed - the consequence is we use more cache than configured.
+ */
+ *vp = 0;
+ __wt_errx(session,
+ "%s went negative with decrement of %" WT_SIZET_FMT, fld, v);
- if (!first)
- return;
- __wt_errx(session, "%s underflow: decrementing %" WT_SIZET_FMT, fld, v);
- first = false;
- }
-#else
- WT_UNUSED(fld);
- WT_UNUSED(session);
+#ifdef HAVE_DIAGNOSTIC
+ __wt_abort(session);
#endif
}
/*
- * __wt_cache_decr_zero_uint64 --
- * Decrement a uint64_t cache value and zero it on underflow.
- */
-static inline void
-__wt_cache_decr_zero_uint64(
- WT_SESSION_IMPL *session, uint64_t *vp, size_t v, const char *fld)
-{
- if (__wt_atomic_sub64(vp, v) < WT_EXABYTE)
- return;
-
- __wt_errx(
- session, "%s went negative: decrementing %" WT_SIZET_FMT, fld, v);
- *vp = 0;
-}
-
-/*
* __wt_cache_page_byte_dirty_decr --
* Decrement the page's dirty byte count, guarding from underflow.
*/
@@ -368,10 +344,10 @@ __wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page)
cache = S2C(session)->cache;
if (WT_PAGE_IS_INTERNAL(page))
- __wt_cache_decr_zero_uint64(session,
+ __wt_cache_decr_check_uint64(session,
&cache->pages_dirty_intl, 1, "dirty internal page count");
else
- __wt_cache_decr_zero_uint64(session,
+ __wt_cache_decr_check_uint64(session,
&cache->pages_dirty_leaf, 1, "dirty leaf page count");
modify = page->modify;
@@ -438,33 +414,41 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page, bool rewrite)
/* Update the cache's dirty-byte count. */
if (modify != NULL && modify->bytes_dirty != 0) {
if (WT_PAGE_IS_INTERNAL(page)) {
- __wt_cache_decr_zero_uint64(session,
+ __wt_cache_decr_check_uint64(session,
&btree->bytes_dirty_intl,
modify->bytes_dirty, "WT_BTREE.bytes_dirty_intl");
- __wt_cache_decr_zero_uint64(session,
+ __wt_cache_decr_check_uint64(session,
&cache->bytes_dirty_intl,
modify->bytes_dirty, "WT_CACHE.bytes_dirty_intl");
} else if (!btree->lsm_primary) {
- __wt_cache_decr_zero_uint64(session,
+ __wt_cache_decr_check_uint64(session,
&btree->bytes_dirty_leaf,
modify->bytes_dirty, "WT_BTREE.bytes_dirty_leaf");
- __wt_cache_decr_zero_uint64(session,
+ __wt_cache_decr_check_uint64(session,
&cache->bytes_dirty_leaf,
modify->bytes_dirty, "WT_CACHE.bytes_dirty_leaf");
}
}
- /* Update pages and bytes evicted. */
+ /* Update bytes and pages evicted. */
(void)__wt_atomic_add64(&cache->bytes_evict, page->memory_footprint);
+ (void)__wt_atomic_addv64(&cache->pages_evicted, 1);
/*
- * Don't count rewrites as eviction: there's no guarantee we are making
- * real progress.
+ * Track if eviction makes progress. This is used in various places to
+ * determine whether eviction is stuck.
+ *
+ * We don't count rewrites as progress.
+ *
+ * Further, if a page was read with eviction disabled, we don't count
+ * evicting it as progress. Since disabling eviction allows pages to
+ * be read even when the cache is full, we want to avoid workloads
+ * repeatedly reading a page with eviction disabled (e.g., from the
+ * metadata), then evicting that page and deciding that is a sign that
+ * eviction is unstuck.
*/
- if (rewrite)
- (void)__wt_atomic_sub64(&cache->pages_inmem, 1);
- else
- (void)__wt_atomic_addv64(&cache->pages_evict, 1);
+ if (!rewrite && !F_ISSET_ATOMIC(page, WT_PAGE_READ_NO_EVICT))
+ (void)__wt_atomic_addv64(&cache->eviction_progress, 1);
}
/*
diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h
index 9d7489732aa..456cb0382e4 100644
--- a/src/third_party/wiredtiger/src/include/cache.h
+++ b/src/third_party/wiredtiger/src/include/cache.h
@@ -64,8 +64,7 @@ struct __wt_cache {
uint64_t bytes_dirty_leaf;
uint64_t pages_dirty_leaf;
uint64_t bytes_evict; /* Bytes/pages discarded by eviction */
- volatile uint64_t pages_evict;
- uint64_t pages_evicted; /* Pages evicted during a pass */
+ uint64_t pages_evicted;
uint64_t bytes_image; /* Bytes of disk images */
uint64_t bytes_inmem; /* Bytes/pages in memory */
uint64_t pages_inmem;
@@ -73,6 +72,9 @@ struct __wt_cache {
uint64_t bytes_read; /* Bytes read into memory */
uint64_t bytes_written;
+ volatile uint64_t eviction_progress; /* Eviction progress count */
+ uint64_t last_eviction_progress;/* Tracked eviction progress */
+
uint64_t app_waits; /* User threads waited for cache */
uint64_t app_evicts; /* Pages evicted by user threads */
uint64_t server_evicts; /* Pages evicted by server thread */
@@ -111,6 +113,18 @@ struct __wt_cache {
u_int overhead_pct; /* Cache percent adjustment */
/*
+ * Eviction thread tuning information.
+ */
+ uint32_t evict_tune_datapts_needed; /* Data needed to tune */
+ struct timespec evict_tune_last_action_time;/* Time of last action */
+ struct timespec evict_tune_last_time; /* Time of last check */
+ uint32_t evict_tune_num_points; /* Number of values tried */
+ uint64_t evict_tune_progress_last; /* Progress counter */
+ uint64_t evict_tune_progress_rate_max; /* Max progress rate */
+ bool evict_tune_stable; /* Are we stable? */
+ uint32_t evict_tune_workers_best; /* Best performing value */
+
+ /*
* Pass interrupt counter.
*/
volatile uint32_t pass_intr; /* Interrupt eviction pass. */
diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i
index d51e58e471b..33b1bf2a7af 100644
--- a/src/third_party/wiredtiger/src/include/cache.i
+++ b/src/third_party/wiredtiger/src/include/cache.i
@@ -128,7 +128,7 @@ __wt_page_evict_soon(WT_SESSION_IMPL *session, WT_REF *ref)
static inline uint64_t
__wt_cache_pages_inuse(WT_CACHE *cache)
{
- return (cache->pages_inmem - cache->pages_evict);
+ return (cache->pages_inmem - cache->pages_evicted);
}
/*
@@ -194,13 +194,12 @@ __wt_cache_bytes_other(WT_CACHE *cache)
{
uint64_t bytes_image, bytes_inmem;
- bytes_image = cache->bytes_image;
- bytes_inmem = cache->bytes_inmem;
-
/*
- * The reads above could race with changes to the values, so protect
- * against underflow.
+ * Reads can race with changes to the values, so only read once and
+ * check for the race.
*/
+ bytes_image = *(volatile uint64_t *)&cache->bytes_image;
+ bytes_inmem = *(volatile uint64_t *)&cache->bytes_inmem;
return ((bytes_image > bytes_inmem) ? 0 :
__wt_cache_bytes_plus_overhead(cache, bytes_inmem - bytes_image));
}
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index 3f890a50d2b..2fa440e4e08 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -289,15 +289,6 @@ struct __wt_connection_impl {
uint32_t evict_threads_max;/* Max eviction threads */
uint32_t evict_threads_min;/* Min eviction threads */
- uint32_t evict_tune_datapts_needed;/* Data needed to tune */
- struct timespec evict_tune_last_action_time;/* Time of last action */
- struct timespec evict_tune_last_time; /* Time of last check */
- uint32_t evict_tune_num_points; /* Number of values tried */
- uint64_t evict_tune_pgs_last; /* Number of pages evicted */
- uint64_t evict_tune_pg_sec_max; /* Max throughput encountered */
- bool evict_tune_stable; /* Are we stable? */
- uint32_t evict_tune_workers_best;/* Best performing value */
-
#define WT_STATLOG_FILENAME "WiredTigerStat.%d.%H"
WT_SESSION_IMPL *stat_session; /* Statistics log session */
wt_thread_t stat_tid; /* Statistics log thread */
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 71bda687659..fc0b5135882 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -203,7 +203,7 @@ extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *
extern void __wt_las_stats_update(WT_SESSION_IMPL *session);
extern int __wt_las_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_las_cursor_open(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_cursor_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags);
extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_insert_block(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CURSOR *cursor, WT_MULTI *multi, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -821,7 +821,7 @@ extern int __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name,
extern int __wt_txn_global_query_timestamp( WT_SESSION_IMPL *session, char *hex_timestamp, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_timestamp_validate(WT_SESSION_IMPL *session, wt_timestamp_t *ts, WT_CONFIG_ITEM *cval, bool cmp_oldest, bool cmp_stable, bool cmp_commit) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t *ts, WT_CONFIG_ITEM *cval, bool cmp_oldest, bool cmp_stable, bool cmp_commit) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session);
extern void __wt_txn_clear_commit_timestamp(WT_SESSION_IMPL *session);
diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h
index ec0ef0a5311..dd98234f9e2 100644
--- a/src/third_party/wiredtiger/src/include/flags.h
+++ b/src/third_party/wiredtiger/src/include/flags.h
@@ -9,8 +9,8 @@
#define WT_CONN_EVICTION_NO_LOOKASIDE 0x00000010
#define WT_CONN_EVICTION_RUN 0x00000020
#define WT_CONN_IN_MEMORY 0x00000040
-#define WT_CONN_LAS_OPEN 0x00000080
-#define WT_CONN_LEAK_MEMORY 0x00000100
+#define WT_CONN_LEAK_MEMORY 0x00000080
+#define WT_CONN_LOOKASIDE_OPEN 0x00000100
#define WT_CONN_LSM_MERGE 0x00000200
#define WT_CONN_PANIC 0x00000400
#define WT_CONN_READONLY 0x00000800
diff --git a/src/third_party/wiredtiger/src/include/serial.i b/src/third_party/wiredtiger/src/include/serial.i
index 5d2c9a22058..c5758ee605a 100644
--- a/src/third_party/wiredtiger/src/include/serial.i
+++ b/src/third_party/wiredtiger/src/include/serial.i
@@ -159,12 +159,12 @@ __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
WT_DECL_RET;
WT_INSERT *new_ins = *new_insp;
- /* Check for page write generation wrap. */
- WT_RET(__page_write_gen_wrapped_check(page));
-
/* Clear references to memory we now own and must free on error. */
*new_insp = NULL;
+ /* Check for page write generation wrap. */
+ WT_RET(__page_write_gen_wrapped_check(page));
+
/*
* Acquire the page's spinlock unless we already have exclusive access.
* Then call the worker function.
@@ -210,12 +210,12 @@ __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
u_int i;
bool simple;
- /* Check for page write generation wrap. */
- WT_RET(__page_write_gen_wrapped_check(page));
-
/* Clear references to memory we now own and must free on error. */
*new_insp = NULL;
+ /* Check for page write generation wrap. */
+ WT_RET(__page_write_gen_wrapped_check(page));
+
simple = true;
for (i = 0; i < skipdepth; i++)
if (new_ins->next[i] == NULL)
@@ -265,12 +265,12 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
WT_UPDATE *obsolete, *upd = *updp;
uint64_t txn;
- /* Check for page write generation wrap. */
- WT_RET(__page_write_gen_wrapped_check(page));
-
/* Clear references to memory we now own and must free on error. */
*updp = NULL;
+ /* Check for page write generation wrap. */
+ WT_RET(__page_write_gen_wrapped_check(page));
+
/*
* All structure setup must be flushed before the structure is entered
* into the list. We need a write barrier here, our callers depend on
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 26dcd01fe5e..b0b71dbb3d0 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -152,9 +152,10 @@ __txn_next_op(WT_SESSION_IMPL *session, WT_TXN_OP **opp)
{
WT_TXN *txn;
- txn = &session->txn;
*opp = NULL;
+ txn = &session->txn;
+
/*
* We're about to perform an update.
* Make sure we have allocated a transaction ID.
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 830850f102b..b9801aedfbb 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -1705,8 +1705,12 @@ struct __wt_session {
* @config{priority, priority of the transaction for resolving
* conflicts. Transactions with higher values are less likely to
* abort., an integer between -100 and 100; default \c 0.}
- * @config{read_timestamp, read using the specified timestamp\, see @ref
- * transaction_timestamps., a string; default empty.}
+ * @config{read_timestamp, read using the specified timestamp. The
+ * supplied value should not be older than the current oldest timestamp.
+ * See @ref transaction_timestamps., a string; default empty.}
+ * @config{round_to_oldest, if the read timestamp is earlier than the
+ * oldest timestamp\, the read timestamp is rounded up to the oldest
+ * timestamp., a boolean flag; default \c false.}
* @config{snapshot, use a named\, in-memory snapshot\, see @ref
* transaction_named_snapshots., a string; default empty.}
* @config{sync, whether to sync log records when the transaction
@@ -1732,8 +1736,10 @@ struct __wt_session {
* @param session the session handle
* @configstart{WT_SESSION.commit_transaction, see dist/api_data.py}
* @config{commit_timestamp, set the commit timestamp for the current
- * transaction\, see @ref transaction_timestamps., a string; default
- * empty.}
+ * transaction. The supplied value should not be older than the first
+ * commit timestamp set for the current transaction. The value should
+ * also not be older than the current oldest and stable timestamps. See
+ * @ref transaction_timestamps., a string; default empty.}
* @config{sync, override whether to sync log records when the
* transaction commits\, inherited from ::wiredtiger_open \c
* transaction_sync. The \c background setting initiates a background
@@ -1775,8 +1781,10 @@ struct __wt_session {
* @param session the session handle
* @configstart{WT_SESSION.timestamp_transaction, see dist/api_data.py}
* @config{commit_timestamp, set the commit timestamp for the current
- * transaction\, see @ref transaction_timestamps., a string; default
- * empty.}
+ * transaction. The supplied value should not be older than the first
+ * commit timestamp set for the current transaction. The value should
+ * also not be older than the current oldest and stable timestamps. See
+ * @ref transaction_timestamps., a string; default empty.}
* @configend
* @errors
*/
@@ -2330,18 +2338,23 @@ struct __wt_connection {
* WT_CONNECTION::query_timestamp to ignore commit timestamps greater
* than the specified value until the next commit moves the tracked
* commit timestamp forwards. This is only intended for use where the
- * application is rolling back locally committed transactions. See @ref
- * transaction_timestamps., a string; default empty.}
+ * application is rolling back locally committed transactions. The
+ * supplied value should not be older than the current oldest and stable
+ * timestamps. See @ref transaction_timestamps., a string; default
+ * empty.}
* @config{oldest_timestamp, future commits and queries will be no
* earlier than the specified timestamp. Supplied values must be
- * monotonically increasing. See @ref transaction_timestamps., a
- * string; default empty.}
+ * monotonically increasing\, any attempt to set the value to older than
+ * the current is silently ignored. The supplied value should not be
+ * newer than the current stable timestamp. See @ref
+ * transaction_timestamps., a string; default empty.}
* @config{stable_timestamp, checkpoints will not include commits that
* are newer than the specified timestamp in tables configured with \c
* log=(enabled=false). Supplied values must be monotonically
- * increasing. The stable timestamp data stability only applies to
- * tables that are not being logged. See @ref transaction_timestamps.,
- * a string; default empty.}
+ * increasing\, any attempt to set the value to older than the current
+ * is silently ignored. The supplied value should not be older than the
+ * current oldest timestamp. See @ref transaction_timestamps., a
+ * string; default empty.}
* @configend
* @errors
*/
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index 89fe64c6f18..0b01b61ced3 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -345,14 +345,15 @@ __wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, bool *recp)
uint64_t dummy_txnid;
uint32_t dummy_fileid, dummy_optype, rectype;
- conn = S2C(session);
- log = conn->log;
-
/*
* Default is to run recovery always (regardless of whether this
* connection has logging enabled).
*/
*recp = true;
+
+ conn = S2C(session);
+ log = conn->log;
+
if (log == NULL)
return (0);
@@ -430,11 +431,11 @@ __wt_log_get_all_files(WT_SESSION_IMPL *session,
*filesp = NULL;
*countp = 0;
+ *maxid = 0;
id = 0;
log = S2C(session)->log;
- *maxid = 0;
/*
* These may be files needed by backup. Force the current slot
* to get written to the file.
@@ -1659,10 +1660,11 @@ __log_has_hole(WT_SESSION_IMPL *session,
size_t bufsz, rdlen;
char *buf, *zerobuf;
+ *hole = false;
+
conn = S2C(session);
log = conn->log;
remainder = log_size - offset;
- *hole = false;
/*
* It can be very slow looking for the last real record in the log
diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c
index f8b355dcde3..61dfb82083d 100644
--- a/src/third_party/wiredtiger/src/log/log_slot.c
+++ b/src/third_party/wiredtiger/src/log/log_slot.c
@@ -104,11 +104,11 @@ __log_slot_close(
int count;
#endif
+ *releasep = false;
+
WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SLOT));
- WT_ASSERT(session, releasep != NULL);
conn = S2C(session);
log = conn->log;
- *releasep = 0;
if (slot == NULL)
return (WT_NOTFOUND);
retry:
@@ -149,7 +149,7 @@ retry:
*/
WT_STAT_CONN_INCR(session, log_slot_closes);
if (WT_LOG_SLOT_DONE(new_state))
- *releasep = 1;
+ *releasep = true;
slot->slot_end_lsn = slot->slot_start_lsn;
/*
* A thread setting the unbuffered flag sets the unbuffered size after
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
index 5fb397e3db9..62f2c7795be 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
@@ -593,9 +593,9 @@ __wt_lsm_manager_pop_entry(
WT_LSM_MANAGER *manager;
WT_LSM_WORK_UNIT *entry;
+ *entryp = entry = NULL;
+
manager = &S2C(session)->lsm_manager;
- *entryp = NULL;
- entry = NULL;
/*
* Pop the entry off the correct queue based on our work type.
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
index 05e5fe5b07e..f6aea02e20d 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
@@ -77,6 +77,7 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session,
uint32_t i;
*chunkp = NULL;
+
chunk = evict_chunk = flush_chunk = NULL;
WT_ASSERT(session, lsm_tree->queue_ref > 0);
@@ -130,7 +131,6 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session,
}
err: __wt_lsm_tree_readunlock(session, lsm_tree);
-
*chunkp = chunk;
return (ret);
}
@@ -168,8 +168,8 @@ __wt_lsm_work_switch(
/* We've become responsible for freeing the work unit. */
entry = *entryp;
- *ran = false;
*entryp = NULL;
+ *ran = false;
if (entry->lsm_tree->need_switch) {
WT_WITH_SCHEMA_LOCK(session,
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_worker.c b/src/third_party/wiredtiger/src/lsm/lsm_worker.c
index 21838d13e79..8e89cf39099 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_worker.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_worker.c
@@ -54,6 +54,7 @@ __lsm_worker_general_op(
bool force;
*completed = false;
+
/*
* Return if this thread cannot process a bloom, drop or flush.
*/
diff --git a/src/third_party/wiredtiger/src/os_posix/os_dir.c b/src/third_party/wiredtiger/src/os_posix/os_dir.c
index 8f77aba5f96..205ca389f43 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_dir.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_dir.c
@@ -28,11 +28,10 @@ __wt_posix_directory_list(WT_FILE_SYSTEM *file_system,
int tret;
char **entries;
- session = (WT_SESSION_IMPL *)wt_session;
-
*dirlistp = NULL;
*countp = 0;
+ session = (WT_SESSION_IMPL *)wt_session;
dirp = NULL;
dirallocsz = 0;
entries = NULL;
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 4056722a13c..108d9cf15f9 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -1362,7 +1362,7 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
#endif
all_visible = *updp == first_txn_upd &&
(F_ISSET(r, WT_REC_VISIBLE_ALL) ?
- __wt_txn_visible_all(session, max_txn, timestampp) :
+ !uncommitted && __wt_txn_visible_all(session, max_txn, timestampp) :
__wt_txn_visible(session, max_txn, timestampp));
if (all_visible)
@@ -1423,6 +1423,12 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
check_original_value:
/*
+ * Paranoia: check that we didn't choose an update that has since been
+ * rolled back.
+ */
+ WT_ASSERT(session, *updp == NULL || (*updp)->txnid != WT_TXN_ABORTED);
+
+ /*
* Returning an update means the original on-page value might be lost,
* and that's a problem if there's a reader that needs it. There are
* two cases: any lookaside table eviction (because the backing disk
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index cc32766c9dc..fa33b55c936 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -445,17 +445,15 @@ err: if (cursor != NULL)
/*
* Opening a cursor on a non-existent data source will set ret to
- * either of ENOENT or WT_NOTFOUND at this point. However,
+ * either of ENOENT or WT_NOTFOUND at this point. However,
* applications may reasonably do this inside a transaction to check
* for the existence of a table or index.
*
- * Prefer WT_NOTFOUND here: that does not force running transactions to
- * roll back. It will be mapped back to ENOENT.
+ * Failure in opening a cursor should not set an error on the
+ * transaction and WT_NOTFOUND will be mapped to ENOENT.
*/
- if (ret == ENOENT)
- ret = WT_NOTFOUND;
- API_END_RET_NOTFOUND_MAP(session, ret);
+ API_END_RET_NO_TXN_ERROR(session, ret);
}
/*
@@ -1973,8 +1971,6 @@ int
__wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name,
bool open_metadata, uint32_t session_flags, WT_SESSION_IMPL **sessionp)
{
- WT_DECL_RET;
- WT_SESSION *wt_session;
WT_SESSION_IMPL *session;
*sessionp = NULL;
@@ -1991,22 +1987,6 @@ __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name,
*/
F_SET(session, session_flags | WT_SESSION_INTERNAL);
- /*
- * Optionally acquire a lookaside table cursor (or clear caller's flag).
- * Acquiring the lookaside table cursor requires various locks; we've
- * seen problems in the past where deadlocks happened because sessions
- * deadlocked getting the cursor late in the process. Be defensive,
- * get it now.
- */
- if (!F_ISSET(conn, WT_CONN_LAS_OPEN))
- F_CLR(session, WT_SESSION_LOOKASIDE_CURSOR);
- if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR) &&
- (ret = __wt_las_cursor_open(session, &session->las_cursor)) != 0) {
- wt_session = &session->iface;
- WT_TRET(wt_session->close(wt_session, NULL));
- return (ret);
- }
-
*sessionp = session;
return (0);
}
diff --git a/src/third_party/wiredtiger/src/support/thread_group.c b/src/third_party/wiredtiger/src/support/thread_group.c
index f5842bea572..ba0c57e9468 100644
--- a/src/third_party/wiredtiger/src/support/thread_group.c
+++ b/src/third_party/wiredtiger/src/support/thread_group.c
@@ -182,17 +182,15 @@ __thread_group_resize(
WT_ERR(__wt_calloc_one(session, &thread));
/*
* Threads get their own session and lookaside table cursor
- * if the lookaside table is open. Note that threads are
- * started during recovery, before the lookaside table is
- * created.
+ * (if the lookaside table is open).
*/
- session_flags = 0;
- if (LF_ISSET(WT_THREAD_CAN_WAIT))
- FLD_SET(session_flags, WT_SESSION_CAN_WAIT);
- if (LF_ISSET(WT_THREAD_LOOKASIDE))
- FLD_SET(session_flags, WT_SESSION_LOOKASIDE_CURSOR);
+ session_flags =
+ LF_ISSET(WT_THREAD_CAN_WAIT) ? WT_SESSION_CAN_WAIT : 0;
WT_ERR(__wt_open_internal_session(conn, group->name,
false, session_flags, &thread->session));
+ if (LF_ISSET(WT_THREAD_LOOKASIDE) &&
+ F_ISSET(conn, WT_CONN_LOOKASIDE_OPEN))
+ WT_ERR(__wt_las_cursor_open(thread->session));
if (LF_ISSET(WT_THREAD_PANIC_FAIL))
F_SET(thread, WT_THREAD_PANIC_FAIL);
thread->id = i;
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index cfdb7d26498..91771403e13 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -443,13 +443,65 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
if (cval.len > 0) {
#ifdef HAVE_TIMESTAMPS
wt_timestamp_t ts;
+ WT_TXN_GLOBAL *txn_global;
+ char timestamp_buf[2 * WT_TIMESTAMP_SIZE + 1];
+ bool round_to_oldest;
+ txn_global = &S2C(session)->txn_global;
WT_RET(__wt_txn_parse_timestamp(session, "read", &ts, &cval));
- WT_RET(__wt_timestamp_validate(session,
- &ts, &cval, true, false, false));
- __wt_timestamp_set(&txn->read_timestamp, &ts);
+
+ /*
+ * Read the configuration here to reduce the span of the
+ * critical section.
+ */
+ WT_RET(__wt_config_gets_def(session,
+ cfg, "round_to_oldest", 0, &cval));
+ round_to_oldest = cval.val;
+ /*
+ * This code is not using the timestamp validate function to
+ * avoid a race between checking and setting transaction
+ * timestamp.
+ */
+ __wt_readlock(session, &txn_global->rwlock);
+ if (__wt_timestamp_cmp(&ts, &txn_global->oldest_timestamp) < 0)
+ {
+ WT_RET(__wt_timestamp_to_hex_string(session,
+ timestamp_buf, &ts));
+ /*
+ * If given read timestamp is earlier than oldest
+ * timestamp then round the read timestamp to
+ * oldest timestamp.
+ */
+ if (round_to_oldest)
+ __wt_timestamp_set(&txn->read_timestamp,
+ &txn_global->oldest_timestamp);
+ else {
+ __wt_readunlock(session, &txn_global->rwlock);
+ WT_RET_MSG(session, EINVAL, "read timestamp "
+ "%s older than oldest timestamp",
+ timestamp_buf);
+ }
+ } else {
+ __wt_timestamp_set(&txn->read_timestamp, &ts);
+ /*
+ * Reset to avoid a verbose message as read
+ * timestamp is not rounded to oldest timestamp.
+ */
+ round_to_oldest = false;
+ }
+
__wt_txn_set_read_timestamp(session);
+ __wt_readunlock(session, &txn_global->rwlock);
txn->isolation = WT_ISO_SNAPSHOT;
+ if (round_to_oldest) {
+ /*
+ * This message is generated here to reduce the span of
+ * critical section.
+ */
+ __wt_verbose(session, WT_VERB_TIMESTAMP, "Read "
+ "timestamp %s : Rounded to oldest timestamp",
+ timestamp_buf);
+ }
#else
WT_RET_MSG(session, EINVAL, "read_timestamp requires a "
"version of WiredTiger built with timestamp support");
@@ -586,7 +638,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
#ifdef HAVE_TIMESTAMPS
WT_ERR(__wt_txn_parse_timestamp(session, "commit", &ts, &cval));
WT_ERR(__wt_timestamp_validate(session,
- &ts, &cval, true, true, true));
+ "commit", &ts, &cval, true, true, true));
__wt_timestamp_set(&txn->commit_timestamp, &ts);
__wt_txn_set_commit_timestamp(session);
#else
@@ -830,7 +882,9 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
case WT_TXN_OP_BASIC:
case WT_TXN_OP_BASIC_TS:
case WT_TXN_OP_INMEM:
- WT_ASSERT(session, op->u.upd->txnid == txn->id);
+ WT_ASSERT(session, op->u.upd->txnid == txn->id);
+ WT_ASSERT(session, S2C(session)->las_fileid == 0 ||
+ op->fileid != S2C(session)->las_fileid);
op->u.upd->txnid = WT_TXN_ABORTED;
break;
case WT_TXN_OP_REF:
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index 8f90afeb8b4..0201036684d 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -481,8 +481,9 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
* global stable and/or running transaction commit timestamp.
*/
int
-__wt_timestamp_validate(WT_SESSION_IMPL *session, wt_timestamp_t *ts,
- WT_CONFIG_ITEM *cval, bool cmp_oldest, bool cmp_stable, bool cmp_commit)
+__wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name,
+ wt_timestamp_t *ts, WT_CONFIG_ITEM *cval,
+ bool cmp_oldest, bool cmp_stable, bool cmp_commit)
{
WT_TXN *txn = &session->txn;
WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
@@ -503,12 +504,12 @@ __wt_timestamp_validate(WT_SESSION_IMPL *session, wt_timestamp_t *ts,
if (older_than_oldest_ts)
WT_RET_MSG(session, EINVAL,
- "commit timestamp %.*s older than oldest timestamp",
- (int)cval->len, cval->str);
+ "%s timestamp %.*s older than oldest timestamp",
+ name, (int)cval->len, cval->str);
if (older_than_stable_ts)
WT_RET_MSG(session, EINVAL,
- "commit timestamp %.*s older than stable timestamp",
- (int)cval->len, cval->str);
+ "%s timestamp %.*s older than stable timestamp",
+ name, (int)cval->len, cval->str);
/*
* Compare against the commit timestamp of the current transaction.
@@ -520,9 +521,9 @@ __wt_timestamp_validate(WT_SESSION_IMPL *session, wt_timestamp_t *ts,
WT_RET(__wt_timestamp_to_hex_string(
session, hex_timestamp, &txn->first_commit_timestamp));
WT_RET_MSG(session, EINVAL,
- "commit timestamp %.*s older than the first "
+ "%s timestamp %.*s older than the first "
"commit timestamp %s for this transaction",
- (int)cval->len, cval->str, hex_timestamp);
+ name, (int)cval->len, cval->str, hex_timestamp);
}
return (0);
@@ -554,7 +555,7 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
"to set a commit_timestamp");
WT_RET(__wt_txn_parse_timestamp(session, "commit", &ts, &cval));
WT_RET(__wt_timestamp_validate(session,
- &ts, &cval, true, true, true));
+ "commit", &ts, &cval, true, true, true));
__wt_timestamp_set(&txn->commit_timestamp, &ts);
__wt_txn_set_commit_timestamp(session);
#else
diff --git a/src/third_party/wiredtiger/test/csuite/scope/main.c b/src/third_party/wiredtiger/test/csuite/scope/main.c
index f913f75da53..e9035775ba5 100644
--- a/src/third_party/wiredtiger/test/csuite/scope/main.c
+++ b/src/third_party/wiredtiger/test/csuite/scope/main.c
@@ -335,7 +335,7 @@ main(int argc, char *argv[])
run(opts->conn, "file:file.ru", "key_format=r,value_format=u");
run(opts->conn, "lsm:lsm.SS", "key_format=S,value_format=S");
- run(opts->conn, "lsm:lsm.Su", "key_format=S,value_format=S");
+ run(opts->conn, "lsm:lsm.Su", "key_format=S,value_format=u");
run(opts->conn, "table:table.SS", "key_format=S,value_format=S");
run(opts->conn, "table:table.Su", "key_format=S,value_format=u");
diff --git a/src/third_party/wiredtiger/test/fops/file.c b/src/third_party/wiredtiger/test/fops/file.c
index 118845ab805..62f1cc8b5bf 100644
--- a/src/third_party/wiredtiger/test/fops/file.c
+++ b/src/third_party/wiredtiger/test/fops/file.c
@@ -36,16 +36,19 @@ obj_bulk(void)
WT_CURSOR *c;
WT_SESSION *session;
int ret;
+ bool create;
testutil_check(conn->open_session(conn, NULL, NULL, &session));
if (use_txn)
testutil_check(session->begin_transaction(session, NULL));
+ create = false;
if ((ret = session->create(session, uri, config)) != 0)
if (ret != EEXIST && ret != EBUSY)
testutil_die(ret, "session.create");
if (ret == 0) {
+ create = true;
__wt_yield();
if ((ret = session->open_cursor(
session, uri, NULL, "bulk", &c)) == 0) {
@@ -55,16 +58,12 @@ obj_bulk(void)
}
if (use_txn) {
- /*
- * As the operations are being performed concurrently,
- * return value can be ENOENT, EBUSY or EINVAL will set
- * error to transaction opened by session. In these
- * cases the transaction has to be aborted.
- */
- if (ret != ENOENT && ret != EBUSY && ret != EINVAL)
- ret = session->commit_transaction(session, NULL);
- else
+ /* If the create failed, roll back; otherwise commit. */
+ if (!create)
ret = session->rollback_transaction(session, NULL);
+ else
+ ret = session->commit_transaction(session, NULL);
+
if (ret == EINVAL)
testutil_die(ret, "session.commit bulk");
}
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index 607dd43a8f3..4c5576654d2 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -499,8 +499,13 @@ commit_transaction(TINFO *tinfo, WT_SESSION *session)
testutil_check(
session->commit_transaction(session, config_buf));
- /* After the commit, update our last timestamp. */
- tinfo->timestamp = ts;
+ /*
+ * Update the thread's last-committed timestamp. Don't let the
+ * compiler re-order this statement, if we were to race with
+ * the timestamp thread, it might see our thread update before
+ * the transaction commit.
+ */
+ WT_PUBLISH(tinfo->timestamp, ts);
} else
testutil_check(session->commit_transaction(session, NULL));
++tinfo->commit;
diff --git a/src/third_party/wiredtiger/test/format/util.c b/src/third_party/wiredtiger/test/format/util.c
index 9ea44a29801..83ddf307cc9 100644
--- a/src/third_party/wiredtiger/test/format/util.c
+++ b/src/third_party/wiredtiger/test/format/util.c
@@ -607,7 +607,10 @@ timestamp(void *arg)
* once every 15 seconds.
*/
while (!g.workers_finished) {
- /* Find the lowest committed timestamp. */
+ /*
+ * Find the lowest committed timestamp. The timestamp thread
+ * starts before the operational threads, wait for them.
+ */
oldest_timestamp = UINT64_MAX;
for (i = 0; i < g.c_threads; ++i) {
tinfo = tinfo_list[i];
@@ -615,15 +618,20 @@ timestamp(void *arg)
tinfo->timestamp < oldest_timestamp)
oldest_timestamp = tinfo->timestamp;
}
+ if (oldest_timestamp == UINT64_MAX) {
+ __wt_sleep(1, 0);
+ continue;
+ }
/*
* Don't get more than 100 transactions or more than 15 seconds
* out of date.
*/
- if (oldest_timestamp >= g.timestamp ||
- g.timestamp - oldest_timestamp < 100) {
+ WT_READ_BARRIER();
+ testutil_assert(oldest_timestamp <= g.timestamp);
+ if (g.timestamp - oldest_timestamp < 100) {
__wt_seconds((WT_SESSION_IMPL *)session, &now);
- if (g.timestamp == 0 || difftime(now, last) < 15) {
+ if (difftime(now, last) < 15) {
__wt_sleep(1, 0);
continue;
}
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp03.py b/src/third_party/wiredtiger/test/suite/test_timestamp03.py
index 1a2511ea6ee..c340d258d62 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp03.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp03.py
@@ -61,8 +61,8 @@ class test_timestamp03(wttest.WiredTigerTestCase, suite_subprocess):
conncfg = [
('nolog', dict(conn_config='create', using_log=False)),
- ('V1', dict(conn_config='create,log=(enabled),compatibility=(release="2.9")', using_log=True)),
- ('V2', dict(conn_config='create,log=(enabled)', using_log=True)),
+ ('V1', dict(conn_config='create,log=(archive=false,enabled),compatibility=(release="2.9")', using_log=True)),
+ ('V2', dict(conn_config='create,log=(archive=false,enabled)', using_log=True)),
]
scenarios = make_scenarios(types, ckpt, conncfg)
@@ -245,7 +245,6 @@ class test_timestamp03(wttest.WiredTigerTestCase, suite_subprocess):
# Make sure a timestamp cursor is the last one to update. This
# tests the scenario for a bug we found where recovery replayed
# the last record written into the log.
- #
cur_nots_log[k] = self.value2
cur_nots_nolog[k] = self.value2
self.session.begin_transaction()
@@ -269,6 +268,24 @@ class test_timestamp03(wttest.WiredTigerTestCase, suite_subprocess):
self.check(self.session, 'read_timestamp=' + old_ts,
self.table_nots_nolog, dict((k, self.value2) for k in orig_keys))
+ # Scenario: 4a
+ # This scenario is the same as the earlier one, but with a read_timestamp
+ # earlier than oldest_timestamp and the round_to_oldest option set.
+ earlier_ts = timestamp_str(90)
+ self.check(self.session,
+ 'read_timestamp=' + earlier_ts +',round_to_oldest=true',
+ self.table_ts_log, dict((k, self.value) for k in orig_keys))
+ self.check(self.session,
+ 'read_timestamp=' + earlier_ts +',round_to_oldest=true',
+ self.table_ts_nolog, dict((k, self.value) for k in orig_keys))
+ # Tables not using the timestamps should see updated values (i.e. value2).
+ self.check(self.session,
+ 'read_timestamp=' + earlier_ts +',round_to_oldest=true',
+ self.table_nots_log, dict((k, self.value2) for k in orig_keys))
+ self.check(self.session,
+ 'read_timestamp=' + earlier_ts +',round_to_oldest=true',
+ self.table_nots_nolog, dict((k, self.value2) for k in orig_keys))
+
# Scenario: 5
# Check that we see the updated values as per the timestamp.
# Construct expected values.
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp05.py b/src/third_party/wiredtiger/test/suite/test_timestamp05.py
index f145184146c..e8433c27a1e 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp05.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp05.py
@@ -30,7 +30,6 @@
# Timestamps: make sure they don't end up in metadata
#
-from helper import copy_wiredtiger_home
import random
from suite_subprocess import suite_subprocess
import wiredtiger, wttest
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp06.py b/src/third_party/wiredtiger/test/suite/test_timestamp06.py
index 27301cc5ba4..ab4440656df 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp06.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp06.py
@@ -57,8 +57,8 @@ class test_timestamp06(wttest.WiredTigerTestCase, suite_subprocess):
conncfg = [
('nolog', dict(conn_config='create', using_log=False)),
- ('V1', dict(conn_config='create,log=(enabled),compatibility=(release="2.9")', using_log=True)),
- ('V2', dict(conn_config='create,log=(enabled)', using_log=True)),
+ ('V1', dict(conn_config='create,log=(archive=false,enabled),compatibility=(release="2.9")', using_log=True)),
+ ('V2', dict(conn_config='create,log=(archive=false,enabled)', using_log=True)),
]
scenarios = make_scenarios(conncfg, types, ckpt)
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp07.py b/src/third_party/wiredtiger/test/suite/test_timestamp07.py
index ffbe1e314d1..214cc2ee164 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp07.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp07.py
@@ -51,7 +51,7 @@ class test_timestamp07(wttest.WiredTigerTestCase, suite_subprocess):
conncfg = [
('nolog', dict(conn_config='create,cache_size=1M', using_log=False)),
- ('log', dict(conn_config='create,log=(enabled),cache_size=1M', using_log=True)),
+ ('log', dict(conn_config='create,log=(archive=false,enabled),cache_size=1M', using_log=True)),
]
nkeys = [
diff --git a/src/third_party/wiredtiger/test/suite/test_txn12.py b/src/third_party/wiredtiger/test/suite/test_txn12.py
index a0ecfb42bdb..21065083633 100644
--- a/src/third_party/wiredtiger/test/suite/test_txn12.py
+++ b/src/third_party/wiredtiger/test/suite/test_txn12.py
@@ -51,7 +51,8 @@ class test_txn12(wttest.WiredTigerTestCase, suite_subprocess):
msg = '/next_random.*boolean/'
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda:session.open_cursor(self.uri, None, "next_random=bar"), msg)
- # This commit should succeed as we have done no writes.
+ # This commit should succeed as open cursor should not set transaction
+ # error.
session.commit_transaction()
# Create a read/write transaction.
@@ -60,9 +61,9 @@ class test_txn12(wttest.WiredTigerTestCase, suite_subprocess):
c[123] = 123
self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
lambda:session.open_cursor(self.uri, None, "next_random=bar"), msg)
- # This commit should fail as we have written something
- self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
- lambda:session.commit_transaction(), '/requires rollback/')
+ # This commit should succeed as open cursor should not set transaction
+ # error.
+ session.commit_transaction()
if __name__ == '__main__':
wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_txn18.py b/src/third_party/wiredtiger/test/suite/test_txn18.py
new file mode 100644
index 00000000000..ec3cc7bae00
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_txn18.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2017 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_txn18.py
+# Transactions: test recovery settings
+#
+
+import fnmatch, os, shutil, time
+from suite_subprocess import suite_subprocess
+import wiredtiger, wttest
+
+class test_txn18(wttest.WiredTigerTestCase, suite_subprocess):
+ t1 = 'table:test_txn18'
+ create_params = 'key_format=i,value_format=i'
+ conn_config = 'log=(archive=false,enabled,file_max=100K),' + \
+ 'transaction_sync=(method=dsync,enabled)'
+ conn_recerror = conn_config + ',log=(recover=error)'
+ conn_recon = conn_config + ',log=(recover=on)'
+
+ def simulate_crash(self, olddir, newdir):
+ ''' Simulate a crash from olddir and restart in newdir. '''
+ # with the connection still open, copy files to new directory
+ shutil.rmtree(newdir, ignore_errors=True)
+ os.mkdir(newdir)
+ for fname in os.listdir(olddir):
+ fullname = os.path.join(olddir, fname)
+ # Skip lock file on Windows since it is locked
+ if os.path.isfile(fullname) and \
+ "WiredTiger.lock" not in fullname and \
+ "Tmplog" not in fullname and \
+ "Preplog" not in fullname:
+ shutil.copy(fullname, newdir)
+
+ def test_recovery(self):
+ ''' Run the recovery settings '''
+
+ # Here's the strategy:
+ # - Create a table (t1).
+ # - Insert data into t1.
+ # - Simulate a crash.
+ # - Make recovery run with recover=error
+ # and make sure it detects an error since recovery is needed
+ # - Make recovery run with recover=on.
+ # - Do a clean shutdown and restart with recover=error
+ # and make sure it is successful.
+ #
+ # If we aren't tracking file IDs properly, it's possible that
+ # we'd end up applying the log records for t2 to table t1.
+ self.session.create(self.t1, self.create_params)
+ #
+ # Since we're logging, we need to flush out the meta-data file
+ # from the create.
+ self.session.checkpoint()
+ c = self.session.open_cursor(self.t1, None, None)
+ for i in range(10000):
+ c[i] = i + 1
+ c.close()
+ olddir = "."
+ newdir = "RESTART"
+ errdir = "ERROR"
+ self.simulate_crash(olddir, errdir)
+ self.simulate_crash(olddir, newdir)
+ # close the original connection
+ self.close_conn()
+ # Trying to open the error directory with recover=error should return an error.
+ msg = '/recovery must be run/'
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda:self.wiredtiger_open(errdir, self.conn_recerror), msg)
+
+ # If recover=error is run on the directory and returns an error,
+ # make sure when we subsequently open with recover=on it properly
+ # recovers all the data.
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda:self.wiredtiger_open(newdir, self.conn_recerror), msg)
+
+ # Opening with recover=on should succeed.
+ self.conn = self.wiredtiger_open(newdir, self.conn_recon)
+ # Make sure the data we added originally is there
+ self.session = self.setUpSessionOpen(self.conn)
+ c = self.session.open_cursor(self.t1, None, None)
+ i = 0
+ for key, value in c:
+ self.assertEqual(i, key)
+ self.assertEqual(i+1, value)
+ i += 1
+ self.assertEqual(i, 10000)
+ c.close()
+ self.close_conn()
+ # Reopening with recover=error after a clean shutdown should succeed.
+ self.conn = self.wiredtiger_open(newdir, self.conn_recerror)
+
+if __name__ == '__main__':
+ wttest.run()