summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Alex Gorrod <alexander.gorrod@mongodb.com> 2017-11-13 08:36:56 +1100
committer Alex Gorrod <alexander.gorrod@mongodb.com> 2017-11-13 08:36:56 +1100
commit 3a8316e86e9c7cd379679d8530ecc54ad9bdf5c1 (patch)
tree cdf913c7523c32f4a2441e05062d2150097b4d8c
parent 0a2f8f6ad756189263d050b29f69bc57b45b9816 (diff)
parent d56f8dc481f3250b531273d0cd376f57df324914 (diff)
download mongo-3a8316e86e9c7cd379679d8530ecc54ad9bdf5c1.tar.gz
Merge branch 'develop' into mongodb-3.6
-rw-r--r-- dist/api_data.py 2
-rw-r--r-- dist/flags.py 10
-rw-r--r-- dist/s_define.list 2
-rw-r--r-- dist/stat_data.py 2
-rw-r--r-- src/btree/bt_read.c 67
-rw-r--r-- src/btree/bt_split.c 15
-rw-r--r-- src/btree/bt_sync.c 1
-rw-r--r-- src/cache/cache_las.c 513
-rw-r--r-- src/config/config_def.c 30
-rw-r--r-- src/conn/conn_api.c 4
-rw-r--r-- src/conn/conn_cache.c 6
-rw-r--r-- src/conn/conn_handle.c 2
-rw-r--r-- src/conn/conn_sweep.c 36
-rw-r--r-- src/cursor/cur_join.c 3
-rw-r--r-- src/cursor/cur_std.c 3
-rw-r--r-- src/cursor/cur_table.c 23
-rw-r--r-- src/evict/evict_file.c 7
-rw-r--r-- src/evict/evict_lru.c 56
-rw-r--r-- src/evict/evict_page.c 11
-rw-r--r-- src/include/api.h 30
-rw-r--r-- src/include/btmem.h 15
-rw-r--r-- src/include/btree.h 2
-rw-r--r-- src/include/btree.i 24
-rw-r--r-- src/include/cache.h 43
-rw-r--r-- src/include/cache.i 20
-rw-r--r-- src/include/connection.h 17
-rw-r--r-- src/include/extern.h 7
-rw-r--r-- src/include/extern_posix.h 3
-rw-r--r-- src/include/extern_win.h 3
-rw-r--r-- src/include/flags.h 84
-rw-r--r-- src/include/schema.h 2
-rw-r--r-- src/include/session.h 6
-rw-r--r-- src/include/stat.h 2
-rw-r--r-- src/include/wiredtiger.in 50
-rw-r--r-- src/lsm/lsm_merge.c 9
-rw-r--r-- src/lsm/lsm_tree.c 12
-rw-r--r-- src/lsm/lsm_work_unit.c 8
-rw-r--r-- src/os_posix/os_map.c 2
-rw-r--r-- src/os_posix/os_thread.c 24
-rw-r--r-- src/os_win/os_thread.c 12
-rw-r--r-- src/reconcile/rec_write.c 16
-rw-r--r-- src/schema/schema_open.c 2
-rw-r--r-- src/session/session_api.c 9
-rw-r--r-- src/session/session_compact.c 22
-rw-r--r-- src/support/err.c 2
-rw-r--r-- src/support/stat.c 7
-rw-r--r-- src/txn/txn.c 41
-rw-r--r-- src/txn/txn_ckpt.c 13
-rw-r--r-- src/txn/txn_rollback_to_stable.c 6
-rw-r--r-- src/txn/txn_timestamp.c 55
-rw-r--r-- test/checkpoint/checkpointer.c 2
-rw-r--r-- test/checkpoint/workers.c 2
-rw-r--r-- test/csuite/timestamp_abort/main.c 15
-rw-r--r-- test/csuite/wt2719_reconfig/main.c 1
-rw-r--r-- test/cursor_order/cursor_order_ops.c 4
-rw-r--r-- test/format/util.c 9
-rw-r--r-- test/thread/rw.c 4
57 files changed, 1013 insertions, 365 deletions
diff --git a/dist/api_data.py b/dist/api_data.py
index 3d6d4712413..a8b1a30a333 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -561,6 +561,7 @@ connection_runtime_config = [
'fileops',
'handleops',
'log',
+ 'lookaside',
'lookaside_activity',
'lsm',
'lsm_manager',
@@ -575,7 +576,6 @@ connection_runtime_config = [
'salvage',
'shared_cache',
'split',
- 'temporary',
'thread_group',
'timestamp',
'transaction',
diff --git a/dist/flags.py b/dist/flags.py
index 21fd0756435..28c91486e1a 100644
--- a/dist/flags.py
+++ b/dist/flags.py
@@ -23,11 +23,12 @@ flags = {
],
'page_read' : [
'READ_CACHE',
+ 'READ_IGNORE_CACHE_SIZE',
'READ_LOOKASIDE',
'READ_NOTFOUND_OK',
'READ_NO_EMPTY',
- 'READ_NO_EVICT',
'READ_NO_GEN',
+ 'READ_NO_SPLIT',
'READ_NO_WAIT',
'READ_PREV',
'READ_RESTART_OK',
@@ -74,6 +75,7 @@ flags = {
'VERB_HANDLEOPS',
'VERB_LOG',
'VERB_LOOKASIDE',
+ 'VERB_LOOKASIDE_ACTIVITY',
'VERB_LSM',
'VERB_LSM_MANAGER',
'VERB_METADATA',
@@ -87,7 +89,6 @@ flags = {
'VERB_SALVAGE',
'VERB_SHARED_CACHE',
'VERB_SPLIT',
- 'VERB_TEMPORARY',
'VERB_THREAD_GROUP',
'VERB_TIMESTAMP',
'VERB_TRANSACTION',
@@ -124,6 +125,7 @@ flags = {
'session' : [
'SESSION_CAN_WAIT',
'SESSION_INTERNAL',
+ 'SESSION_IGNORE_CACHE_SIZE',
'SESSION_LOCKED_CHECKPOINT',
'SESSION_LOCKED_HANDLE_LIST_READ',
'SESSION_LOCKED_HANDLE_LIST_WRITE',
@@ -136,12 +138,12 @@ flags = {
'SESSION_LOCKED_TURTLE',
'SESSION_LOGGING_INMEM',
'SESSION_LOOKASIDE_CURSOR',
- 'SESSION_NO_CACHE',
'SESSION_NO_DATA_HANDLES',
- 'SESSION_NO_EVICTION',
+ 'SESSION_NO_RECONCILE',
'SESSION_NO_LOGGING',
'SESSION_NO_SCHEMA_LOCK',
'SESSION_QUIET_CORRUPT_FILE',
+ 'SESSION_READ_WONT_NEED',
'SESSION_SERVER_ASYNC',
],
'stat' : [
diff --git a/dist/s_define.list b/dist/s_define.list
index dcaf975434f..fb0162079d9 100644
--- a/dist/s_define.list
+++ b/dist/s_define.list
@@ -22,6 +22,8 @@ WT_CACHE_LINE_PAD_END
WT_CONN_CHECK_PANIC
WT_DEADLOCK
WT_DEBUG_BYTE
+WT_SINGLE_THREAD_CHECK_START
+WT_SINGLE_THREAD_CHECK_STOP
WT_ERR_ERROR_OK
WT_EXT_FOREACH_OFF
WT_HANDLE_CLOSED
diff --git a/dist/stat_data.py b/dist/stat_data.py
index 64d3d46818b..44eb743479d 100644
--- a/dist/stat_data.py
+++ b/dist/stat_data.py
@@ -453,6 +453,8 @@ connection_stats = [
TxnStat('txn_pinned_checkpoint_range', 'transaction range of IDs currently pinned by a checkpoint', 'no_clear,no_scale'),
TxnStat('txn_pinned_range', 'transaction range of IDs currently pinned', 'no_clear,no_scale'),
TxnStat('txn_pinned_snapshot_range', 'transaction range of IDs currently pinned by named snapshots', 'no_clear,no_scale'),
+ TxnStat('txn_pinned_timestamp', 'transaction range of timestamps currently pinned', 'no_clear,no_scale'),
+ TxnStat('txn_pinned_timestamp_oldest', 'transaction range of timestamps pinned by the oldest timestamp', 'no_clear,no_scale'),
TxnStat('txn_read_queue_head', 'transactions read timestamp queue inserts to head'),
TxnStat('txn_read_queue_inserts', 'transactions read timestamp queue inserts total'),
TxnStat('txn_read_queue_len', 'transactions read timestamp queue length'),
diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c
index fe6be6517a2..fc4afc7f9b1 100644
--- a/src/btree/bt_read.c
+++ b/src/btree/bt_read.c
@@ -116,7 +116,7 @@ __las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id)
cursor, btree_id, ref->page_las->las_pageid);
for (; ret == 0; ret = cursor->next(cursor)) {
WT_ERR(cursor->get_key(cursor,
- &las_id, &las_pageid, &las_counter, &las_key));
+ &las_pageid, &las_id, &las_counter, &las_key));
/*
* Confirm the search using the unique prefix; if not a match,
@@ -314,6 +314,11 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
/*
* Attempt to set the state to WT_REF_READING for normal reads, or
* WT_REF_LOCKED, for deleted pages or pages with lookaside entries.
+ * The difference is that checkpoints can skip over clean pages that
+ * are being read into cache, but need to wait for deletes or lookaside
+ * updates to be resolved (in order for checkpoint to write the correct
+ * version of the page).
+ *
* If successful, we've won the race, read the page.
*/
switch (previous_state = ref->state) {
@@ -368,8 +373,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
*/
page_flags =
WT_DATA_IN_ITEM(&tmp) ? WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED;
- if (LF_ISSET(WT_READ_NO_EVICT) ||
- F_ISSET(session, WT_SESSION_NO_EVICTION))
+ if (LF_ISSET(WT_READ_IGNORE_CACHE_SIZE))
FLD_SET(page_flags, WT_PAGE_READ_NO_EVICT);
WT_ERR(__wt_page_inmem(session, ref, tmp.data, page_flags, &page));
tmp.mem = NULL;
@@ -518,6 +522,9 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
btree = S2BT(session);
+ if (F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE))
+ LF_SET(WT_READ_IGNORE_CACHE_SIZE);
+
/*
* Ignore reads of pages already known to be in cache, otherwise the
* eviction server can dominate these statistics.
@@ -554,7 +561,7 @@ read: /*
* allowed to do eviction work, check for space in the
* cache.
*/
- if (!LF_ISSET(WT_READ_NO_EVICT))
+ if (!LF_ISSET(WT_READ_IGNORE_CACHE_SIZE))
WT_RET(__wt_cache_eviction_check(
session, 1, NULL));
WT_RET(__page_read(session, ref, flags));
@@ -574,7 +581,7 @@ read: /*
* we "acquire" it.
*/
wont_need = LF_ISSET(WT_READ_WONT_NEED) ||
- F_ISSET(session, WT_SESSION_NO_CACHE);
+ F_ISSET(session, WT_SESSION_READ_WONT_NEED);
continue;
case WT_REF_READING:
if (LF_ISSET(WT_READ_CACHE))
@@ -623,17 +630,22 @@ read: /*
}
/*
- * If eviction is configured for this file, check to see
- * if the page qualifies for forced eviction and update
- * the page's generation number. If eviction isn't being
- * done on this file, we're done.
+ * Check if the page requires forced eviction.
*/
- if (did_read || LF_ISSET(WT_READ_NO_EVICT) ||
- F_ISSET(session, WT_SESSION_NO_EVICTION) ||
+ if (did_read || LF_ISSET(WT_READ_NO_SPLIT) ||
btree->evict_disabled > 0 || btree->lsm_primary)
goto skip_evict;
/*
+ * If reconciliation is disabled (e.g., when inserting
+ * into the lookaside table), skip forced eviction if
+ * the page can't split.
+ */
+ if (F_ISSET(session, WT_SESSION_NO_RECONCILE) &&
+ !__wt_leaf_page_can_split(session, ref->page))
+ goto skip_evict;
+
+ /*
* Forcibly evict pages that are too big.
*/
if (force_attempts < 10 &&
@@ -684,9 +696,19 @@ skip_evict: /*
* Check if we need an autocommit transaction.
* Starting a transaction can trigger eviction, so skip
* it if eviction isn't permitted.
+ *
+ * The logic here is a little weird: some code paths do
+ * a blanket ban on checking the cache size in
+ * sessions, but still require a transaction (e.g.,
+ * when updating metadata or lookaside). If
+ * WT_READ_IGNORE_CACHE_SIZE was passed in explicitly,
+ * we're done. If we set WT_READ_IGNORE_CACHE_SIZE
+ * because it was set in the session then make sure we
+ * start a transaction.
*/
- return (LF_ISSET(WT_READ_NO_EVICT) ? 0 :
- __wt_txn_autocommit_check(session));
+ return (LF_ISSET(WT_READ_IGNORE_CACHE_SIZE) &&
+ !F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE) ?
+ 0 : __wt_txn_autocommit_check(session));
WT_ILLEGAL_VALUE(session);
}
@@ -707,7 +729,7 @@ skip_evict: /*
* check if the cache needs help. If we do work for the cache,
* substitute that for a sleep.
*/
- if (!LF_ISSET(WT_READ_NO_EVICT)) {
+ if (!LF_ISSET(WT_READ_IGNORE_CACHE_SIZE)) {
WT_RET(
__wt_cache_eviction_check(session, 1, &cache_work));
if (cache_work)
@@ -728,30 +750,33 @@ __btree_verbose_lookaside_read(
WT_SESSION_IMPL *session, uint32_t las_id, uint64_t las_pageid)
{
#ifdef HAVE_VERBOSE
- WT_CONNECTION_IMPL *conn;
+ WT_CACHE *cache;
uint64_t ckpt_gen_current, ckpt_gen_last;
- if (!WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE))
+ if (!WT_VERBOSE_ISSET(session,
+ WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY))
return;
- conn = S2C(session);
+ cache = S2C(session)->cache;
ckpt_gen_current = __wt_gen(session, WT_GEN_CHECKPOINT);
- ckpt_gen_last = conn->las_verb_gen_read;
+ ckpt_gen_last = cache->las_verb_gen_read;
/*
* This message is throttled to one per checkpoint. To do this we
* track the generation of the last checkpoint for which the message
* was printed and check against the current checkpoint generation.
*/
- if (ckpt_gen_current > ckpt_gen_last) {
+ if (WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE) ||
+ ckpt_gen_current > ckpt_gen_last) {
/*
* Attempt to atomically replace the last checkpoint generation
* for which this message was printed. If the atomic swap fails
* we have raced and the winning thread will print the message.
*/
- if (__wt_atomic_casv64(&conn->las_verb_gen_read,
+ if (__wt_atomic_casv64(&cache->las_verb_gen_read,
ckpt_gen_last, ckpt_gen_current)) {
- __wt_verbose(session, WT_VERB_LOOKASIDE,
+ __wt_verbose(session,
+ WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY,
"Read from lookaside file triggered for "
"file ID %" PRIu32 ", page ID %" PRIu64,
las_id, las_pageid);
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index dc699a6b23b..021788919d0 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -141,6 +141,9 @@ __split_verify_root(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_DECL_RET;
WT_REF *ref;
+ uint32_t read_flags;
+
+ read_flags = WT_READ_CACHE | WT_READ_NO_EVICT;
/* The split is complete and live, verify all of the pages involved. */
__split_verify_intl_key_order(session, page);
@@ -156,14 +159,14 @@ __split_verify_root(WT_SESSION_IMPL *session, WT_PAGE *page)
* Ignore pages not in-memory (deleted, on-disk, being read),
* there's no in-memory structure to check.
*/
- if ((ret = __wt_page_in(session,
- ref, WT_READ_CACHE | WT_READ_NO_EVICT)) == WT_NOTFOUND)
+ if ((ret =
+ __wt_page_in(session, ref, read_flags)) == WT_NOTFOUND)
continue;
WT_ERR(ret);
__split_verify_intl_key_order(session, ref->page);
- WT_ERR(__wt_page_release(session, ref, WT_READ_NO_EVICT));
+ WT_ERR(__wt_page_release(session, ref, read_flags));
} WT_INTL_FOREACH_END;
return (0);
@@ -345,6 +348,9 @@ __split_ref_prepare(
* ascend into the created children, but eventually fail as that parent
* page won't yet know about the created children pages. That's OK, we
* spin there until the parent's page index is updated.
+ *
+ * Lock the newly created page to ensure it doesn't split until all
+ * child pages have been updated.
*/
for (i = skip_first ? 1 : 0; i < pindex->entries; ++i) {
ref = pindex->index[i];
@@ -352,10 +358,12 @@ __split_ref_prepare(
/* Switch the WT_REF's to their new page. */
j = 0;
+ WT_PAGE_LOCK(session, child);
WT_INTL_FOREACH_BEGIN(session, child, child_ref) {
child_ref->home = child;
child_ref->pindex_hint = j++;
} WT_INTL_FOREACH_END;
+ WT_PAGE_UNLOCK(session, child);
#ifdef HAVE_DIAGNOSTIC
WT_WITH_PAGE_INDEX(session,
@@ -1643,6 +1651,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
WT_RET(__wt_calloc_one(session, &ref->page_las));
*ref->page_las = multi->page_las;
+ WT_ASSERT(session, ref->page_las->las_max_txn != WT_TXN_NONE);
ref->state = WT_REF_LOOKASIDE;
}
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index d15852af935..2338d5be8ed 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -58,6 +58,7 @@ __sync_checkpoint_can_skip(WT_SESSION_IMPL *session, WT_PAGE *page)
i = 0; i < mod->mod_multi_entries; ++multi, ++i)
if (multi->addr.addr == NULL)
return (false);
+
return (true);
}
diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c
index e2ebd38e82f..deed37517bb 100644
--- a/src/cache/cache_las.c
+++ b/src/cache/cache_las.c
@@ -9,18 +9,44 @@
#include "wt_internal.h"
/*
+ * When an operation is accessing the lookaside table, it should ignore the
+ * cache size (since the cache is already full), any pages it reads should be
+ * evicted before application data, and the operation can't reenter
+ * reconciliation.
+ */
+#define WT_LAS_SESSION_FLAGS \
+ (WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_READ_WONT_NEED | \
+ WT_SESSION_NO_RECONCILE)
+
+/*
+ * __wt_las_nonempty --
+ * Return when there are entries in the lookaside table.
+ */
+bool
+__wt_las_nonempty(WT_SESSION_IMPL *session)
+{
+ WT_CACHE *cache;
+
+ cache = S2C(session)->cache;
+
+ return (cache->las_entry_count > 0);
+}
+
+/*
* __wt_las_stats_update --
* Update the lookaside table statistics for return to the application.
*/
void
__wt_las_stats_update(WT_SESSION_IMPL *session)
{
+ WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_CONNECTION_STATS **cstats;
WT_DSRC_STATS **dstats;
int64_t v;
conn = S2C(session);
+ cache = conn->cache;
/*
* Lookaside table statistics are copied from the underlying lookaside
@@ -36,7 +62,7 @@ __wt_las_stats_update(WT_SESSION_IMPL *session)
*/
cstats = conn->stats;
dstats = ((WT_CURSOR_BTREE *)
- conn->las_session->las_cursor)->btree->dhandle->stats;
+ cache->las_session[0]->las_cursor)->btree->dhandle->stats;
v = WT_STAT_READ(dstats, cursor_insert);
WT_STAT_SET(session, cstats, cache_lookaside_insert, v);
@@ -62,13 +88,15 @@ __wt_las_stats_update(WT_SESSION_IMPL *session)
int
__wt_las_create(WT_SESSION_IMPL *session)
{
+ WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
- uint32_t session_flags;
+ int i;
const char *drop_cfg[] = {
WT_CONFIG_BASE(session, WT_SESSION_drop), "force=true", NULL };
conn = S2C(session);
+ cache = conn->cache;
/* Read-only and in-memory configurations don't need the LAS table. */
if (F_ISSET(conn, WT_CONN_IN_MEMORY | WT_CONN_READONLY))
@@ -86,16 +114,17 @@ __wt_las_create(WT_SESSION_IMPL *session)
WT_RET(ret);
/* Re-create the table. */
- WT_RET(__wt_session_create(session, WT_LAS_URI, WT_LAS_FORMAT));
+ WT_RET(__wt_session_create(session, WT_LAS_URI, WT_LAS_CONFIG));
/*
* Open a shared internal session and cursor used for the lookaside
- * table. This session should never be tapped for eviction.
+ * table. This session should never perform reconciliation.
*/
- session_flags = WT_SESSION_NO_EVICTION;
- WT_RET(__wt_open_internal_session(
- conn, "lookaside table", true, session_flags, &conn->las_session));
- WT_RET(__wt_las_cursor_open(conn->las_session));
+ for (i = 0; i < WT_LAS_NUM_SESSIONS; i++) {
+ WT_RET(__wt_open_internal_session(conn, "lookaside table",
+ true, WT_LAS_SESSION_FLAGS, &cache->las_session[i]));
+ WT_RET(__wt_las_cursor_open(cache->las_session[i]));
+ }
/* The statistics server is already running, make sure we don't race. */
WT_WRITE_BARRIER();
@@ -111,20 +140,31 @@ __wt_las_create(WT_SESSION_IMPL *session)
int
__wt_las_destroy(WT_SESSION_IMPL *session)
{
+ WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_SESSION *wt_session;
+ int i;
conn = S2C(session);
+ cache = conn->cache;
F_CLR(conn, WT_CONN_LOOKASIDE_OPEN);
- if (conn->las_session == NULL)
+ if (cache == NULL)
return (0);
- wt_session = &conn->las_session->iface;
- ret = wt_session->close(wt_session, NULL);
+ for (i = 0; i < WT_LAS_NUM_SESSIONS; i++) {
+ if (cache->las_session[i] == NULL)
+ continue;
+
+ wt_session = &cache->las_session[i]->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ cache->las_session[i] = NULL;
+ }
- conn->las_session = NULL;
+ __wt_buf_free(session, &cache->las_sweep_key);
+ __wt_free(session, cache->las_dropped);
+ __wt_free(session, cache->las_sweep_dropmap);
return (ret);
}
@@ -154,8 +194,8 @@ __wt_las_cursor_open(WT_SESSION_IMPL *session)
btree = ((WT_CURSOR_BTREE *)cursor)->btree;
/* Track the lookaside file ID. */
- if (S2C(session)->las_fileid == 0)
- S2C(session)->las_fileid = btree->id;
+ if (S2C(session)->cache->las_fileid == 0)
+ S2C(session)->cache->las_fileid = btree->id;
/*
* Set special flags for the lookaside table: the lookaside flag (used,
@@ -187,7 +227,8 @@ void
__wt_las_cursor(
WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags)
{
- WT_CONNECTION_IMPL *conn;
+ WT_CACHE *cache;
+ int i;
*cursorp = NULL;
@@ -200,10 +241,9 @@ __wt_las_cursor(
* problems and there's no reason to believe lookaside pages will be
* useful more than once.
*/
- *session_flags =
- F_MASK(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION);
+ *session_flags = F_MASK(session, WT_LAS_SESSION_FLAGS);
- conn = S2C(session);
+ cache = S2C(session)->cache;
/*
* Some threads have their own lookaside table cursors, else lock the
@@ -212,12 +252,30 @@ __wt_las_cursor(
if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR))
*cursorp = session->las_cursor;
else {
- __wt_spin_lock(session, &conn->las_lock);
- *cursorp = conn->las_session->las_cursor;
+ for (;;) {
+ __wt_spin_lock(session, &cache->las_lock);
+ for (i = 0; i < WT_LAS_NUM_SESSIONS; i++) {
+ if (!cache->las_session_inuse[i]) {
+ *cursorp =
+ cache->las_session[i]->las_cursor;
+ cache->las_session_inuse[i] = true;
+ break;
+ }
+ }
+ __wt_spin_unlock(session, &cache->las_lock);
+ if (*cursorp != NULL)
+ break;
+ /*
+ * If all the lookaside sessions are busy, stall.
+ *
+ * XXX better as a condition variable.
+ */
+ __wt_sleep(0, 1000);
+ }
}
- /* Turn caching and eviction off. */
- F_SET(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION);
+ /* Configure session to access the lookaside table. */
+ F_SET(session, WT_LAS_SESSION_FLAGS);
}
/*
@@ -226,13 +284,14 @@ __wt_las_cursor(
*/
int
__wt_las_cursor_close(
- WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags)
+ WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags)
{
- WT_CONNECTION_IMPL *conn;
+ WT_CACHE *cache;
WT_CURSOR *cursor;
WT_DECL_RET;
+ int i;
- conn = S2C(session);
+ cache = S2C(session)->cache;
if ((cursor = *cursorp) == NULL)
return (0);
@@ -245,15 +304,23 @@ __wt_las_cursor_close(
* We turned off caching and eviction while the lookaside cursor was in
* use, restore the session's flags.
*/
- F_CLR(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION);
+ F_CLR(session, WT_LAS_SESSION_FLAGS);
F_SET(session, session_flags);
/*
* Some threads have their own lookaside table cursors, else unlock the
* shared lookaside cursor.
*/
- if (!F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR))
- __wt_spin_unlock(session, &conn->las_lock);
+ if (!F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR)) {
+ __wt_spin_lock(session, &cache->las_lock);
+ for (i = 0; i < WT_LAS_NUM_SESSIONS; i++)
+ if (cursor->session == &cache->las_session[i]->iface) {
+ cache->las_session_inuse[i] = false;
+ break;
+ }
+ __wt_spin_unlock(session, &cache->las_lock);
+ WT_ASSERT(session, i != WT_LAS_NUM_SESSIONS);
+ }
return (ret);
}
@@ -263,54 +330,74 @@ __wt_las_cursor_close(
* Display a verbose message once per checkpoint with details about the
* cache state when performing a lookaside table write.
*/
-static void
-__las_insert_block_verbose(
- WT_SESSION_IMPL *session, uint32_t btree_id, uint64_t las_pageid)
+static int
+__las_insert_block_verbose(WT_SESSION_IMPL *session, WT_MULTI *multi)
{
#ifdef HAVE_VERBOSE
+ WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
+#ifdef HAVE_TIMESTAMPS
+ char hex_timestamp[2 * WT_TIMESTAMP_SIZE + 1];
+#else
+ char hex_timestamp[9]; /* Enough for disabled string */
+#endif
uint64_t ckpt_gen_current, ckpt_gen_last;
- uint32_t pct_dirty, pct_full;
+ uint32_t btree_id, pct_dirty, pct_full;
- if (!WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE))
- return;
+ btree_id = S2BT(session)->id;
+
+ if (!WT_VERBOSE_ISSET(session,
+ WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY))
+ return (0);
conn = S2C(session);
+ cache = conn->cache;
ckpt_gen_current = __wt_gen(session, WT_GEN_CHECKPOINT);
- ckpt_gen_last = conn->las_verb_gen_write;
+ ckpt_gen_last = cache->las_verb_gen_write;
/*
- * This message is throttled to one per checkpoint. To do this we
- * track the generation of the last checkpoint for which the message
- * was printed and check against the current checkpoint generation.
+ * Print a message if verbose lookaside, or once per checkpoint if
+ * only reporting activity. Avoid an expensive atomic operation as
+ * often as possible when the message rate is limited.
*/
- if (ckpt_gen_current > ckpt_gen_last) {
- /*
- * Attempt to atomically replace the last checkpoint generation
- * for which this message was printed. If the atomic swap fails
- * we have raced and the winning thread will print the message.
- */
- if (__wt_atomic_casv64(&conn->las_verb_gen_write,
- ckpt_gen_last, ckpt_gen_current)) {
- (void)__wt_eviction_clean_needed(session, &pct_full);
- (void)__wt_eviction_dirty_needed(session, &pct_dirty);
-
- __wt_verbose(session, WT_VERB_LOOKASIDE,
- "Page reconciliation triggered lookaside write"
- "file ID %" PRIu32 ", page ID %" PRIu64 ". "
- "Entries now in lookaside file: %" PRId64 ", "
- "cache dirty: %" PRIu32 "%% , "
- "cache use: %" PRIu32 "%%",
- btree_id, las_pageid,
- WT_STAT_READ(conn->stats, cache_lookaside_entries),
- pct_dirty, pct_full);
- }
+ if (WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE) ||
+ (ckpt_gen_current > ckpt_gen_last &&
+ __wt_atomic_casv64(&cache->las_verb_gen_write,
+ ckpt_gen_last, ckpt_gen_current))) {
+ (void)__wt_eviction_clean_needed(session, &pct_full);
+ (void)__wt_eviction_dirty_needed(session, &pct_dirty);
+
+#ifdef HAVE_TIMESTAMPS
+ WT_RET(__wt_timestamp_to_hex_string(
+ session, hex_timestamp, &multi->page_las.min_timestamp));
+#else
+ WT_RET(__wt_snprintf(
+ hex_timestamp, sizeof(hex_timestamp), "disabled"));
+#endif
+ __wt_verbose(session,
+ WT_VERB_LOOKASIDE | WT_VERB_LOOKASIDE_ACTIVITY,
+ "Page reconciliation triggered lookaside write "
+ "file ID %" PRIu32 ", page ID %" PRIu64 ". "
+ "Max txn ID %" PRIu64 ", min timestamp %s, skewed %s. "
+ "Entries now in lookaside file: %" PRId64 ", "
+ "cache dirty: %" PRIu32 "%% , "
+ "cache use: %" PRIu32 "%%",
+ btree_id, multi->page_las.las_pageid,
+ multi->page_las.las_max_txn,
+ hex_timestamp,
+ multi->page_las.las_skew_oldest? "oldest" : "youngest",
+ WT_STAT_READ(conn->stats, cache_lookaside_entries),
+ pct_dirty, pct_full);
}
+
+ /* Never skip updating the tracked generation */
+ if (WT_VERBOSE_ISSET(session, WT_VERB_LOOKASIDE))
+ cache->las_verb_gen_write = ckpt_gen_current;
#else
WT_UNUSED(session);
- WT_UNUSED(btree_id);
- WT_UNUSED(las_pageid);
+ WT_UNUSED(multi);
#endif
+ return (0);
}
/*
@@ -318,11 +405,14 @@ __las_insert_block_verbose(
* Copy one set of saved updates into the database's lookaside buffer.
*/
int
-__wt_las_insert_block(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_CURSOR *cursor, WT_MULTI *multi, WT_ITEM *key)
+__wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
+ WT_PAGE *page, WT_MULTI *multi, WT_ITEM *key)
{
+ WT_BTREE *btree;
+ WT_DECL_RET;
WT_ITEM las_timestamp, las_value;
WT_SAVE_UPD *list;
+ WT_SESSION_IMPL *las_session;
WT_UPDATE *upd;
uint64_t insert_cnt, las_counter, las_pageid;
uint32_t btree_id, i, slot;
@@ -332,15 +422,23 @@ __wt_las_insert_block(WT_SESSION_IMPL *session,
WT_CLEAR(las_value);
insert_cnt = 0;
- btree_id = S2BT(session)->id;
+ btree = S2BT(session);
+ btree_id = btree->id;
las_pageid = multi->page_las.las_pageid =
- __wt_atomic_add64(&S2BT(session)->las_pageid, 1);
+ __wt_atomic_add64(&S2C(session)->cache->las_pageid, 1);
+
+ if (!btree->lookaside_entries)
+ btree->lookaside_entries = true;
+
+ /* Wrap all the updates in a transaction. */
+ las_session = (WT_SESSION_IMPL *)cursor->session;
+ WT_RET(__wt_txn_begin(las_session, NULL));
/*
* Make sure there are no leftover entries (e.g., from a handle
* reopen).
*/
- WT_RET(__wt_las_remove_block(session, cursor, btree_id, las_pageid));
+ WT_ERR(__wt_las_remove_block(session, cursor, btree_id, las_pageid));
/* Enter each update in the boundary's list into the lookaside store. */
for (las_counter = 0, i = 0,
@@ -350,20 +448,20 @@ __wt_las_insert_block(WT_SESSION_IMPL *session,
case WT_PAGE_COL_FIX:
case WT_PAGE_COL_VAR:
p = key->mem;
- WT_RET(
+ WT_ERR(
__wt_vpack_uint(&p, 0, WT_INSERT_RECNO(list->ins)));
key->size = WT_PTRDIFF(p, key->data);
break;
case WT_PAGE_ROW_LEAF:
if (list->ins == NULL)
- WT_RET(__wt_row_leaf_key(
+ WT_ERR(__wt_row_leaf_key(
session, page, list->ripcip, key, false));
else {
key->data = WT_INSERT_KEY(list->ins);
key->size = WT_INSERT_KEY_SIZE(list->ins);
}
break;
- WT_ILLEGAL_VALUE(session);
+ WT_ILLEGAL_VALUE_ERR(session);
}
/*
@@ -411,7 +509,7 @@ __wt_las_insert_block(WT_SESSION_IMPL *session,
}
cursor->set_key(cursor,
- btree_id, las_pageid, ++las_counter, key);
+ las_pageid, btree_id, ++las_counter, key);
#ifdef HAVE_TIMESTAMPS
las_timestamp.data = &upd->timestamp;
@@ -420,20 +518,27 @@ __wt_las_insert_block(WT_SESSION_IMPL *session,
cursor->set_value(cursor,
upd->txnid, &las_timestamp, upd->type, &las_value);
- WT_RET(cursor->insert(cursor));
+ WT_ERR(cursor->insert(cursor));
++insert_cnt;
} while ((upd = upd->next) != NULL);
}
- __wt_free(session, multi->supd);
- multi->supd_entries = 0;
-
if (insert_cnt > 0) {
WT_STAT_CONN_INCRV(
session, cache_lookaside_entries, insert_cnt);
- __las_insert_block_verbose(session, btree_id, las_pageid);
+ __wt_atomic_add64(
+ &S2C(session)->cache->las_entry_count, insert_cnt);
+ WT_ERR(__las_insert_block_verbose(session, multi));
}
- return (0);
+
+err: /* Resolve the transaction. */
+ if (ret == 0)
+ ret = __wt_txn_commit(las_session, NULL);
+ else
+ WT_TRET(__wt_txn_rollback(las_session, NULL));
+ __wt_free(session, multi->supd);
+ multi->supd_entries = 0;
+ return (ret);
}
/*
@@ -452,6 +557,15 @@ __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid)
int exact;
/*
+ * When scanning for all pages, start at the beginning of the lookaside
+ * table.
+ */
+ if (pageid == 0) {
+ WT_RET(cursor->reset(cursor));
+ return (cursor->next(cursor));
+ }
+
+ /*
* Because of the special visibility rules for lookaside, a new block
* can appear in between our search and the block of interest. Keep
* trying until we find it.
@@ -459,7 +573,7 @@ __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid)
for (;;) {
WT_CLEAR(las_key);
cursor->set_key(cursor,
- btree_id, pageid, (uint64_t)0, &las_key);
+ pageid, btree_id, (uint64_t)0, &las_key);
WT_RET(cursor->search_near(cursor, &exact));
if (exact < 0) {
WT_RET(cursor->next(cursor));
@@ -475,9 +589,9 @@ __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid)
* WT_CONNECTION::rollback_to_stable.
*/
WT_RET(cursor->get_key(cursor,
- &las_id, &las_pageid, &las_counter, &las_key));
- if (las_id < btree_id || (las_id == btree_id &&
- pageid != 0 && las_pageid < pageid))
+ &las_pageid, &las_id, &las_counter, &las_key));
+ if (las_pageid < pageid || (las_pageid == pageid &&
+ las_id < btree_id))
continue;
}
@@ -489,7 +603,7 @@ __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid)
/*
* __wt_las_remove_block --
- * Remove all records matching a key prefix from the lookaside store.
+ * Remove all records for a given page from the lookaside store.
*/
int
__wt_las_remove_block(WT_SESSION_IMPL *session,
@@ -497,18 +611,29 @@ __wt_las_remove_block(WT_SESSION_IMPL *session,
{
WT_DECL_RET;
WT_ITEM las_key;
+ WT_SESSION_IMPL *las_session;
uint64_t las_counter, las_pageid, remove_cnt;
uint32_t las_id, session_flags;
- bool local_cursor;
+ bool local_cursor, local_txn;
remove_cnt = 0;
session_flags = 0; /* [-Wconditional-uninitialized] */
- local_cursor = false;
+ local_cursor = local_txn = false;
if (cursor == NULL) {
__wt_las_cursor(session, &cursor, &session_flags);
local_cursor = true;
}
+ las_session = (WT_SESSION_IMPL *)cursor->session;
+
+ /*
+ * Wrap all of the removes in a transaction, unless this remove is part
+ * of a larger operation.
+ */
+ if (local_cursor) {
+ WT_ERR(__wt_txn_begin(las_session, NULL));
+ local_txn = true;
+ }
/*
* Search for the block's unique prefix and step through all matching
@@ -517,16 +642,13 @@ __wt_las_remove_block(WT_SESSION_IMPL *session,
ret = __wt_las_cursor_position(cursor, btree_id, pageid);
for (; ret == 0; ret = cursor->next(cursor)) {
WT_ERR(cursor->get_key(cursor,
- &las_id, &las_pageid, &las_counter, &las_key));
+ &las_pageid, &las_id, &las_counter, &las_key));
/*
* Confirm the search using the unique prefix; if not a match,
- * we're done searching for records for this page. Note that
- * page ID zero is special: it is a wild card indicating that
- * all pages in the tree should be removed.
+ * we're done searching for records for this page.
*/
- if (las_id != btree_id ||
- (pageid != 0 && las_pageid != pageid))
+ if (las_pageid != pageid || las_id != btree_id)
break;
WT_ERR(cursor->remove(cursor));
@@ -534,9 +656,218 @@ __wt_las_remove_block(WT_SESSION_IMPL *session,
}
WT_ERR_NOTFOUND_OK(ret);
-err: if (local_cursor)
- WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
+err: if (local_txn) {
+ if (ret == 0)
+ ret = __wt_txn_commit(las_session, NULL);
+ else
+ WT_TRET(__wt_txn_rollback(las_session, NULL));
+ }
+ if (local_cursor)
+ WT_TRET(__wt_las_cursor_close(
+ session, &cursor, session_flags));
WT_STAT_CONN_DECRV(session, cache_lookaside_entries, remove_cnt);
+ __wt_cache_decr_check_uint64(session,
+ &S2C(session)->cache->las_entry_count, remove_cnt,
+ "lookaside entry count");
+ return (ret);
+}
+
+/*
+ * __wt_las_save_dropped --
+ * Save a dropped btree ID to be swept from the lookaside table.
+ */
+int
+__wt_las_save_dropped(WT_SESSION_IMPL *session)
+{
+ WT_BTREE *btree;
+ WT_CACHE *cache;
+ WT_DECL_RET;
+
+ btree = S2BT(session);
+ cache = S2C(session)->cache;
+
+ __wt_spin_lock(session, &cache->las_sweep_lock);
+ WT_ERR(__wt_realloc_def(session, &cache->las_dropped_alloc,
+ cache->las_dropped_next + 1, &cache->las_dropped));
+ cache->las_dropped[cache->las_dropped_next++] = btree->id;
+err: __wt_spin_unlock(session, &cache->las_sweep_lock);
+ return (ret);
+}
+
+/*
+ * __las_sweep_init --
+ * Prepare to start a lookaside sweep.
+ */
+static int
+__las_sweep_init(WT_SESSION_IMPL *session)
+{
+ WT_CACHE *cache;
+ WT_DECL_RET;
+ u_int i;
+
+ cache = S2C(session)->cache;
+
+ __wt_spin_lock(session, &cache->las_sweep_lock);
+ /* If no files have been dropped, there's nothing to do. */
+ if (cache->las_dropped_next == 0)
+ WT_ERR(WT_NOTFOUND);
+
+ /* Scan the btree IDs to find min/max. */
+ cache->las_sweep_dropmin = UINT32_MAX;
+ cache->las_sweep_dropmax = 0;
+ for (i = 0; i < cache->las_dropped_next; i++) {
+ cache->las_sweep_dropmin = WT_MIN(
+ cache->las_sweep_dropmin,
+ cache->las_dropped[i]);
+ cache->las_sweep_dropmax = WT_MAX(
+ cache->las_sweep_dropmax,
+ cache->las_dropped[i]);
+ }
+
+ /* Initialize the bitmap. */
+ __wt_free(session, cache->las_sweep_dropmap);
+ WT_ERR(__bit_alloc(session,
+ 1 + cache->las_sweep_dropmax - cache->las_sweep_dropmin,
+ &cache->las_sweep_dropmap));
+ for (i = 0; i < cache->las_dropped_next; i++)
+ __bit_set(cache->las_sweep_dropmap,
+ cache->las_dropped[i] - cache->las_sweep_dropmin);
+
+ /* Clear the list of btree IDs. */
+ cache->las_dropped_next = 0;
+
+err: __wt_spin_unlock(session, &cache->las_sweep_lock);
+ return (ret);
+}
+
+/*
+ * __wt_las_sweep --
+ * Sweep the lookaside table.
+ */
+int
+__wt_las_sweep(WT_SESSION_IMPL *session)
+{
+ WT_CACHE *cache;
+ WT_CURSOR *cursor;
+ WT_DECL_RET;
+ WT_ITEM *key, las_key;
+ uint64_t cnt, las_counter, las_pageid, remove_cnt;
+ uint32_t las_id, session_flags;
+ int notused;
+
+ cache = S2C(session)->cache;
+ cursor = NULL;
+ key = &cache->las_sweep_key;
+ remove_cnt = 0;
+ session_flags = 0; /* [-Werror=maybe-uninitialized] */
+
+ __wt_las_cursor(session, &cursor, &session_flags);
+
+ /* We should have our own session. */
+ WT_ASSERT(session, cursor->session == &session->iface);
+
+ /*
+ * When continuing a sweep, position the cursor using the key from the
+ * last call (we don't care if we're before or after the key, either
+ * side is fine).
+ *
+ * Otherwise, we're starting a new sweep, gather the list of trees to
+ * sweep.
+ */
+ if (key->size != 0) {
+ __wt_cursor_set_raw_key(cursor, key);
+ ret = cursor->search_near(cursor, &notused);
+
+ /*
+ * Don't search for the same key twice; if we don't set a new
+ * key below, it's because we've reached the end of the table
+ * and we want the next pass to start at the beginning of the
+ * table. Searching for the same key could leave us stuck at
+ * the end of the table, repeatedly checking the same rows.
+ */
+ key->size = 0;
+ } else
+ ret = __las_sweep_init(session);
+
+ if (ret != 0)
+ goto srch_notfound;
+
+ /*
+ * The sweep server wakes up every 10 seconds (by default), it's a slow
+ * moving thread. Try to review the entire lookaside table once every 5
+ * minutes, or every 30 calls.
+ *
+ * The reason is that the lookaside table exists because we're seeing
+ * cache/eviction pressure (it allows us to trade performance and disk
+ * space for cache space), and it's likely lookaside blocks are being
+ * evicted, and reading them back in doesn't help things. A trickier,
+ * but possibly better, alternative might be to review all lookaside
+ * blocks in the cache in order to get rid of them, and slowly review
+ * lookaside blocks that have already been evicted.
+ */
+ cnt = (uint64_t)WT_MAX(100, cache->las_entry_count / 30);
+
+ /* Walk the file. */
+ for (; cnt > 0 && (ret = cursor->next(cursor)) == 0; --cnt) {
+ /*
+ * Give up if the cache is stuck: we are ignoring the cache
+ * size while scanning the lookaside table, so we're making
+ * things worse.
+ */
+ if (__wt_cache_stuck(session))
+ cnt = 1;
+
+ /*
+ * If the loop terminates after completing a work unit, we will
+ * continue the table sweep next time. Get a local copy of the
+ * sweep key, we're going to reset the cursor; do so before
+ * calling cursor.remove, cursor.remove can discard our hazard
+ * pointer and the page could be evicted from underneath us.
+ */
+ if (cnt == 1) {
+ WT_ERR(__wt_cursor_get_raw_key(cursor, key));
+ if (!WT_DATA_IN_ITEM(key))
+ WT_ERR(__wt_buf_set(
+ session, key, key->data, key->size));
+ }
+
+ WT_ERR(cursor->get_key(cursor,
+ &las_pageid, &las_id, &las_counter, &las_key));
+
+ /*
+ * If the entry belongs to a dropped tree, discard it.
+ *
+ * Cursor opened overwrite=true: won't return WT_NOTFOUND
+ * should another thread remove the record before we do (not
+ * expected for dropped trees), and the cursor remains
+ * positioned in that case.
+ *
+ * TODO it would also be good to remove entries in lookaside
+ * from live files that have aged out. If we track for each
+ * entry whether it was the on-page value chosen by
+ * reconciliation, we can safely remove entries from that point
+ * on (for the given key) that are visible to all readers.
+ */
+ if (__bit_test(cache->las_sweep_dropmap,
+ las_id - cache->las_sweep_dropmin)) {
+ WT_ERR(cursor->remove(cursor));
+ ++remove_cnt;
+ }
+ }
+
+srch_notfound:
+ WT_ERR_NOTFOUND_OK(ret);
+
+ if (0) {
+err: __wt_buf_free(session, key);
+ }
+
+ WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
+
+ __wt_cache_decr_check_uint64(session,
+ &S2C(session)->cache->las_entry_count, remove_cnt,
+ "lookaside entry count");
+
return (ret);
}
diff --git a/src/config/config_def.c b/src/config/config_def.c
index e7ead608672..f0e1dc1f701 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -179,12 +179,12 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
{ "verbose", "list",
NULL, "choices=[\"api\",\"block\",\"checkpoint\","
"\"checkpoint_progress\",\"compact\",\"evict\",\"evict_stuck\","
- "\"evictserver\",\"fileops\",\"handleops\",\"log\","
+ "\"evictserver\",\"fileops\",\"handleops\",\"log\",\"lookaside\","
"\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
"\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
"\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
- "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
- "\"transaction\",\"verify\",\"version\",\"write\"]",
+ "\"split\",\"thread_group\",\"timestamp\",\"transaction\","
+ "\"verify\",\"version\",\"write\"]",
NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
@@ -834,12 +834,12 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{ "verbose", "list",
NULL, "choices=[\"api\",\"block\",\"checkpoint\","
"\"checkpoint_progress\",\"compact\",\"evict\",\"evict_stuck\","
- "\"evictserver\",\"fileops\",\"handleops\",\"log\","
+ "\"evictserver\",\"fileops\",\"handleops\",\"log\",\"lookaside\","
"\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
"\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
"\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
- "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
- "\"transaction\",\"verify\",\"version\",\"write\"]",
+ "\"split\",\"thread_group\",\"timestamp\",\"transaction\","
+ "\"verify\",\"version\",\"write\"]",
NULL, 0 },
{ "write_through", "list",
NULL, "choices=[\"data\",\"log\"]",
@@ -929,12 +929,12 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
{ "verbose", "list",
NULL, "choices=[\"api\",\"block\",\"checkpoint\","
"\"checkpoint_progress\",\"compact\",\"evict\",\"evict_stuck\","
- "\"evictserver\",\"fileops\",\"handleops\",\"log\","
+ "\"evictserver\",\"fileops\",\"handleops\",\"log\",\"lookaside\","
"\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
"\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
"\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
- "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
- "\"transaction\",\"verify\",\"version\",\"write\"]",
+ "\"split\",\"thread_group\",\"timestamp\",\"transaction\","
+ "\"verify\",\"version\",\"write\"]",
NULL, 0 },
{ "version", "string", NULL, NULL, NULL, 0 },
{ "write_through", "list",
@@ -1019,12 +1019,12 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
{ "verbose", "list",
NULL, "choices=[\"api\",\"block\",\"checkpoint\","
"\"checkpoint_progress\",\"compact\",\"evict\",\"evict_stuck\","
- "\"evictserver\",\"fileops\",\"handleops\",\"log\","
+ "\"evictserver\",\"fileops\",\"handleops\",\"log\",\"lookaside\","
"\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
"\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
"\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
- "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
- "\"transaction\",\"verify\",\"version\",\"write\"]",
+ "\"split\",\"thread_group\",\"timestamp\",\"transaction\","
+ "\"verify\",\"version\",\"write\"]",
NULL, 0 },
{ "version", "string", NULL, NULL, NULL, 0 },
{ "write_through", "list",
@@ -1109,12 +1109,12 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
{ "verbose", "list",
NULL, "choices=[\"api\",\"block\",\"checkpoint\","
"\"checkpoint_progress\",\"compact\",\"evict\",\"evict_stuck\","
- "\"evictserver\",\"fileops\",\"handleops\",\"log\","
+ "\"evictserver\",\"fileops\",\"handleops\",\"log\",\"lookaside\","
"\"lookaside_activity\",\"lsm\",\"lsm_manager\",\"metadata\","
"\"mutex\",\"overflow\",\"read\",\"rebalance\",\"reconcile\","
"\"recovery\",\"recovery_progress\",\"salvage\",\"shared_cache\","
- "\"split\",\"temporary\",\"thread_group\",\"timestamp\","
- "\"transaction\",\"verify\",\"version\",\"write\"]",
+ "\"split\",\"thread_group\",\"timestamp\",\"transaction\","
+ "\"verify\",\"version\",\"write\"]",
NULL, 0 },
{ "write_through", "list",
NULL, "choices=[\"data\",\"log\"]",
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index 5f77f27ee3f..fd8fd6763db 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -1816,7 +1816,8 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
{ "fileops", WT_VERB_FILEOPS },
{ "handleops", WT_VERB_HANDLEOPS },
{ "log", WT_VERB_LOG },
- { "lookaside_activity", WT_VERB_LOOKASIDE },
+ { "lookaside", WT_VERB_LOOKASIDE },
+ { "lookaside_activity", WT_VERB_LOOKASIDE_ACTIVITY },
{ "lsm", WT_VERB_LSM },
{ "lsm_manager", WT_VERB_LSM_MANAGER },
{ "metadata", WT_VERB_METADATA },
@@ -1830,7 +1831,6 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
{ "salvage", WT_VERB_SALVAGE },
{ "shared_cache", WT_VERB_SHARED_CACHE },
{ "split", WT_VERB_SPLIT },
- { "temporary", WT_VERB_TEMPORARY },
{ "thread_group", WT_VERB_THREAD_GROUP },
{ "timestamp", WT_VERB_TIMESTAMP },
{ "transaction", WT_VERB_TRANSACTION },
diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c
index 007aa8757da..76106b3592f 100644
--- a/src/conn/conn_cache.c
+++ b/src/conn/conn_cache.c
@@ -198,6 +198,10 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET_MSG(NULL, ret,
"Failed to create session for eviction walks");
+ WT_RET(__wt_spin_init(session, &cache->las_lock, "lookaside table"));
+ WT_RET(__wt_spin_init(
+ session, &cache->las_sweep_lock, "lookaside sweep"));
+
/* Allocate the LRU eviction queue. */
cache->evict_slots = WT_EVICT_WALK_BASE + WT_EVICT_WALK_INCR;
for (i = 0; i < WT_EVICT_QUEUE_MAX; ++i) {
@@ -334,6 +338,8 @@ __wt_cache_destroy(WT_SESSION_IMPL *session)
__wt_spin_destroy(session, &cache->evict_pass_lock);
__wt_spin_destroy(session, &cache->evict_queue_lock);
__wt_spin_destroy(session, &cache->evict_walk_lock);
+ __wt_spin_destroy(session, &cache->las_lock);
+ __wt_spin_destroy(session, &cache->las_sweep_lock);
wt_session = &cache->walk_session->iface;
if (wt_session != NULL)
WT_TRET(wt_session->close(wt_session, NULL));
diff --git a/src/conn/conn_handle.c b/src/conn/conn_handle.c
index 2f3f9488b58..42ae866b329 100644
--- a/src/conn/conn_handle.c
+++ b/src/conn/conn_handle.c
@@ -55,7 +55,6 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn)
WT_SPIN_INIT_TRACKED(session, &conn->checkpoint_lock, checkpoint);
WT_RET(__wt_spin_init(session, &conn->encryptor_lock, "encryptor"));
WT_RET(__wt_spin_init(session, &conn->fh_lock, "file list"));
- WT_RET(__wt_spin_init(session, &conn->las_lock, "lookaside table"));
WT_SPIN_INIT_TRACKED(session, &conn->metadata_lock, metadata);
WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure"));
WT_SPIN_INIT_TRACKED(session, &conn->schema_lock, schema);
@@ -125,7 +124,6 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn)
__wt_spin_destroy(session, &conn->encryptor_lock);
__wt_spin_destroy(session, &conn->fh_lock);
__wt_rwlock_destroy(session, &conn->hot_backup_lock);
- __wt_spin_destroy(session, &conn->las_lock);
__wt_spin_destroy(session, &conn->metadata_lock);
__wt_spin_destroy(session, &conn->reconfig_lock);
__wt_spin_destroy(session, &conn->schema_lock);
diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c
index 9b64c7a0f77..06e441a3037 100644
--- a/src/conn/conn_sweep.c
+++ b/src/conn/conn_sweep.c
@@ -278,10 +278,12 @@ __sweep_server(void *arg)
WT_DECL_RET;
WT_SESSION_IMPL *session;
time_t now;
+ uint64_t last_las_sweep_id, oldest_id;
u_int dead_handles;
session = arg;
conn = S2C(session);
+ last_las_sweep_id = WT_TXN_NONE;
/*
* Sweep for dead and excess handles.
@@ -300,6 +302,26 @@ __sweep_server(void *arg)
WT_STAT_CONN_INCR(session, dh_sweeps);
/*
+ * Sweep the lookaside table. If the lookaside table hasn't yet
+ * been written, there's no work to do.
+ *
+ * Don't sweep the lookaside table if the cache is stuck full.
+ * The sweep uses the cache and can exacerbate the problem.
+ * If we try to sweep when the cache is full or we aren't
+ * making progress in eviction, sweeping can wind up constantly
+ * bringing in and evicting pages from the lookaside table,
+ * which will stop the cache from moving into the stuck state.
+ */
+ if (__wt_las_nonempty(session) &&
+ !__wt_cache_stuck(session)) {
+ oldest_id = __wt_txn_oldest_id(session);
+ if (WT_TXNID_LT(last_las_sweep_id, oldest_id)) {
+ WT_ERR(__wt_las_sweep(session));
+ last_las_sweep_id = oldest_id;
+ }
+ }
+
+ /*
* Mark handles with a time of death, and report whether any
* handles are marked dead. If sweep_idle_time is 0, handles
* never become idle.
@@ -379,15 +401,21 @@ __wt_sweep_create(WT_SESSION_IMPL *session)
/*
* Handle sweep does enough I/O it may be called upon to perform slow
- * operations for the block manager.
- *
- * Don't tap the sweep thread for eviction.
+ * operations for the block manager. Sweep should not block due to the
+ * cache being full.
*/
- session_flags = WT_SESSION_CAN_WAIT | WT_SESSION_NO_EVICTION;
+ session_flags = WT_SESSION_CAN_WAIT | WT_SESSION_IGNORE_CACHE_SIZE;
WT_RET(__wt_open_internal_session(
conn, "sweep-server", true, session_flags, &conn->sweep_session));
session = conn->sweep_session;
+ /*
+ * Sweep should have its own lookaside cursor to avoid blocking reads
+ * and eviction when processing drops.
+ */
+ if (F_ISSET(conn, WT_CONN_LOOKASIDE_OPEN))
+ WT_RET(__wt_las_cursor_open(session));
+
WT_RET(__wt_cond_alloc(
session, "handle sweep server", &conn->sweep_cond));
diff --git a/src/cursor/cur_join.c b/src/cursor/cur_join.c
index e1fbb63178f..bcd3943122d 100644
--- a/src/cursor/cur_join.c
+++ b/src/cursor/cur_join.c
@@ -532,7 +532,8 @@ typedef struct {
* Handle a key produced by a custom extractor.
*/
static int
-__curjoin_extract_insert(WT_CURSOR *cursor) {
+__curjoin_extract_insert(WT_CURSOR *cursor)
+{
WT_CURJOIN_EXTRACTOR *cextract;
WT_DECL_RET;
WT_ITEM ikey;
diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c
index 9296038bd96..9cfa3203aec 100644
--- a/src/cursor/cur_std.c
+++ b/src/cursor/cur_std.c
@@ -570,8 +570,7 @@ __wt_cursor_close(WT_CURSOR *cursor)
__wt_buf_free(session, &cursor->value);
__wt_free(session, cursor->internal_uri);
- if (!F_ISSET(cursor, WT_CURSTD_URI_SHARED))
- __wt_free(session, cursor->uri);
+ __wt_free(session, cursor->uri);
__wt_overwrite_and_free(session, cursor);
return (0);
}
diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c
index 78d508a4e9d..429f75208f2 100644
--- a/src/cursor/cur_table.c
+++ b/src/cursor/cur_table.c
@@ -33,7 +33,8 @@ typedef struct {
* Handle a key produced by a custom extractor.
*/
static int
-__curextract_insert(WT_CURSOR *cursor) {
+__curextract_insert(WT_CURSOR *cursor)
+{
WT_CURSOR_EXTRACTOR *cextract;
WT_ITEM *key, ikey, pkey;
WT_SESSION_IMPL *session;
@@ -135,12 +136,13 @@ __wt_apply_single_idx(WT_SESSION_IMPL *session, WT_INDEX *idx,
* Apply an operation to all indices of a table.
*/
static int
-__apply_idx(WT_CURSOR_TABLE *ctable, size_t func_off, bool skip_immutable) {
+__apply_idx(WT_CURSOR_TABLE *ctable, size_t func_off, bool skip_immutable)
+{
WT_CURSOR **cp;
WT_INDEX *idx;
WT_SESSION_IMPL *session;
- int (*f)(WT_CURSOR *);
u_int i;
+ int (*f)(WT_CURSOR *);
cp = ctable->idx_cursors;
session = (WT_SESSION_IMPL *)ctable->iface.session;
@@ -987,22 +989,15 @@ __wt_curtable_open(WT_SESSION_IMPL *session,
if (table->is_simple) {
/* Just return a cursor on the underlying data source. */
- if (table->is_simple_file)
- ret = __wt_curfile_open(session,
- table->cgroups[0]->source, NULL, cfg, cursorp);
- else
- ret = __wt_open_cursor(session,
- table->cgroups[0]->source, NULL, cfg, cursorp);
+ ret = __wt_open_cursor(session,
+ table->cgroups[0]->source, NULL, cfg, cursorp);
WT_TRET(__wt_schema_release_table(session, table));
if (ret == 0) {
/* Fix up the public URI to match what was passed in. */
cursor = *cursorp;
- if (!F_ISSET(cursor, WT_CURSTD_URI_SHARED))
- __wt_free(session, cursor->uri);
- cursor->uri = table->iface.name;
- WT_ASSERT(session, strcmp(uri, cursor->uri) == 0);
- F_SET(cursor, WT_CURSTD_URI_SHARED);
+ __wt_free(session, cursor->uri);
+ WT_TRET(__wt_strdup(session, uri, &cursor->uri));
}
return (ret);
}
diff --git a/src/evict/evict_file.c b/src/evict/evict_file.c
index 147b615c0ab..13e2823d234 100644
--- a/src/evict/evict_file.c
+++ b/src/evict/evict_file.c
@@ -54,10 +54,11 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
*/
if (F_ISSET(dhandle, WT_DHANDLE_DEAD) &&
F_ISSET(S2C(session), WT_CONN_LOOKASIDE_OPEN) &&
- !F_ISSET(btree, WT_BTREE_LOOKASIDE)) {
- WT_ASSERT(session, !WT_IS_METADATA(dhandle));
+ btree->lookaside_entries) {
+ WT_ASSERT(session, !WT_IS_METADATA(dhandle) &&
+ !F_ISSET(btree, WT_BTREE_LOOKASIDE));
- WT_RET(__wt_las_remove_block(session, NULL, btree->id, 0));
+ WT_RET(__wt_las_save_dropped(session));
} else
FLD_SET(walk_flags, WT_READ_LOOKASIDE);
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 02851492039..3af5338d73f 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -75,7 +75,8 @@ __evict_entry_priority(WT_SESSION_IMPL *session, WT_REF *ref)
return (WT_READGEN_OLDEST);
/* Any page from a dead tree is a great choice. */
- if (F_ISSET(btree->dhandle, WT_DHANDLE_DEAD))
+ if (F_ISSET(btree->dhandle, WT_DHANDLE_DEAD) ||
+ F_ISSET(btree, WT_BTREE_LOOKASIDE))
return (WT_READGEN_OLDEST);
/* Any empty page (leaf or internal), is a good choice. */
@@ -606,6 +607,21 @@ __evict_update_work(WT_SESSION_IMPL *session)
F_SET(cache, WT_CACHE_EVICT_SCRUB);
/*
+ * Try lookaside eviction when either:
+ * (1) the cache is stuck; OR
+ * (2) the lookaside score goes over 80 AND the cache's dirty content
+ * is more than half way from the dirty target to the dirty
+ * trigger.
+ */
+ if (!F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE) &&
+ (__wt_cache_stuck(session) ||
+ (__wt_cache_lookaside_score(cache) > 80 &&
+ dirty_inuse > (uint64_t)
+ ((cache->eviction_dirty_target + cache->eviction_dirty_trigger) *
+ bytes_max) / 200)))
+ F_SET(cache, WT_CACHE_EVICT_LOOKASIDE);
+
+ /*
* With an in-memory cache, we only do dirty eviction in order to scrub
* pages.
*/
@@ -1632,6 +1648,28 @@ __evict_walk_file(WT_SESSION_IMPL *session,
QUEUE_FILLS_PER_PASS;
/*
+ * If the tree is dead, fill the remaining slots; the cap for being
+ * near the end of the queue is applied after the per-tree minimum.
+ */
+ if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD))
+ target_pages = remaining_slots;
+
+ /*
+ * Lookaside pages don't count toward the cache's dirty limit.
+ *
+ * Preferentially evict lookaside pages unless applications are stalled
+ * on the dirty limit. Once application threads are stalled by the
+ * dirty limit, don't take any lookaside pages unless we're also up
+ * against the total cache size limit.
+ */
+ if (F_ISSET(btree, WT_BTREE_LOOKASIDE)) {
+ if (!F_ISSET(cache, WT_CACHE_EVICT_DIRTY_HARD))
+ target_pages = remaining_slots;
+ else if (!F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD))
+ target_pages = 0;
+ }
+
+ /*
* Walk trees with a small fraction of the cache in case there are so
* many trees that none of them use enough of the cache to be allocated
* slots. Only skip a tree if it has no bytes of interest.
@@ -1652,12 +1690,7 @@ __evict_walk_file(WT_SESSION_IMPL *session,
if (target_pages < MIN_PAGES_PER_TREE)
target_pages = MIN_PAGES_PER_TREE;
- /*
- * If the tree is dead or we're near the end of the queue, fill the
- * remaining slots.
- */
- if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
- target_pages > remaining_slots)
+ if (target_pages > remaining_slots)
target_pages = remaining_slots;
/*
@@ -1993,8 +2026,8 @@ fast: /* If the page can't be evicted, give up. */
if (restarts == 0)
WT_STAT_CONN_INCR(
session, cache_eviction_walks_abandoned);
- WT_RET(__wt_page_release(cache->walk_session,
- ref, WT_READ_NO_EVICT));
+ WT_RET(__wt_page_release(
+ cache->walk_session, ref, walk_flags));
ref = NULL;
} else if (WT_READGEN_EVICT_SOON(ref->page->read_gen))
WT_RET_NOTFOUND_OK(__wt_tree_walk_count(
@@ -2315,8 +2348,9 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
/* See if eviction is still needed. */
if (!__wt_eviction_needed(session, busy, &pct_full) ||
- (pct_full < 100 && cache->eviction_progress >
- initial_progress + max_progress))
+ ((pct_full < 100 || cache->eviction_scrub_limit > 0.0) &&
+ (cache->eviction_progress >
+ initial_progress + max_progress)))
break;
/*
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index 103c93a075b..65009dc3449 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -522,6 +522,13 @@ __evict_review(
return (0);
/*
+ * If reconciliation is disabled for this thread (e.g., during an
+ * eviction that writes to lookaside), give up.
+ */
+ if (F_ISSET(session, WT_SESSION_NO_RECONCILE))
+ return (EBUSY);
+
+ /*
* If the page is dirty, reconcile it to decide if we can evict it.
*
* If we have an exclusive lock (we're discarding the tree), assert
@@ -575,9 +582,7 @@ __evict_review(
* that can't be evicted, check if reconciliation
* suggests trying the lookaside table.
*/
- if (!F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE) &&
- (__wt_cache_lookaside_score(cache) > 50 ||
- __wt_cache_stuck(session)))
+ if (F_ISSET(cache, WT_CACHE_EVICT_LOOKASIDE))
lookaside_retryp = &lookaside_retry;
}
}
diff --git a/src/include/api.h b/src/include/api.h
index aa080d2bcca..2e3a2fe220f 100644
--- a/src/include/api.h
+++ b/src/include/api.h
@@ -6,12 +6,41 @@
* See the file LICENSE for redistribution information.
*/
+#ifdef HAVE_DIAGNOSTIC
+/*
+ * Capture cases where a single session handle is used by multiple threads
+ * in parallel. The check isn't trivial because some API calls re-enter
+ * via public API entry points and the session with ID 0 is the default
+ * session in the connection handle which can be used across multiple threads.
+ * It is safe to use the reference count without atomic operations because the
+ * reference count is only tracking a thread re-entering the API.
+ */
+#define WT_SINGLE_THREAD_CHECK_START(s) \
+ { \
+ uintmax_t __tmp_api_tid; \
+ __wt_thread_id(&__tmp_api_tid); \
+ WT_ASSERT(session, (s)->id == 0 || (s)->api_tid == 0 || \
+ (s)->api_tid == __tmp_api_tid); \
+ if ((s)->api_tid == 0) \
+ WT_PUBLISH((s)->api_tid, __tmp_api_tid); \
+ ++(s)->api_enter_refcnt; \
+ }
+
+#define WT_SINGLE_THREAD_CHECK_STOP(s) \
+ if (--(s)->api_enter_refcnt == 0) \
+ WT_PUBLISH((s)->api_tid, 0);
+#else
+#define WT_SINGLE_THREAD_CHECK_START(s)
+#define WT_SINGLE_THREAD_CHECK_STOP(s)
+#endif
+
/* Standard entry points to the API: declares/initializes local variables. */
#define API_SESSION_INIT(s, h, n, dh) \
WT_DATA_HANDLE *__olddh = (s)->dhandle; \
const char *__oldname = (s)->name; \
(s)->dhandle = (dh); \
(s)->name = (s)->lastop = #h "." #n; \
+ WT_SINGLE_THREAD_CHECK_START(s); \
WT_ERR(WT_SESSION_CHECK_PANIC(s)); \
__wt_verbose((s), WT_VERB_API, "%s", "CALL: " #h ":" #n)
@@ -28,6 +57,7 @@
#define API_END(s, ret) \
if ((s) != NULL) { \
+ WT_SINGLE_THREAD_CHECK_STOP(s); \
(s)->dhandle = __olddh; \
(s)->name = __oldname; \
if (F_ISSET(&(s)->txn, WT_TXN_RUNNING) && \
diff --git a/src/include/btmem.h b/src/include/btmem.h
index c3646a2ae59..abb7cc19972 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -167,11 +167,12 @@ struct __wt_ovfl_reuse {
* are written into a lookaside table, and restored as necessary if the page is
* read.
*
- * The key is a unique marker for the page (a file ID plus a page ID), a
- * counter (used to ensure the update records remain in the original order),
- * and the record's key (byte-string for row-store, record number for
- * column-store). The value is the WT_UPDATE structure's transaction ID,
- * timestamp, update type and value.
+ * The key is a unique marker for the page (a page ID plus a file ID, ordered
+ * this way so that overall the lookaside table is append-mostly), a counter
+ * (used to ensure the update records remain in the original order), and the
+ * record's key (byte-string for row-store, record number for column-store).
+ * The value is the WT_UPDATE structure's transaction ID, timestamp, update
+ * type and value.
*
* As the key for the lookaside table is different for row- and column-store, we
* store both key types in a WT_ITEM, building/parsing them in the code, because
@@ -181,8 +182,8 @@ struct __wt_ovfl_reuse {
* makes the lookaside table's value more likely to overflow the page size when
* the row-store key is relatively large.
*/
-#define WT_LAS_FORMAT \
- "key_format=" WT_UNCHECKED_STRING(IQQu) \
+#define WT_LAS_CONFIG \
+ "key_format=" WT_UNCHECKED_STRING(QIQu) \
",value_format=" WT_UNCHECKED_STRING(QuBu)
/*
diff --git a/src/include/btree.h b/src/include/btree.h
index 7dc9b4a11a7..8a3273d1b6b 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -134,13 +134,13 @@ struct __wt_btree {
u_int rec_multiblock_max; /* Maximum blocks written for a page */
uint64_t last_recno; /* Column-store last record number */
- uint64_t las_pageid; /* Lookaside table page ID counter */
WT_REF root; /* Root page reference */
bool modified; /* If the tree ever modified */
uint8_t original; /* Newly created: bulk-load possible
(want a bool but needs atomic cas) */
+ bool lookaside_entries; /* Has entries in the lookaside table */
bool lsm_primary; /* Handle is/was the LSM primary */
WT_BM *bm; /* Block manager reference */
diff --git a/src/include/btree.i b/src/include/btree.i
index f2948bfc90f..edc0973ee6f 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -149,7 +149,8 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
if (WT_PAGE_IS_INTERNAL(page)) {
(void)__wt_atomic_add64(&btree->bytes_dirty_intl, size);
(void)__wt_atomic_add64(&cache->bytes_dirty_intl, size);
- } else if (!btree->lsm_primary) {
+ } else if (!btree->lsm_primary &&
+ !F_ISSET(btree, WT_BTREE_LOOKASIDE)) {
(void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size);
(void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size);
}
@@ -189,7 +190,7 @@ __wt_cache_decr_check_size(
*/
static inline void
__wt_cache_decr_check_uint64(
- WT_SESSION_IMPL *session, uint64_t *vp, size_t v, const char *fld)
+ WT_SESSION_IMPL *session, uint64_t *vp, uint64_t v, const char *fld)
{
if (__wt_atomic_sub64(vp, v) < WT_EXABYTE)
return;
@@ -200,7 +201,7 @@ __wt_cache_decr_check_uint64(
*/
*vp = 0;
__wt_errx(session,
- "%s went negative with decrement of %" WT_SIZET_FMT, fld, v);
+ "%s went negative with decrement of %" PRIu64, fld, v);
#ifdef HAVE_DIAGNOSTIC
__wt_abort(session);
@@ -261,7 +262,7 @@ __wt_cache_page_byte_dirty_decr(
decr, "WT_BTREE.bytes_dirty_intl");
__wt_cache_decr_check_uint64(session, &cache->bytes_dirty_intl,
decr, "WT_CACHE.bytes_dirty_intl");
- } else if (!btree->lsm_primary) {
+ } else if (!btree->lsm_primary && !F_ISSET(btree, WT_BTREE_LOOKASIDE)) {
__wt_cache_decr_check_uint64(session, &btree->bytes_dirty_leaf,
decr, "WT_BTREE.bytes_dirty_leaf");
__wt_cache_decr_check_uint64(session, &cache->bytes_dirty_leaf,
@@ -321,7 +322,8 @@ __wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page)
(void)__wt_atomic_add64(&cache->bytes_dirty_intl, size);
(void)__wt_atomic_add64(&cache->pages_dirty_intl, 1);
} else {
- if (!btree->lsm_primary) {
+ if (!btree->lsm_primary &&
+ !F_ISSET(btree, WT_BTREE_LOOKASIDE)) {
(void)__wt_atomic_add64(&btree->bytes_dirty_leaf, size);
(void)__wt_atomic_add64(&cache->bytes_dirty_leaf, size);
}
@@ -420,7 +422,8 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page, bool rewrite)
__wt_cache_decr_check_uint64(session,
&cache->bytes_dirty_intl,
modify->bytes_dirty, "WT_CACHE.bytes_dirty_intl");
- } else if (!btree->lsm_primary) {
+ } else if (!btree->lsm_primary &&
+ !F_ISSET(btree, WT_BTREE_LOOKASIDE)) {
__wt_cache_decr_check_uint64(session,
&btree->bytes_dirty_leaf,
modify->bytes_dirty, "WT_BTREE.bytes_dirty_leaf");
@@ -1359,6 +1362,7 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
{
WT_BTREE *btree;
WT_PAGE *page;
+ bool inmem_split;
btree = S2BT(session);
@@ -1387,10 +1391,10 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
*/
page = ref->page;
if (!WT_READGEN_EVICT_SOON(page->read_gen) ||
- LF_ISSET(WT_READ_NO_EVICT) ||
- F_ISSET(session, WT_SESSION_NO_EVICTION) ||
+ LF_ISSET(WT_READ_NO_SPLIT) ||
btree->evict_disabled > 0 ||
- !__wt_page_can_evict(session, ref, NULL))
+ !__wt_page_can_evict(session, ref, &inmem_split) ||
+ (F_ISSET(session, WT_SESSION_NO_RECONCILE) && !inmem_split))
return (__wt_hazard_clear(session, ref));
WT_RET_BUSY_OK(__wt_page_release_evict(session, ref));
@@ -1622,6 +1626,6 @@ __wt_ref_state_yield_sleep(uint64_t *yield_count, uint64_t *sleep_count)
return;
}
- (*sleep_count) = WT_MIN((*sleep_count) + WT_THOUSAND, 10 * WT_THOUSAND);
+ (*sleep_count) = WT_MIN((*sleep_count) + 100, WT_THOUSAND);
__wt_sleep(0, (*sleep_count));
}
diff --git a/src/include/cache.h b/src/include/cache.h
index 0a42853b95b..f9ce4316e29 100644
--- a/src/include/cache.h
+++ b/src/include/cache.h
@@ -7,6 +7,12 @@
*/
/*
+ * Helper: in order to read without any calls to eviction, we have to ignore
+ * the cache size and disable splits.
+ */
+#define WT_READ_NO_EVICT (WT_READ_IGNORE_CACHE_SIZE | WT_READ_NO_SPLIT)
+
+/*
* Tuning constants: I hesitate to call this tuning, but we want to review some
* number of pages from each file's in-memory tree for each page we evict.
*/
@@ -176,6 +182,38 @@ struct __wt_cache {
int32_t evict_lookaside_score;
/*
+ * Shared lookaside lock, session and cursor, used by threads accessing
+ * the lookaside table (other than eviction server and worker threads
+ * and the sweep thread, all of which have their own lookaside cursors).
+ */
+#define WT_LAS_NUM_SESSIONS 5
+ WT_SPINLOCK las_lock;
+ WT_SESSION_IMPL *las_session[WT_LAS_NUM_SESSIONS];
+ bool las_session_inuse[WT_LAS_NUM_SESSIONS];
+
+ uint32_t las_fileid; /* Lookaside table file ID */
+ uint64_t las_entry_count; /* Count of entries in lookaside */
+ uint64_t las_pageid; /* Lookaside table page ID counter */
+
+ WT_SPINLOCK las_sweep_lock;
+ WT_ITEM las_sweep_key; /* Track sweep position. */
+ uint32_t las_sweep_dropmin; /* Minimum btree ID in current set. */
+ uint8_t *las_sweep_dropmap; /* Bitmap of dropped btree IDs. */
+ uint32_t las_sweep_dropmax; /* Maximum btree ID in current set. */
+
+ uint32_t *las_dropped; /* List of dropped btree IDs. */
+ size_t las_dropped_next; /* Next index into drop list. */
+ size_t las_dropped_alloc; /* Allocated size of drop list. */
+
+ /*
+ * The "lookaside_activity" verbose messages are throttled to once per
+ * checkpoint. To accomplish this we track the checkpoint generation
+ * for the most recent read and write verbose messages.
+ */
+ uint64_t las_verb_gen_read;
+ uint64_t las_verb_gen_write;
+
+ /*
* Cache pool information.
*/
uint64_t cp_pass_pressure; /* Calculated pressure from this pass */
@@ -200,8 +238,9 @@ struct __wt_cache {
#define WT_CACHE_EVICT_CLEAN_HARD 0x002 /* Clean % blocking app threads */
#define WT_CACHE_EVICT_DIRTY 0x004 /* Evict dirty pages */
#define WT_CACHE_EVICT_DIRTY_HARD 0x008 /* Dirty % blocking app threads */
-#define WT_CACHE_EVICT_SCRUB 0x010 /* Scrub dirty pages */
-#define WT_CACHE_EVICT_URGENT 0x020 /* Pages are in the urgent queue */
+#define WT_CACHE_EVICT_LOOKASIDE 0x010 /* Try lookaside eviction */
+#define WT_CACHE_EVICT_SCRUB 0x020 /* Scrub dirty pages */
+#define WT_CACHE_EVICT_URGENT 0x040 /* Pages are in the urgent queue */
#define WT_CACHE_EVICT_ALL (WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_DIRTY)
uint32_t flags;
};
diff --git a/src/include/cache.i b/src/include/cache.i
index e160dbf4d64..c7d802f8a5f 100644
--- a/src/include/cache.i
+++ b/src/include/cache.i
@@ -241,12 +241,12 @@ __wt_session_can_wait(WT_SESSION_IMPL *session)
return (false);
/*
- * LSM sets the no-eviction flag when holding the LSM tree lock, in that
- * case, or when holding the schema lock, we don't want to highjack the
- * thread for eviction.
+ * LSM sets the "ignore cache size" flag when holding the LSM tree
+ * lock, in that case, or when holding the schema lock, we don't want
+ * this thread to block for eviction.
*/
- return (!F_ISSET(
- session, WT_SESSION_NO_EVICTION | WT_SESSION_LOCKED_SCHEMA));
+ return (!F_ISSET(session,
+ WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_LOCKED_SCHEMA));
}
/*
@@ -395,12 +395,12 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool *didworkp)
txn_global->current != txn_global->oldest_id);
/*
- * LSM sets the no-cache-check flag when holding the LSM tree lock, in
- * that case, or when holding the handle list, schema or table locks
- * (which can block checkpoints and eviction), don't block the thread
- * for eviction.
+ * LSM sets the "ignore cache size" flag when holding the LSM tree
+ * lock, in that case, or when holding the handle list, schema or table
+ * locks (which can block checkpoints and eviction), don't block the
+ * thread for eviction.
*/
- if (F_ISSET(session, WT_SESSION_NO_EVICTION |
+ if (F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE |
WT_SESSION_LOCKED_HANDLE_LIST | WT_SESSION_LOCKED_SCHEMA |
WT_SESSION_LOCKED_TABLE))
return (0);
diff --git a/src/include/connection.h b/src/include/connection.h
index c1d1921bdcc..9288618c87e 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -358,23 +358,6 @@ struct __wt_connection_impl {
uint64_t sweep_interval; /* Handle sweep interval */
uint64_t sweep_handles_min;/* Handle sweep minimum open */
- /*
- * Shared lookaside lock, session and cursor, used by threads accessing
- * the lookaside table (other than eviction server and worker threads
- * and the sweep thread, all of which have their own lookaside cursors).
- */
- WT_SPINLOCK las_lock; /* Lookaside table spinlock */
- WT_SESSION_IMPL *las_session; /* Lookaside table session */
- uint32_t las_fileid; /* Lookaside table file ID */
-
- /*
- * The "lookaside_activity" verbose messages are throttled to once per
- * checkpoint. To accomplish this we track the checkpoint generation
- * for the most recent read and write verbose messages.
- */
- uint64_t las_verb_gen_read;
- uint64_t las_verb_gen_write;
-
/* Set of btree IDs not being rolled back */
uint8_t *stable_rollback_bitstring;
uint32_t stable_rollback_maxfile;
diff --git a/src/include/extern.h b/src/include/extern.h
index bbe66abf753..17afb48bda6 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -200,15 +200,18 @@ extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE
extern void __wt_update_obsolete_free( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd);
extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert, bool restore) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern bool __wt_las_nonempty(WT_SESSION_IMPL *session);
extern void __wt_las_stats_update(WT_SESSION_IMPL *session);
extern int __wt_las_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_cursor_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags);
-extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_las_insert_block(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CURSOR *cursor, WT_MULTI *multi, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_cursor_close( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_insert_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_PAGE *page, WT_MULTI *multi, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_cursor_position(WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_remove_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_save_dropped(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_sweep(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern uint32_t __wt_checksum_sw(const void *chunk, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern void __wt_checksum_init(void);
extern void __wt_config_initn( WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str, size_t len);
diff --git a/src/include/extern_posix.h b/src/include/extern_posix.h
index 864a40aa325..624cd815dad 100644
--- a/src/include/extern_posix.h
+++ b/src/include/extern_posix.h
@@ -27,6 +27,7 @@ extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds) WT_GCC_FUNC_DEC
extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_thread_id(uintmax_t *id) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
+extern int __wt_thread_str(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp);
extern void __wt_yield(void) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
diff --git a/src/include/extern_win.h b/src/include/extern_win.h
index 85db8175615..ae4195b20a1 100644
--- a/src/include/extern_win.h
+++ b/src/include/extern_win.h
@@ -25,7 +25,8 @@ extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds);
extern int __wt_vsnprintf_len_incr( char *buf, size_t size, size_t *retsizep, const char *fmt, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_thread_id(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_thread_id(uintmax_t *id);
+extern int __wt_thread_str(char *buf, size_t buflen) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_epoch_raw(WT_SESSION_IMPL *session, struct timespec *tsp);
extern int __wt_to_utf16_string( WT_SESSION_IMPL *session, const char*utf8, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_to_utf8_string( WT_SESSION_IMPL *session, const wchar_t*wide, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/include/flags.h b/src/include/flags.h
index 23be5fd2e14..b191e8fe01d 100644
--- a/src/include/flags.h
+++ b/src/include/flags.h
@@ -32,17 +32,18 @@
#define WT_LOG_FSYNC 0x00000008
#define WT_LOG_SYNC_ENABLED 0x00000010
#define WT_READ_CACHE 0x00000001
-#define WT_READ_LOOKASIDE 0x00000002
-#define WT_READ_NOTFOUND_OK 0x00000004
-#define WT_READ_NO_EMPTY 0x00000008
-#define WT_READ_NO_EVICT 0x00000010
+#define WT_READ_IGNORE_CACHE_SIZE 0x00000002
+#define WT_READ_LOOKASIDE 0x00000004
+#define WT_READ_NOTFOUND_OK 0x00000008
+#define WT_READ_NO_EMPTY 0x00000010
#define WT_READ_NO_GEN 0x00000020
-#define WT_READ_NO_WAIT 0x00000040
-#define WT_READ_PREV 0x00000080
-#define WT_READ_RESTART_OK 0x00000100
-#define WT_READ_SKIP_INTL 0x00000200
-#define WT_READ_TRUNCATE 0x00000400
-#define WT_READ_WONT_NEED 0x00000800
+#define WT_READ_NO_SPLIT 0x00000040
+#define WT_READ_NO_WAIT 0x00000080
+#define WT_READ_PREV 0x00000100
+#define WT_READ_RESTART_OK 0x00000200
+#define WT_READ_SKIP_INTL 0x00000400
+#define WT_READ_TRUNCATE 0x00000800
+#define WT_READ_WONT_NEED 0x00001000
#define WT_REC_CHECKPOINT 0x00000001
#define WT_REC_EVICT 0x00000002
#define WT_REC_IN_MEMORY 0x00000004
@@ -52,26 +53,27 @@
#define WT_REC_VISIBILITY_ERR 0x00000040
#define WT_REC_VISIBLE_ALL 0x00000080
#define WT_SESSION_CAN_WAIT 0x00000001
-#define WT_SESSION_INTERNAL 0x00000002
-#define WT_SESSION_LOCKED_CHECKPOINT 0x00000004
-#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000008
-#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00000010
-#define WT_SESSION_LOCKED_METADATA 0x00000020
-#define WT_SESSION_LOCKED_PASS 0x00000040
-#define WT_SESSION_LOCKED_SCHEMA 0x00000080
-#define WT_SESSION_LOCKED_SLOT 0x00000100
-#define WT_SESSION_LOCKED_TABLE_READ 0x00000200
-#define WT_SESSION_LOCKED_TABLE_WRITE 0x00000400
-#define WT_SESSION_LOCKED_TURTLE 0x00000800
-#define WT_SESSION_LOGGING_INMEM 0x00001000
-#define WT_SESSION_LOOKASIDE_CURSOR 0x00002000
-#define WT_SESSION_NO_CACHE 0x00004000
+#define WT_SESSION_IGNORE_CACHE_SIZE 0x00000002
+#define WT_SESSION_INTERNAL 0x00000004
+#define WT_SESSION_LOCKED_CHECKPOINT 0x00000008
+#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x00000010
+#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x00000020
+#define WT_SESSION_LOCKED_METADATA 0x00000040
+#define WT_SESSION_LOCKED_PASS 0x00000080
+#define WT_SESSION_LOCKED_SCHEMA 0x00000100
+#define WT_SESSION_LOCKED_SLOT 0x00000200
+#define WT_SESSION_LOCKED_TABLE_READ 0x00000400
+#define WT_SESSION_LOCKED_TABLE_WRITE 0x00000800
+#define WT_SESSION_LOCKED_TURTLE 0x00001000
+#define WT_SESSION_LOGGING_INMEM 0x00002000
+#define WT_SESSION_LOOKASIDE_CURSOR 0x00004000
#define WT_SESSION_NO_DATA_HANDLES 0x00008000
-#define WT_SESSION_NO_EVICTION 0x00010000
-#define WT_SESSION_NO_LOGGING 0x00020000
+#define WT_SESSION_NO_LOGGING 0x00010000
+#define WT_SESSION_NO_RECONCILE 0x00020000
#define WT_SESSION_NO_SCHEMA_LOCK 0x00040000
#define WT_SESSION_QUIET_CORRUPT_FILE 0x00080000
-#define WT_SESSION_SERVER_ASYNC 0x00100000
+#define WT_SESSION_READ_WONT_NEED 0x00100000
+#define WT_SESSION_SERVER_ASYNC 0x00200000
#define WT_STAT_CLEAR 0x00000001
#define WT_STAT_JSON 0x00000002
#define WT_STAT_ON_CLOSE 0x00000004
@@ -102,20 +104,20 @@
#define WT_VERB_HANDLEOPS 0x00000200
#define WT_VERB_LOG 0x00000400
#define WT_VERB_LOOKASIDE 0x00000800
-#define WT_VERB_LSM 0x00001000
-#define WT_VERB_LSM_MANAGER 0x00002000
-#define WT_VERB_METADATA 0x00004000
-#define WT_VERB_MUTEX 0x00008000
-#define WT_VERB_OVERFLOW 0x00010000
-#define WT_VERB_READ 0x00020000
-#define WT_VERB_REBALANCE 0x00040000
-#define WT_VERB_RECONCILE 0x00080000
-#define WT_VERB_RECOVERY 0x00100000
-#define WT_VERB_RECOVERY_PROGRESS 0x00200000
-#define WT_VERB_SALVAGE 0x00400000
-#define WT_VERB_SHARED_CACHE 0x00800000
-#define WT_VERB_SPLIT 0x01000000
-#define WT_VERB_TEMPORARY 0x02000000
+#define WT_VERB_LOOKASIDE_ACTIVITY 0x00001000
+#define WT_VERB_LSM 0x00002000
+#define WT_VERB_LSM_MANAGER 0x00004000
+#define WT_VERB_METADATA 0x00008000
+#define WT_VERB_MUTEX 0x00010000
+#define WT_VERB_OVERFLOW 0x00020000
+#define WT_VERB_READ 0x00040000
+#define WT_VERB_REBALANCE 0x00080000
+#define WT_VERB_RECONCILE 0x00100000
+#define WT_VERB_RECOVERY 0x00200000
+#define WT_VERB_RECOVERY_PROGRESS 0x00400000
+#define WT_VERB_SALVAGE 0x00800000
+#define WT_VERB_SHARED_CACHE 0x01000000
+#define WT_VERB_SPLIT 0x02000000
#define WT_VERB_THREAD_GROUP 0x04000000
#define WT_VERB_TIMESTAMP 0x08000000
#define WT_VERB_TRANSACTION 0x10000000
diff --git a/src/include/schema.h b/src/include/schema.h
index bae5fc8cc04..80513f1174b 100644
--- a/src/include/schema.h
+++ b/src/include/schema.h
@@ -63,7 +63,7 @@ struct __wt_table {
WT_INDEX **indices;
size_t idx_alloc;
- bool cg_complete, idx_complete, is_simple, is_simple_file;
+ bool cg_complete, idx_complete, is_simple;
u_int ncolgroups, nindices, nkey_columns;
};
diff --git a/src/include/session.h b/src/include/session.h
index bea436e05e2..23cf136d0aa 100644
--- a/src/include/session.h
+++ b/src/include/session.h
@@ -96,6 +96,12 @@ struct __wt_session_impl {
size_t scratch_cached; /* Scratch bytes cached */
#ifdef HAVE_DIAGNOSTIC
/*
+ * Variables used to look for violations of the contract that a
+ * session is only used by a single thread at once.
+ */
+ volatile uintmax_t api_tid;
+ volatile uint32_t api_enter_refcnt;
+ /*
* It's hard to figure out from where a buffer was allocated after it's
* leaked, so in diagnostic mode we track them; DIAGNOSTIC can't simply
* add additional fields to WT_ITEM structures because they are visible
diff --git a/src/include/stat.h b/src/include/stat.h
index 12a7d532496..2477079a2a8 100644
--- a/src/include/stat.h
+++ b/src/include/stat.h
@@ -536,6 +536,8 @@ struct __wt_connection_stats {
int64_t txn_pinned_range;
int64_t txn_pinned_checkpoint_range;
int64_t txn_pinned_snapshot_range;
+ int64_t txn_pinned_timestamp;
+ int64_t txn_pinned_timestamp_oldest;
int64_t txn_sync;
int64_t txn_commit_queue_head;
int64_t txn_commit_queue_inserts;
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 41dd970d3ba..5d3b0c52cbd 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -687,9 +687,8 @@ struct __wt_cursor {
#define WT_CURSTD_OVERWRITE 0x00400
#define WT_CURSTD_RAW 0x00800
#define WT_CURSTD_RAW_SEARCH 0x01000
-#define WT_CURSTD_URI_SHARED 0x02000
-#define WT_CURSTD_VALUE_EXT 0x04000 /* Value points out of the tree. */
-#define WT_CURSTD_VALUE_INT 0x08000 /* Value points into the tree. */
+#define WT_CURSTD_VALUE_EXT 0x02000 /* Value points out of the tree. */
+#define WT_CURSTD_VALUE_INT 0x04000 /* Value points into the tree. */
#define WT_CURSTD_VALUE_SET (WT_CURSTD_VALUE_EXT | WT_CURSTD_VALUE_INT)
uint32_t flags;
#endif
@@ -2203,11 +2202,11 @@ struct __wt_connection {
* list\, with values chosen from the following options: \c "api"\, \c
* "block"\, \c "checkpoint"\, \c "checkpoint_progress"\, \c "compact"\,
* \c "evict"\, \c "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c
- * "handleops"\, \c "log"\, \c "lookaside_activity"\, \c "lsm"\, \c
- * "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c "overflow"\, \c
- * "read"\, \c "rebalance"\, \c "reconcile"\, \c "recovery"\, \c
- * "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c "split"\,
- * \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c
+ * "handleops"\, \c "log"\, \c "lookaside"\, \c "lookaside_activity"\,
+ * \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c "mutex"\, \c
+ * "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c
+ * "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c
+ * "shared_cache"\, \c "split"\, \c "thread_group"\, \c "timestamp"\, \c
* "transaction"\, \c "verify"\, \c "version"\, \c "write"; default
* empty.}
* @configend
@@ -2843,10 +2842,10 @@ struct __wt_connection {
* values chosen from the following options: \c "api"\, \c "block"\, \c
* "checkpoint"\, \c "checkpoint_progress"\, \c "compact"\, \c "evict"\, \c
* "evict_stuck"\, \c "evictserver"\, \c "fileops"\, \c "handleops"\, \c "log"\,
- * \c "lookaside_activity"\, \c "lsm"\, \c "lsm_manager"\, \c "metadata"\, \c
- * "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c "reconcile"\, \c
- * "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c "shared_cache"\, \c
- * "split"\, \c "temporary"\, \c "thread_group"\, \c "timestamp"\, \c
+ * \c "lookaside"\, \c "lookaside_activity"\, \c "lsm"\, \c "lsm_manager"\, \c
+ * "metadata"\, \c "mutex"\, \c "overflow"\, \c "read"\, \c "rebalance"\, \c
+ * "reconcile"\, \c "recovery"\, \c "recovery_progress"\, \c "salvage"\, \c
+ * "shared_cache"\, \c "split"\, \c "thread_group"\, \c "timestamp"\, \c
* "transaction"\, \c "verify"\, \c "version"\, \c "write"; default empty.}
* @config{write_through, Use \c FILE_FLAG_WRITE_THROUGH on Windows to write to
* files. Ignored on non-Windows systems. Options are given as a list\, such
@@ -5286,26 +5285,33 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
* snapshots
*/
#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1278
+/*! transaction: transaction range of timestamps currently pinned */
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1279
+/*!
+ * transaction: transaction range of timestamps pinned by the oldest
+ * timestamp
+ */
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1280
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1279
+#define WT_STAT_CONN_TXN_SYNC 1281
/*! transaction: transactions commit timestamp queue inserts to head */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1280
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1282
/*! transaction: transactions commit timestamp queue inserts total */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1281
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1283
/*! transaction: transactions commit timestamp queue length */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1282
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1284
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1283
+#define WT_STAT_CONN_TXN_COMMIT 1285
/*! transaction: transactions read timestamp queue inserts to head */
-#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1284
+#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1286
/*! transaction: transactions read timestamp queue inserts total */
-#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1285
+#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1287
/*! transaction: transactions read timestamp queue length */
-#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1286
+#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1288
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1287
+#define WT_STAT_CONN_TXN_ROLLBACK 1289
/*! transaction: update conflicts */
-#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1288
+#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1290
/*!
* @}
diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c
index d159005ee11..7a20686fb97 100644
--- a/src/lsm/lsm_merge.c
+++ b/src/lsm/lsm_merge.c
@@ -446,7 +446,7 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id)
}
/* Discard pages we read as soon as we're done with them. */
- F_SET(session, WT_SESSION_NO_CACHE);
+ F_SET(session, WT_SESSION_READ_WONT_NEED);
cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_open_cursor);
cfg[1] = "bulk,raw,skip_sort_check";
@@ -498,14 +498,14 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id)
WT_TRET(dest->close(dest));
src = dest = NULL;
- F_CLR(session, WT_SESSION_NO_CACHE);
+ F_CLR(session, WT_SESSION_READ_WONT_NEED);
/*
* We're doing advisory reads to fault the new trees into cache.
* Don't block if the cache is full: our next unit of work may be to
* discard some trees to free space.
*/
- F_SET(session, WT_SESSION_NO_EVICTION);
+ F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
if (create_bloom) {
if (ret == 0)
@@ -626,6 +626,7 @@ err: if (locked)
"Merge failed with %s",
__wt_strerror(session, ret, NULL, 0));
}
- F_CLR(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION);
+ F_CLR(session,
+ WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_READ_WONT_NEED);
return (ret);
}
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index 6195726ec67..6927fe909f8 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -1068,7 +1068,8 @@ __wt_lsm_tree_readlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
* Diagnostic: avoid deadlocks with the schema lock: if we need it for
* an operation, we should already have it.
*/
- F_SET(session, WT_SESSION_NO_EVICTION | WT_SESSION_NO_SCHEMA_LOCK);
+ F_SET(session,
+ WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
}
/*
@@ -1078,7 +1079,8 @@ __wt_lsm_tree_readlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
void
__wt_lsm_tree_readunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- F_CLR(session, WT_SESSION_NO_EVICTION | WT_SESSION_NO_SCHEMA_LOCK);
+ F_CLR(session,
+ WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
__wt_readunlock(session, &lsm_tree->rwlock);
}
@@ -1096,7 +1098,8 @@ __wt_lsm_tree_writelock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
* Diagnostic: avoid deadlocks with the schema lock: if we need it for
* an operation, we should already have it.
*/
- F_SET(session, WT_SESSION_NO_EVICTION | WT_SESSION_NO_SCHEMA_LOCK);
+ F_SET(session,
+ WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
}
/*
@@ -1106,7 +1109,8 @@ __wt_lsm_tree_writelock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
void
__wt_lsm_tree_writeunlock(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
- F_CLR(session, WT_SESSION_NO_EVICTION | WT_SESSION_NO_SCHEMA_LOCK);
+ F_CLR(session,
+ WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_NO_SCHEMA_LOCK);
__wt_writeunlock(session, &lsm_tree->rwlock);
}
diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c
index f6aea02e20d..76827f7888c 100644
--- a/src/lsm/lsm_work_unit.c
+++ b/src/lsm/lsm_work_unit.c
@@ -503,7 +503,8 @@ __lsm_bloom_create(WT_SESSION_IMPL *session,
* ourselves to get stuck creating bloom filters, the entire tree
* can stall since there may be no worker threads available to flush.
*/
- F_SET(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION);
+ F_SET(session,
+ WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_READ_WONT_NEED);
for (insert_count = 0; (ret = src->next(src)) == 0; insert_count++) {
WT_ERR(src->get_key(src, &key));
__wt_bloom_insert(bloom, &key);
@@ -514,7 +515,7 @@ __lsm_bloom_create(WT_SESSION_IMPL *session,
WT_TRET(__wt_bloom_finalize(bloom));
WT_ERR(ret);
- F_CLR(session, WT_SESSION_NO_CACHE);
+ F_CLR(session, WT_SESSION_READ_WONT_NEED);
/* Load the new Bloom filter into cache. */
WT_CLEAR(key);
@@ -537,7 +538,8 @@ __lsm_bloom_create(WT_SESSION_IMPL *session,
err: if (bloom != NULL)
WT_TRET(__wt_bloom_close(bloom));
- F_CLR(session, WT_SESSION_NO_CACHE | WT_SESSION_NO_EVICTION);
+ F_CLR(session,
+ WT_SESSION_IGNORE_CACHE_SIZE | WT_SESSION_READ_WONT_NEED);
return (ret);
}
diff --git a/src/os_posix/os_map.c b/src/os_posix/os_map.c
index 3d06461a9ba..5e625a49bac 100644
--- a/src/os_posix/os_map.c
+++ b/src/os_posix/os_map.c
@@ -88,7 +88,7 @@ __wt_posix_map_preload(WT_FILE_HANDLE *fh,
length += WT_PTRDIFF(map, blk);
/* XXX proxy for "am I doing a scan?" -- manual read-ahead */
- if (F_ISSET(session, WT_SESSION_NO_CACHE)) {
+ if (F_ISSET(session, WT_SESSION_READ_WONT_NEED)) {
/* Read in 2MB blocks every 1MB of data. */
if (((uintptr_t)((uint8_t *)blk + length) &
(uintptr_t)((1<<20) - 1)) < (uintptr_t)blk)
diff --git a/src/os_posix/os_thread.c b/src/os_posix/os_thread.c
index 8af672dd0d4..dc4d49ad493 100644
--- a/src/os_posix/os_thread.c
+++ b/src/os_posix/os_thread.c
@@ -67,10 +67,32 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid)
/*
* __wt_thread_id --
+ * Return an arithmetic representation of a thread ID on POSIX.
+ */
+void
+__wt_thread_id(uintmax_t *id)
+ WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+{
+ pthread_t self;
+
+ /*
+ * POSIX 1003.1 allows pthread_t to be an opaque type; on systems where
+ * it's a pointer, return the pointer's value to match gdb output.
+ */
+ self = pthread_self();
+#ifdef __sun
+ *id = (uintmax_t)self;
+#else
+ *id = (uintmax_t)(void *)self;
+#endif
+}
+
+/*
+ * __wt_thread_str --
* Fill in a printable version of the process and thread IDs.
*/
int
-__wt_thread_id(char *buf, size_t buflen)
+__wt_thread_str(char *buf, size_t buflen)
WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
pthread_t self;
diff --git a/src/os_win/os_thread.c b/src/os_win/os_thread.c
index 1ecf53e382e..1d549cf4712 100644
--- a/src/os_win/os_thread.c
+++ b/src/os_win/os_thread.c
@@ -77,10 +77,20 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid)
/*
* __wt_thread_id --
+ * Return an arithmetic representation of a thread ID on Windows.
+ */
+void
+__wt_thread_id(uintmax_t *id)
+{
+ *id = (uintmax_t)GetCurrentThreadId();
+}
+
+/*
+ * __wt_thread_str --
* Fill in a printable version of the process and thread IDs.
*/
int
-__wt_thread_id(char *buf, size_t buflen)
+__wt_thread_str(char *buf, size_t buflen)
{
return (__wt_snprintf(buf, buflen,
"%" PRIu64 ":%" PRIu64,
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index 3e857fef324..b509c49cbbc 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -407,6 +407,18 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
*/
WT_PAGE_LOCK(session, page);
+ /*
+ * Now that the page is locked, if attempting to evict it, check again
+ * whether eviction is permitted. The page's state could have changed
+ * while we were waiting to acquire the lock (e.g., the page could have
+ * split).
+ */
+ if (LF_ISSET(WT_REC_EVICT) &&
+ !__wt_page_can_evict(session, ref, NULL)) {
+ WT_PAGE_UNLOCK(session, page);
+ return (EBUSY);
+ }
+
oldest_id = __wt_txn_oldest_id(session);
if (LF_ISSET(WT_REC_EVICT))
mod->last_eviction_id = oldest_id;
@@ -1449,6 +1461,8 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
if (uncommitted && !F_ISSET(r, WT_REC_UPDATE_RESTORE))
return (EBUSY);
+ WT_ASSERT(session, r->max_txn != WT_TXN_NONE);
+
/*
* The order of the updates on the list matters, we can't move only the
* unresolved updates, move the entire update list.
@@ -6050,7 +6064,7 @@ __rec_las_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
for (multi = r->multi, i = 0; i < r->multi_next; ++multi, ++i)
if (multi->supd != NULL)
WT_ERR(__wt_las_insert_block(
- session, r->page, cursor, multi, key));
+ session, cursor, r->page, multi, key));
err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
diff --git a/src/schema/schema_open.c b/src/schema/schema_open.c
index 420fab63d68..3b27c8300e2 100644
--- a/src/schema/schema_open.c
+++ b/src/schema/schema_open.c
@@ -106,8 +106,6 @@ __wt_schema_open_colgroups(WT_SESSION_IMPL *session, WT_TABLE *table)
}
table->cg_complete = true;
- table->is_simple_file = (table->is_simple &&
- WT_PREFIX_MATCH(table->cgroups[0]->source, "file:"));
err: __wt_scr_free(session, &buf);
__wt_schema_destroy_colgroup(session, &colgroup);
diff --git a/src/session/session_api.c b/src/session/session_api.c
index fa33b55c936..d81735234a0 100644
--- a/src/session/session_api.c
+++ b/src/session/session_api.c
@@ -259,9 +259,9 @@ __session_reconfigure(WT_SESSION *wt_session, const char *config)
ret = __wt_config_getones(session, config, "ignore_cache_size", &cval);
if (ret == 0) {
if (cval.val)
- F_SET(session, WT_SESSION_NO_EVICTION);
+ F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
else
- F_CLR(session, WT_SESSION_NO_EVICTION);
+ F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE);
}
WT_ERR_NOTFOUND_OK(ret);
@@ -1489,7 +1489,12 @@ __session_timestamp_transaction(WT_SESSION *wt_session, const char *config)
WT_SESSION_IMPL *session;
session = (WT_SESSION_IMPL *)wt_session;
+#ifdef HAVE_DIAGNOSTIC
SESSION_API_CALL(session, timestamp_transaction, config, cfg);
+#else
+ SESSION_API_CALL(session, timestamp_transaction, NULL, cfg);
+ cfg[1] = config;
+#endif
WT_TRET(__wt_txn_set_timestamp(session, cfg));
err: API_END_RET(session, ret);
}
diff --git a/src/session/session_compact.c b/src/session/session_compact.c
index 6ccf3161229..aa2f1bc3bd8 100644
--- a/src/session/session_compact.c
+++ b/src/session/session_compact.c
@@ -349,23 +349,21 @@ __wt_session_compact(
WT_DECL_RET;
WT_SESSION_IMPL *session;
u_int i;
- bool no_eviction_set;
+ bool ignore_cache_size_set;
- no_eviction_set = false;
+ ignore_cache_size_set = false;
session = (WT_SESSION_IMPL *)wt_session;
SESSION_API_CALL(session, compact, config, cfg);
/*
- * Don't highjack the compaction thread for eviction; it's holding locks
- * blocking checkpoints and once an application is tapped for eviction,
- * it can spend a long time doing nothing else. (And, if we're tapping
- * application threads for eviction, compaction should quit, it's not
- * making anything better.)
+ * The compaction thread should not block when the cache is full: it is
+ * holding locks blocking checkpoints and once the cache is full, it can
+ * spend a long time doing eviction.
*/
- if (!F_ISSET(session, WT_SESSION_NO_EVICTION)) {
- no_eviction_set = true;
- F_SET(session, WT_SESSION_NO_EVICTION);
+ if (!F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE)) {
+ ignore_cache_size_set = true;
+ F_SET(session, WT_SESSION_IGNORE_CACHE_SIZE);
}
/* In-memory ignores compaction operations. */
@@ -437,8 +435,8 @@ err: session->compact = NULL;
*/
WT_TRET(__wt_session_release_resources(session));
- if (no_eviction_set)
- F_CLR(session, WT_SESSION_NO_EVICTION);
+ if (ignore_cache_size_set)
+ F_CLR(session, WT_SESSION_IGNORE_CACHE_SIZE);
if (ret != 0)
WT_STAT_CONN_INCR(session, session_table_compact_fail);
diff --git a/src/support/err.c b/src/support/err.c
index ffbdba0b25b..a3b87b6edde 100644
--- a/src/support/err.c
+++ b/src/support/err.c
@@ -217,7 +217,7 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error,
* followed by a colon.
*/
__wt_epoch(session, &ts);
- WT_ERR(__wt_thread_id(tid, sizeof(tid)));
+ WT_ERR(__wt_thread_str(tid, sizeof(tid)));
WT_ERROR_APPEND(p, remain,
"[%" PRIuMAX ":%" PRIuMAX "][%s]",
(uintmax_t)ts.tv_sec, (uintmax_t)ts.tv_nsec / WT_THOUSAND, tid);
diff --git a/src/support/stat.c b/src/support/stat.c
index 924afaa21d6..b4533841ec6 100644
--- a/src/support/stat.c
+++ b/src/support/stat.c
@@ -1004,6 +1004,8 @@ static const char * const __stats_connection_desc[] = {
"transaction: transaction range of IDs currently pinned",
"transaction: transaction range of IDs currently pinned by a checkpoint",
"transaction: transaction range of IDs currently pinned by named snapshots",
+ "transaction: transaction range of timestamps currently pinned",
+ "transaction: transaction range of timestamps pinned by the oldest timestamp",
"transaction: transaction sync calls",
"transaction: transactions commit timestamp queue inserts to head",
"transaction: transactions commit timestamp queue inserts total",
@@ -1335,6 +1337,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing txn_pinned_range */
/* not clearing txn_pinned_checkpoint_range */
/* not clearing txn_pinned_snapshot_range */
+ /* not clearing txn_pinned_timestamp */
+ /* not clearing txn_pinned_timestamp_oldest */
stats->txn_sync = 0;
stats->txn_commit_queue_head = 0;
stats->txn_commit_queue_inserts = 0;
@@ -1769,6 +1773,9 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, txn_pinned_checkpoint_range);
to->txn_pinned_snapshot_range +=
WT_STAT_READ(from, txn_pinned_snapshot_range);
+ to->txn_pinned_timestamp += WT_STAT_READ(from, txn_pinned_timestamp);
+ to->txn_pinned_timestamp_oldest +=
+ WT_STAT_READ(from, txn_pinned_timestamp_oldest);
to->txn_sync += WT_STAT_READ(from, txn_sync);
to->txn_commit_queue_head +=
WT_STAT_READ(from, txn_commit_queue_head);
diff --git a/src/txn/txn.c b/src/txn/txn.c
index 3d45ff8a88c..8b4a7fc7936 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -612,7 +612,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
WT_TXN_GLOBAL *txn_global;
WT_TXN_OP *op;
u_int i;
- bool did_update, locked;
+ bool locked;
#ifdef HAVE_TIMESTAMPS
wt_timestamp_t prev_commit_timestamp, ts;
bool update_timestamp;
@@ -621,11 +621,11 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
txn = &session->txn;
conn = S2C(session);
txn_global = &conn->txn_global;
- did_update = txn->mod_count != 0;
locked = false;
WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
- WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || !did_update);
+ WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) ||
+ txn->mod_count == 0);
/*
* Look for a commit timestamp.
@@ -716,7 +716,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
}
/* If we are logging, write a commit log record. */
- if (did_update &&
+ if (txn->logrec != NULL &&
FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
!F_ISSET(session, WT_SESSION_NO_LOGGING)) {
/*
@@ -757,8 +757,8 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
* Writes to the lookaside file can be evicted as soon
* as they commit.
*/
- if (conn->las_fileid != 0 &&
- op->fileid == conn->las_fileid) {
+ if (conn->cache->las_fileid != 0 &&
+ op->fileid == conn->cache->las_fileid) {
op->u.upd->txnid = WT_TXN_NONE;
break;
}
@@ -823,6 +823,20 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
* write lock and re-check.
*/
if (update_timestamp) {
+#if WT_TIMESTAMP_SIZE == 8
+ while (__wt_timestamp_cmp(
+ &txn->commit_timestamp, &prev_commit_timestamp) > 0) {
+ if (__wt_atomic_cas64(
+ &txn_global->commit_timestamp.val,
+ prev_commit_timestamp.val,
+ txn->commit_timestamp.val)) {
+ txn_global->has_commit_timestamp = true;
+ break;
+ }
+ __wt_timestamp_set(
+ &prev_commit_timestamp, &txn_global->commit_timestamp);
+ }
+#else
__wt_writelock(session, &txn_global->rwlock);
if (__wt_timestamp_cmp(&txn->commit_timestamp,
&txn_global->commit_timestamp) > 0) {
@@ -831,6 +845,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
txn_global->has_commit_timestamp = true;
}
__wt_writeunlock(session, &txn_global->rwlock);
+#endif
}
#endif
@@ -881,8 +896,9 @@ __wt_txn_rollback(WT_SESSION_IMPL *session, const char *cfg[])
case WT_TXN_OP_BASIC_TS:
case WT_TXN_OP_INMEM:
WT_ASSERT(session, op->u.upd->txnid == txn->id);
- WT_ASSERT(session, S2C(session)->las_fileid == 0 ||
- op->fileid != S2C(session)->las_fileid);
+ WT_ASSERT(session,
+ S2C(session)->cache->las_fileid == 0 ||
+ op->fileid != S2C(session)->cache->las_fileid);
op->u.upd->txnid = WT_TXN_ABORTED;
break;
case WT_TXN_OP_REF:
@@ -962,6 +978,15 @@ __wt_txn_stats_update(WT_SESSION_IMPL *session)
WT_STAT_SET(session, stats, txn_pinned_range,
txn_global->current - txn_global->oldest_id);
+#if WT_TIMESTAMP_SIZE == 8
+ WT_STAT_SET(session, stats, txn_pinned_timestamp,
+ txn_global->commit_timestamp.val -
+ txn_global->pinned_timestamp.val);
+ WT_STAT_SET(session, stats, txn_pinned_timestamp_oldest,
+ txn_global->commit_timestamp.val -
+ txn_global->oldest_timestamp.val);
+#endif
+
WT_STAT_SET(session, stats, txn_pinned_snapshot_range,
snapshot_pinned == WT_TXN_NONE ?
0 : txn_global->current - snapshot_pinned);
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index eb32ef2d06a..c82187daf85 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -122,7 +122,7 @@ __checkpoint_update_generation(WT_SESSION_IMPL *session)
*/
static int
__checkpoint_apply_all(WT_SESSION_IMPL *session, const char *cfg[],
- int (*op)(WT_SESSION_IMPL *, const char *[]), bool *fullp)
+ int (*op)(WT_SESSION_IMPL *, const char *[]), bool *fullp)
{
WT_CONFIG targetconf;
WT_CONFIG_ITEM cval, k, v;
@@ -205,7 +205,7 @@ err: __wt_scr_free(session, &tmp);
*/
static int
__checkpoint_apply(WT_SESSION_IMPL *session, const char *cfg[],
- int (*op)(WT_SESSION_IMPL *, const char *[]))
+ int (*op)(WT_SESSION_IMPL *, const char *[]))
{
WT_DECL_RET;
u_int i;
@@ -440,6 +440,13 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
if (current_dirty <= (double)cache->eviction_checkpoint_target)
break;
+ /*
+ * Don't scrub when the lookaside table is in use: scrubbing is
+ * counter-productive in that case.
+ */
+ if (F_ISSET(cache, WT_CACHE_EVICT_LOOKASIDE))
+ break;
+
__wt_sleep(0, stepdown_us / 10);
__wt_epoch(session, &stop);
current_us = WT_TIMEDIFF_US(stop, last);
@@ -1080,7 +1087,7 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[], bool waiting)
*/
#undef WT_CHECKPOINT_SESSION_FLAGS
#define WT_CHECKPOINT_SESSION_FLAGS \
- (WT_SESSION_CAN_WAIT | WT_SESSION_NO_EVICTION)
+ (WT_SESSION_CAN_WAIT | WT_SESSION_IGNORE_CACHE_SIZE)
#undef WT_CHECKPOINT_SESSION_FLAGS_OFF
#define WT_CHECKPOINT_SESSION_FLAGS_OFF \
(WT_SESSION_LOOKASIDE_CURSOR)
diff --git a/src/txn/txn_rollback_to_stable.c b/src/txn/txn_rollback_to_stable.c
index 929aba30155..c68d00d7503 100644
--- a/src/txn/txn_rollback_to_stable.c
+++ b/src/txn/txn_rollback_to_stable.c
@@ -46,12 +46,12 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
__wt_las_cursor(session, &cursor, &session_flags);
/* Discard pages we read as soon as we're done with them. */
- F_SET(session, WT_SESSION_NO_CACHE);
+ F_SET(session, WT_SESSION_READ_WONT_NEED);
/* Walk the file. */
for (; (ret = cursor->next(cursor)) == 0; ) {
WT_ERR(cursor->get_key(cursor,
- &las_id, &las_pageid, &las_counter, &las_key));
+ &las_pageid, &las_id, &las_counter, &las_key));
/* Check the file ID so we can skip durable tables */
if (las_id >= conn->stable_rollback_maxfile)
@@ -79,7 +79,7 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
WT_STAT_CONN_SET(session, cache_lookaside_entries, las_total);
- F_CLR(session, WT_SESSION_NO_CACHE);
+ F_CLR(session, WT_SESSION_READ_WONT_NEED);
return (ret);
}
diff --git a/src/txn/txn_timestamp.c b/src/txn/txn_timestamp.c
index 98887627bfc..5a39a6d84dc 100644
--- a/src/txn/txn_timestamp.c
+++ b/src/txn/txn_timestamp.c
@@ -210,6 +210,10 @@ __txn_global_query_timestamp(
__wt_timestamp_set(&ts, &txn_global->commit_timestamp));
WT_ASSERT(session, !__wt_timestamp_iszero(&ts));
+ /* Skip the lock if there are no running transactions. */
+ if (TAILQ_EMPTY(&txn_global->commit_timestamph))
+ goto done;
+
/* Compare with the oldest running transaction. */
__wt_readlock(session, &txn_global->commit_timestamp_rwlock);
txn = TAILQ_FIRST(&txn_global->commit_timestamph);
@@ -254,7 +258,7 @@ __txn_global_query_timestamp(
WT_RET_MSG(session, EINVAL,
"unknown timestamp query %.*s", (int)cval.len, cval.str);
- __wt_timestamp_set(tsp, &ts);
+done: __wt_timestamp_set(tsp, &ts);
return (0);
}
#endif
@@ -292,7 +296,8 @@ __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session)
{
WT_DECL_RET;
WT_TXN_GLOBAL *txn_global;
- wt_timestamp_t active_timestamp, oldest_timestamp, pinned_timestamp;
+ wt_timestamp_t active_timestamp, last_pinned_timestamp;
+ wt_timestamp_t oldest_timestamp, pinned_timestamp;
const char *query_cfg[] = { WT_CONFIG_BASE(session,
WT_CONNECTION_query_timestamp), "get=pinned", NULL };
@@ -316,6 +321,16 @@ __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session)
} else
__wt_timestamp_set(&pinned_timestamp, &active_timestamp);
+ if (txn_global->has_pinned_timestamp) {
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ __wt_timestamp_set(
+ &last_pinned_timestamp, &txn_global->pinned_timestamp));
+
+ if (__wt_timestamp_cmp(
+ &pinned_timestamp, &last_pinned_timestamp) <= 0)
+ return (0);
+ }
+
__wt_writelock(session, &txn_global->rwlock);
if (!txn_global->has_pinned_timestamp || __wt_timestamp_cmp(
&txn_global->pinned_timestamp, &pinned_timestamp) < 0) {
@@ -364,6 +379,7 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
{
WT_TXN_GLOBAL *txn_global;
wt_timestamp_t commit_ts, oldest_ts, stable_ts;
+ wt_timestamp_t last_oldest_ts, last_stable_ts;
txn_global = &S2C(session)->txn_global;
/*
@@ -376,7 +392,11 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
session, "oldest", &oldest_ts, &oldest_cval));
WT_RET(__wt_txn_parse_timestamp(
session, "stable", &stable_ts, &stable_cval));
- __wt_writelock(session, &txn_global->rwlock);
+
+ __wt_readlock(session, &txn_global->rwlock);
+
+ __wt_timestamp_set(&last_oldest_ts, &txn_global->oldest_timestamp);
+ __wt_timestamp_set(&last_stable_ts, &txn_global->stable_timestamp);
/*
* First do error checking on the timestamp values. The
@@ -388,9 +408,9 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
if (!has_commit && txn_global->has_commit_timestamp)
__wt_timestamp_set(&commit_ts, &txn_global->commit_timestamp);
if (!has_oldest && txn_global->has_oldest_timestamp)
- __wt_timestamp_set(&oldest_ts, &txn_global->oldest_timestamp);
- if (!has_stable && txn_global->has_oldest_timestamp)
- __wt_timestamp_set(&stable_ts, &txn_global->stable_timestamp);
+ __wt_timestamp_set(&oldest_ts, &last_oldest_ts);
+ if (!has_stable && txn_global->has_stable_timestamp)
+ __wt_timestamp_set(&stable_ts, &last_stable_ts);
/*
* If a commit timestamp was supplied, check that it is no older than
@@ -398,7 +418,7 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
*/
if (has_commit && (has_oldest || txn_global->has_oldest_timestamp) &&
__wt_timestamp_cmp(&oldest_ts, &commit_ts) > 0) {
- __wt_writeunlock(session, &txn_global->rwlock);
+ __wt_readunlock(session, &txn_global->rwlock);
WT_RET_MSG(session, EINVAL,
"set_timestamp: oldest timestamp must not be later than "
"commit timestamp");
@@ -406,7 +426,7 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
if (has_commit && (has_stable || txn_global->has_stable_timestamp) &&
__wt_timestamp_cmp(&stable_ts, &commit_ts) > 0) {
- __wt_writeunlock(session, &txn_global->rwlock);
+ __wt_readunlock(session, &txn_global->rwlock);
WT_RET_MSG(session, EINVAL,
"set_timestamp: stable timestamp must not be later than "
"commit timestamp");
@@ -420,12 +440,27 @@ __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
(has_oldest || txn_global->has_oldest_timestamp) &&
(has_stable || txn_global->has_stable_timestamp) &&
__wt_timestamp_cmp(&oldest_ts, &stable_ts) > 0) {
- __wt_writeunlock(session, &txn_global->rwlock);
+ __wt_readunlock(session, &txn_global->rwlock);
WT_RET_MSG(session, EINVAL,
"set_timestamp: oldest timestamp must not be later than "
"stable timestamp");
}
+ __wt_readunlock(session, &txn_global->rwlock);
+
+ /* Check if we are actually updating anything. */
+ if (has_oldest && txn_global->has_oldest_timestamp &&
+ __wt_timestamp_cmp(&oldest_ts, &last_oldest_ts) <= 0)
+ has_oldest = false;
+
+ if (has_stable && txn_global->has_stable_timestamp &&
+ __wt_timestamp_cmp(&stable_ts, &last_stable_ts) <= 0)
+ has_stable = false;
+
+ if (!has_commit && !has_oldest && !has_stable)
+ return (0);
+
+ __wt_writelock(session, &txn_global->rwlock);
/*
* This method can be called from multiple threads, check that we are
* moving the global timestamps forwards.
@@ -543,7 +578,7 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
/*
* Look for a commit timestamp.
*/
- ret = __wt_config_gets(session, cfg, "commit_timestamp", &cval);
+ ret = __wt_config_gets_def(session, cfg, "commit_timestamp", 0, &cval);
if (ret == 0 && cval.len != 0) {
#ifdef HAVE_TIMESTAMPS
WT_TXN *txn = &session->txn;
diff --git a/test/checkpoint/checkpointer.c b/test/checkpoint/checkpointer.c
index 7299784ab35..ae499c2e96b 100644
--- a/test/checkpoint/checkpointer.c
+++ b/test/checkpoint/checkpointer.c
@@ -67,7 +67,7 @@ checkpointer(void *arg)
WT_UNUSED(arg);
- testutil_check(__wt_thread_id(tid, sizeof(tid)));
+ testutil_check(__wt_thread_str(tid, sizeof(tid)));
printf("checkpointer thread starting: tid: %s\n", tid);
(void)real_checkpointer();
diff --git a/test/checkpoint/workers.c b/test/checkpoint/workers.c
index e75f86f141a..cd32db6746f 100644
--- a/test/checkpoint/workers.c
+++ b/test/checkpoint/workers.c
@@ -148,7 +148,7 @@ worker(void *arg)
WT_UNUSED(arg);
- testutil_check(__wt_thread_id(tid, sizeof(tid)));
+ testutil_check(__wt_thread_str(tid, sizeof(tid)));
printf("worker thread starting: tid: %s\n", tid);
(void)real_worker();
diff --git a/test/csuite/timestamp_abort/main.c b/test/csuite/timestamp_abort/main.c
index ca5fa10c2db..79b232b532a 100644
--- a/test/csuite/timestamp_abort/main.c
+++ b/test/csuite/timestamp_abort/main.c
@@ -75,7 +75,7 @@ static const char * const ckpt_file = "checkpoint_done";
static bool compat, inmem, use_ts;
static volatile uint64_t global_ts = 1;
-static uint64_t th_ts[MAX_TH];
+static volatile uint64_t th_ts[MAX_TH];
#define ENV_CONFIG_COMPAT ",compatibility=(release=\"2.9\")"
#define ENV_CONFIG_DEF \
@@ -121,7 +121,7 @@ thread_ts_run(void *arg)
WT_CURSOR *cur_stable;
WT_SESSION *session;
THREAD_DATA *td;
- uint64_t i, last_ts, oldest_ts;
+ uint64_t i, last_ts, oldest_ts, this_ts;
char tscfg[64];
td = (THREAD_DATA *)arg;
@@ -148,10 +148,11 @@ thread_ts_run(void *arg)
* any thread still with a zero timestamp we go to
* sleep.
*/
- if (th_ts[i] == 0)
+ this_ts = th_ts[i];
+ if (this_ts == 0)
goto ts_wait;
- if (th_ts[i] != 0 && th_ts[i] < oldest_ts)
- oldest_ts = th_ts[i];
+ else if (this_ts < oldest_ts)
+ oldest_ts = this_ts;
}
if (oldest_ts != UINT64_MAX &&
@@ -638,7 +639,9 @@ main(int argc, char *argv[])
}
/*
* !!! If we wanted to take a copy of the directory before recovery,
- * this is the place to do it.
+ * this is the place to do it. Don't do it all the time because
+ * it can use a lot of disk space, which can cause test machine
+ * issues.
*/
if (chdir(home) != 0)
testutil_die(errno, "parent chdir: %s", home);
diff --git a/test/csuite/wt2719_reconfig/main.c b/test/csuite/wt2719_reconfig/main.c
index 10824aec744..58e2a0bc113 100644
--- a/test/csuite/wt2719_reconfig/main.c
+++ b/test/csuite/wt2719_reconfig/main.c
@@ -171,7 +171,6 @@ static const char * const list[] = {
",verbose=(\"salvage\")",
",verbose=(\"shared_cache\")",
",verbose=(\"split\")",
- ",verbose=(\"temporary\")",
",verbose=(\"transaction\")",
",verbose=(\"verify\")",
",verbose=(\"version\")",
diff --git a/test/cursor_order/cursor_order_ops.c b/test/cursor_order/cursor_order_ops.c
index 810bf895b42..576860483ba 100644
--- a/test/cursor_order/cursor_order_ops.c
+++ b/test/cursor_order/cursor_order_ops.c
@@ -221,7 +221,7 @@ reverse_scan(void *arg)
id = (uintmax_t)arg;
s = &run_info[id];
cfg = s->cfg;
- testutil_check(__wt_thread_id(tid, sizeof(tid)));
+ testutil_check(__wt_thread_str(tid, sizeof(tid)));
__wt_random_init(&s->rnd);
printf(" reverse scan thread %2" PRIuMAX
@@ -305,7 +305,7 @@ append_insert(void *arg)
id = (uintmax_t)arg;
s = &run_info[id];
cfg = s->cfg;
- testutil_check(__wt_thread_id(tid, sizeof(tid)));
+ testutil_check(__wt_thread_str(tid, sizeof(tid)));
__wt_random_init(&s->rnd);
printf("write thread %2" PRIuMAX " starting: tid: %s, file: %s\n",
diff --git a/test/format/util.c b/test/format/util.c
index 83ddf307cc9..3c61ab5a66b 100644
--- a/test/format/util.c
+++ b/test/format/util.c
@@ -591,7 +591,7 @@ timestamp(void *arg)
WT_SESSION *session;
TINFO **tinfo_list, *tinfo;
time_t last, now;
- uint64_t oldest_timestamp, usecs;
+ uint64_t oldest_timestamp, this_ts, usecs;
uint32_t i;
char config_buf[64];
@@ -614,9 +614,10 @@ timestamp(void *arg)
oldest_timestamp = UINT64_MAX;
for (i = 0; i < g.c_threads; ++i) {
tinfo = tinfo_list[i];
- if (tinfo->timestamp != 0 &&
- tinfo->timestamp < oldest_timestamp)
- oldest_timestamp = tinfo->timestamp;
+ this_ts = tinfo->timestamp;
+ if (this_ts != 0 &&
+ this_ts < oldest_timestamp)
+ oldest_timestamp = this_ts;
}
if (oldest_timestamp == UINT64_MAX) {
__wt_sleep(1, 0);
diff --git a/test/thread/rw.c b/test/thread/rw.c
index 49af7c782b5..bda54d388b5 100644
--- a/test/thread/rw.c
+++ b/test/thread/rw.c
@@ -191,7 +191,7 @@ reader(void *arg)
id = (int)(uintptr_t)arg;
s = &run_info[id];
- testutil_check(__wt_thread_id(tid, sizeof(tid)));
+ testutil_check(__wt_thread_str(tid, sizeof(tid)));
__wt_random_init(&s->rnd);
printf(" read thread %2d starting: tid: %s, file: %s\n",
@@ -287,7 +287,7 @@ writer(void *arg)
id = (int)(uintptr_t)arg;
s = &run_info[id];
- testutil_check(__wt_thread_id(tid, sizeof(tid)));
+ testutil_check(__wt_thread_str(tid, sizeof(tid)));
__wt_random_init(&s->rnd);
printf("write thread %2d starting: tid: %s, file: %s\n",