summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Ramon Fernandez <ramon@mongodb.com> 2016-06-28 15:03:54 -0400
committer Ramon Fernandez <ramon@mongodb.com> 2016-06-28 15:04:02 -0400
commit e8dc6b98c1c91727f7def84f2fb4b57bf67ccc88 (patch)
tree d42e295804d3c8247cbde5feed070c242b63dee0 /src
parent 30162fa8bbb9d7e7f7a789361aed7e046995f7b3 (diff)
download mongo-e8dc6b98c1c91727f7def84f2fb4b57bf67ccc88.tar.gz
Import wiredtiger-wiredtiger-2.8.0-219-gf4954f6.tar.gz from wiredtiger branch mongodb-3.2
ref: a6a64e9..f4954f6

SERVER-24580 Performance is poor when WiredTiger cache is full
WT-2672 Handle system calls that don't set errno
WT-2696 Missing log records with large updates
WT-2702 Under high thread load, WiredTiger exceeds cache size
WT-2708 split child-update race with reconciliation/eviction
WT-2729 Focus eviction walks in largest trees
Diffstat (limited to 'src')
-rw-r--r-- src/third_party/wiredtiger/dist/flags.py 4
-rw-r--r-- src/third_party/wiredtiger/dist/s_string.ok 2
-rw-r--r-- src/third_party/wiredtiger/dist/stat_data.py 7
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_read.c 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_split.c 24
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_stat.c 3
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_sync.c 8
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_cache.c 8
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_open.c 3
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_file.c 3
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_lru.c 188
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_page.c 3
-rw-r--r-- src/third_party/wiredtiger/src/include/btree.h 2
-rw-r--r-- src/third_party/wiredtiger/src/include/btree.i 29
-rw-r--r-- src/third_party/wiredtiger/src/include/cache.h 2
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h 2
-rw-r--r-- src/third_party/wiredtiger/src/include/flags.h 2
-rw-r--r-- src/third_party/wiredtiger/src/include/os.h 27
-rw-r--r-- src/third_party/wiredtiger/src/include/serial.i 2
-rw-r--r-- src/third_party/wiredtiger/src/include/stat.h 7
-rw-r--r-- src/third_party/wiredtiger/src/include/wiredtiger.in 382
-rw-r--r-- src/third_party/wiredtiger/src/log/log_slot.c 11
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_work_unit.c 3
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_dir.c 2
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_fs.c 45
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_map.c 2
-rw-r--r-- src/third_party/wiredtiger/src/os_win/os_fs.c 5
-rw-r--r-- src/third_party/wiredtiger/src/support/stat.c 28
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn.c 15
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_ckpt.c 6
-rw-r--r-- src/third_party/wiredtiger/test/recovery/random-abort.c 185
-rw-r--r-- src/third_party/wiredtiger/tools/wtstats/stat_data.py 4
32 files changed, 639 insertions, 377 deletions
diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py
index 8f7827ad160..806fac2137d 100644
--- a/src/third_party/wiredtiger/dist/flags.py
+++ b/src/third_party/wiredtiger/dist/flags.py
@@ -57,6 +57,10 @@ flags = {
'TXN_LOG_CKPT_STOP',
'TXN_LOG_CKPT_SYNC',
],
+ 'txn_update_oldest' : [
+ 'TXN_OLDEST_STRICT',
+ 'TXN_OLDEST_WAIT',
+ ],
'verbose' : [
'VERB_API',
'VERB_BLOCK',
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index eed034abb47..631f2a5c909 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -439,6 +439,7 @@ bzDecompressInit
bzalloc
bzfree
bzip
+call's
calloc
cas
catfmt
@@ -1067,6 +1068,7 @@ unescaped
unicode
uninstantiated
unistd
+unlink
unlinked
unmap
unmarshall
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index bd951e64999..483e0bd3ef2 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -162,6 +162,7 @@ connection_stats = [
CacheStat('cache_bytes_write', 'bytes written from cache', 'size'),
CacheStat('cache_eviction_aggressive_set', 'eviction currently operating in aggressive mode', 'no_clear,no_scale'),
CacheStat('cache_eviction_app', 'pages evicted by application threads'),
+ CacheStat('cache_eviction_app_dirty', 'modified pages evicted by application threads'),
CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'),
CacheStat('cache_eviction_clean', 'unmodified pages evicted'),
CacheStat('cache_eviction_deepen', 'page split during eviction deepened the tree'),
@@ -173,6 +174,9 @@ connection_stats = [
CacheStat('cache_eviction_hazard', 'hazard pointer blocked page eviction'),
CacheStat('cache_eviction_internal', 'internal pages evicted'),
CacheStat('cache_eviction_maximum_page_size', 'maximum page size at eviction', 'no_clear,no_scale,size'),
+ CacheStat('cache_eviction_pages_queued', 'pages queued for eviction'),
+ CacheStat('cache_eviction_pages_queued_oldest', 'pages queued for urgent eviction'),
+ CacheStat('cache_eviction_pages_seen', 'pages seen by eviction walk'),
CacheStat('cache_eviction_queue_empty', 'eviction server candidate queue empty when topping up'),
CacheStat('cache_eviction_queue_not_empty', 'eviction server candidate queue not empty when topping up'),
CacheStat('cache_eviction_server_evicting', 'eviction server evicting pages'),
@@ -181,6 +185,8 @@ connection_stats = [
CacheStat('cache_eviction_split_internal', 'internal pages split during eviction'),
CacheStat('cache_eviction_split_leaf', 'leaf pages split during eviction'),
CacheStat('cache_eviction_walk', 'pages walked for eviction'),
+ CacheStat('cache_eviction_walks_active', 'files with active eviction walks', 'no_clear,no_scale,size'),
+ CacheStat('cache_eviction_walks_started', 'files with new eviction walks started'),
CacheStat('cache_eviction_worker_evicting', 'eviction worker thread evicting pages'),
CacheStat('cache_inmem_split', 'in-memory page splits'),
CacheStat('cache_inmem_splittable', 'in-memory page passed criteria to be split'),
@@ -408,6 +414,7 @@ dsrc_stats = [
##########################################
# Cache and eviction statistics
##########################################
+ CacheStat('cache_bytes_inuse', 'bytes currently in the cache', 'no_clear,no_scale,size'),
CacheStat('cache_bytes_read', 'bytes read into cache', 'size'),
CacheStat('cache_bytes_write', 'bytes written from cache', 'size'),
CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'),
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index 6a1203628a9..df5f5cc2df8 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -326,7 +326,7 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
__wt_page_evict_soon(page);
/* Bump the oldest ID, we're about to do some visibility checks. */
- WT_RET(__wt_txn_update_oldest(session, false));
+ WT_RET(__wt_txn_update_oldest(session, 0));
/* If eviction cannot succeed, don't try. */
return (__wt_page_can_evict(session, ref, NULL));
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 4f16a290958..00bea5a6773 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -298,7 +298,7 @@ static int
__split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
WT_REF **from_refp, size_t *decrp, WT_REF **to_refp, size_t *incrp)
{
- WT_ADDR *addr;
+ WT_ADDR *addr, *ref_addr;
WT_CELL_UNPACK unpack;
WT_DECL_RET;
WT_IKEY *ikey;
@@ -345,13 +345,18 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
}
/*
- * If there's no address (the page has never been written), or the
- * address has been instantiated, there's no work to do. Otherwise,
- * instantiate the address in-memory, from the on-page cell.
+ * If there's no address at all (the page has never been written), or
+ * the address has already been instantiated, there's no work to do.
+ * Otherwise, the address still references a split page on-page cell,
+ * instantiate it. We can race with reconciliation and/or eviction of
+ * the child pages, be cautious: read the address and verify it, and
+ * only update it if the value is unchanged from the original. In the
+ * case of a race, the address must no longer reference the split page,
+ * we're done.
*/
- addr = ref->addr;
- if (addr != NULL && !__wt_off_page(from_home, addr)) {
- __wt_cell_unpack((WT_CELL *)ref->addr, &unpack);
+ WT_ORDERED_READ(ref_addr, ref->addr);
+ if (ref_addr != NULL && !__wt_off_page(from_home, ref_addr)) {
+ __wt_cell_unpack((WT_CELL *)ref_addr, &unpack);
WT_RET(__wt_calloc_one(session, &addr));
if ((ret = __wt_strndup(
session, unpack.data, unpack.size, &addr->addr)) != 0) {
@@ -371,7 +376,10 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
break;
WT_ILLEGAL_VALUE(session);
}
- ref->addr = addr;
+ if (!__wt_atomic_cas_ptr(&ref->addr, ref_addr, addr)) {
+ __wt_free(session, addr->addr);
+ __wt_free(session, addr);
+ }
}
/* And finally, copy the WT_REF pointer itself. */
diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c
index 3d5abf34147..f1e3c0b40d5 100644
--- a/src/third_party/wiredtiger/src/btree/bt_stat.c
+++ b/src/third_party/wiredtiger/src/btree/bt_stat.c
@@ -41,6 +41,9 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
WT_STAT_SET(session, stats, btree_maxleafpage, btree->maxleafpage);
WT_STAT_SET(session, stats, btree_maxleafvalue, btree->maxleafvalue);
+ WT_STAT_SET(session, stats, cache_bytes_inuse,
+ __wt_btree_bytes_inuse(session));
+
/* Everything else is really, really expensive. */
if (!F_ISSET(cst, WT_CONN_STAT_ALL))
return (0);
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index 5d60c436a08..df4ceea8ffa 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -26,12 +26,14 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages;
uint64_t oldest_id, saved_snap_min;
uint32_t flags;
+ u_int saved_evict_walk_period;
conn = S2C(session);
btree = S2BT(session);
walk = NULL;
txn = &session->txn;
saved_snap_min = WT_SESSION_TXN_STATE(session)->snap_min;
+ saved_evict_walk_period = btree->evict_walk_period;
flags = WT_READ_CACHE | WT_READ_NO_GEN;
internal_bytes = leaf_bytes = 0;
@@ -236,10 +238,10 @@ err: /* On error, clear any left-over tree walk. */
WT_FULL_BARRIER();
/*
- * If this tree was being skipped by the eviction server during
- * the checkpoint, clear the wait.
+ * In case this tree was being skipped by the eviction server
+ * during the checkpoint, restore the previous state.
*/
- btree->evict_walk_period = 0;
+ btree->evict_walk_period = saved_evict_walk_period;
/*
* Wake the eviction server, in case application threads have
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c
index 9a2c394e9a6..9dfd1cdcbfa 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache.c
@@ -217,6 +217,14 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session)
WT_STAT_SET(
session, stats, cache_bytes_overflow, cache->bytes_overflow);
WT_STAT_SET(session, stats, cache_bytes_leaf, leaf);
+
+ /*
+ * The number of files with active walks ~= number of hazard pointers
+ * in the walk session. Note: reading without locking.
+ */
+ if (conn->evict_session != NULL)
+ WT_STAT_SET(session, stats, cache_eviction_walks_active,
+ conn->evict_session->nhazard);
}
/*
diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c
index f5722d343f7..9c978fed843 100644
--- a/src/third_party/wiredtiger/src/conn/conn_open.c
+++ b/src/third_party/wiredtiger/src/conn/conn_open.c
@@ -93,7 +93,8 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
* transaction ID will catch up with the current ID.
*/
for (;;) {
- WT_TRET(__wt_txn_update_oldest(session, true));
+ WT_TRET(__wt_txn_update_oldest(session,
+ WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
if (txn_global->oldest_id == txn_global->current)
break;
__wt_yield();
diff --git a/src/third_party/wiredtiger/src/evict/evict_file.c b/src/third_party/wiredtiger/src/evict/evict_file.c
index 1da2e959b6a..4b9e2442f32 100644
--- a/src/third_party/wiredtiger/src/evict/evict_file.c
+++ b/src/third_party/wiredtiger/src/evict/evict_file.c
@@ -26,7 +26,8 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
WT_RET(__wt_evict_file_exclusive_on(session));
/* Make sure the oldest transaction ID is up-to-date. */
- WT_RET(__wt_txn_update_oldest(session, true));
+ WT_RET(__wt_txn_update_oldest(
+ session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
/* Walk the tree, discarding pages. */
next_ref = NULL;
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index be8cc1df956..360a3f69cd2 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -16,7 +16,7 @@ static int __evict_lru_walk(WT_SESSION_IMPL *);
static int __evict_page(WT_SESSION_IMPL *, bool);
static int __evict_pass(WT_SESSION_IMPL *);
static int __evict_walk(WT_SESSION_IMPL *);
-static int __evict_walk_file(WT_SESSION_IMPL *, u_int *);
+static int __evict_walk_file(WT_SESSION_IMPL *, u_int, u_int *);
static WT_THREAD_RET __evict_worker(void *);
static int __evict_server_work(WT_SESSION_IMPL *);
@@ -32,11 +32,6 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry)
uint64_t read_gen;
btree = entry->btree;
-
- /* Never prioritize empty slots. */
- if (entry->ref == NULL)
- return (UINT64_MAX);
-
page = entry->ref->page;
/* Any page set to the oldest generation should be discarded. */
@@ -71,14 +66,15 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry)
* Qsort function: sort the eviction array.
*/
static int WT_CDECL
-__evict_lru_cmp(const void *a, const void *b)
+__evict_lru_cmp(const void *a_arg, const void *b_arg)
{
- uint64_t a_lru, b_lru;
+ const WT_EVICT_ENTRY *a = a_arg, *b = b_arg;
+ uint64_t a_score, b_score;
- a_lru = __evict_read_gen(a);
- b_lru = __evict_read_gen(b);
+ a_score = (a->ref == NULL ? UINT64_MAX : a->score);
+ b_score = (b->ref == NULL ? UINT64_MAX : b->score);
- return ((a_lru < b_lru) ? -1 : (a_lru == b_lru) ? 0 : 1);
+ return ((a_score < b_score) ? -1 : (a_score == b_score) ? 0 : 1);
}
/*
@@ -592,9 +588,10 @@ __evict_pass(WT_SESSION_IMPL *session)
*
* Do this every time the eviction server wakes up, regardless
* of whether the cache is full, to prevent the oldest ID
- * falling too far behind.
+ * falling too far behind. Don't wait to lock the table: with
+ * highly threaded workloads, that creates a bottleneck.
*/
- WT_RET(__wt_txn_update_oldest(session, loop > 0));
+ WT_RET(__wt_txn_update_oldest(session, WT_TXN_OLDEST_STRICT));
if (!__evict_update_work(session))
break;
@@ -900,7 +897,7 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
{
WT_CACHE *cache;
WT_DECL_RET;
- uint64_t cutoff, read_gen_oldest;
+ uint64_t read_gen_oldest;
uint32_t candidates, entries;
cache = S2C(session)->cache;
@@ -958,7 +955,7 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
read_gen_oldest = WT_READGEN_OLDEST;
for (candidates = 0; candidates < entries; ++candidates) {
read_gen_oldest =
- __evict_read_gen(&cache->evict_queue[candidates]);
+ cache->evict_queue[candidates].score;
if (read_gen_oldest != WT_READGEN_OLDEST)
break;
}
@@ -967,35 +964,29 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
* Take all candidates if we only gathered pages with an oldest
* read generation set.
*
- * We normally never take more than 50% of the entries; if 50%
- * of the entries were at the oldest read generation, take them.
+ * We normally never take more than 50% of the entries but if
+ * 50% of the entries were at the oldest read generation, take
+ * all of them.
*/
if (read_gen_oldest == WT_READGEN_OLDEST)
cache->evict_candidates = entries;
else if (candidates >= entries / 2)
cache->evict_candidates = candidates;
else {
- /* Save the calculated oldest generation. */
- cache->read_gen_oldest = read_gen_oldest;
-
- /* Find the bottom 25% of read generations. */
- cutoff =
- (3 * read_gen_oldest + __evict_read_gen(
- &cache->evict_queue[entries - 1])) / 4;
-
/*
- * Don't take less than 10% or more than 50% of entries,
- * regardless. That said, if there is only one entry,
- * which is normal when populating an empty file, don't
- * exclude it.
+ * Take all of the urgent pages plus a third of
+ * ordinary candidates (which could be expressed as
+ * WT_EVICT_WALK_INCR / WT_EVICT_WALK_BASE). In the
+ * steady state, we want to get as many candidates as
+ * the eviction walk adds to the queue.
+ *
+ * That said, if there is only one entry, which is
+ * normal when populating an empty file, don't exclude
+ * it.
*/
- for (candidates = 1 + entries / 10;
- candidates < entries / 2;
- candidates++)
- if (__evict_read_gen(
- &cache->evict_queue[candidates]) > cutoff)
- break;
- cache->evict_candidates = candidates;
+ cache->evict_candidates =
+ 1 + candidates + ((entries - candidates) - 1) / 3;
+ cache->read_gen_oldest = read_gen_oldest;
}
}
@@ -1071,7 +1062,7 @@ __evict_walk(WT_SESSION_IMPL *session)
* per walk.
*/
start_slot = slot = cache->evict_entries;
- max_entries = slot + WT_EVICT_WALK_INCR;
+ max_entries = WT_MIN(slot + WT_EVICT_WALK_INCR, cache->evict_slots);
retry: while (slot < max_entries && ret == 0) {
/*
@@ -1154,7 +1145,6 @@ retry: while (slot < max_entries && ret == 0) {
* useful in the past.
*/
if (btree->evict_walk_period != 0 &&
- cache->evict_entries >= WT_EVICT_WALK_INCR &&
btree->evict_walk_skips++ < btree->evict_walk_period)
continue;
btree->evict_walk_skips = 0;
@@ -1180,7 +1170,8 @@ retry: while (slot < max_entries && ret == 0) {
if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) {
cache->evict_file_next = dhandle;
WT_WITH_DHANDLE(session, dhandle,
- ret = __evict_walk_file(session, &slot));
+ ret = __evict_walk_file(
+ session, max_entries, &slot));
WT_ASSERT(session, session->split_gen == 0);
}
__wt_spin_unlock(session, &cache->evict_walk_lock);
@@ -1247,8 +1238,9 @@ __evict_init_candidate(
if (evict->ref != NULL)
__evict_list_clear(session, evict);
- evict->ref = ref;
evict->btree = S2BT(session);
+ evict->ref = ref;
+ evict->score = __evict_read_gen(evict);
/* Mark the page on the list; set last to flush the other updates. */
F_SET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU);
@@ -1259,7 +1251,7 @@ __evict_init_candidate(
* Get a few page eviction candidates from a single underlying file.
*/
static int
-__evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
+__evict_walk_file(WT_SESSION_IMPL *session, u_int max_entries, u_int *slotp)
{
WT_BTREE *btree;
WT_CACHE *cache;
@@ -1269,8 +1261,9 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
WT_REF *ref;
- uint64_t pages_walked;
- uint32_t walk_flags;
+ uint64_t btree_inuse, bytes_per_slot, cache_inuse;
+ uint64_t pages_seen, refs_walked;
+ uint32_t remaining_slots, target_pages, total_slots, walk_flags;
int internal_pages, restarts;
bool enough, modified;
@@ -1280,11 +1273,43 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
internal_pages = restarts = 0;
enough = false;
+ /*
+ * Figure out how many slots to fill from this tree.
+ * Note that some care is taken in the calculation to avoid overflow.
+ */
start = cache->evict_queue + *slotp;
- end = start + WT_EVICT_WALK_PER_FILE;
+ btree_inuse = __wt_btree_bytes_inuse(session);
+ cache_inuse = __wt_cache_bytes_inuse(cache);
+ remaining_slots = max_entries - *slotp;
+ total_slots = max_entries - cache->evict_entries;
+ target_pages = (uint32_t)(btree_inuse /
+ (cache_inuse / total_slots));
+
+ /*
+ * The target number of pages for this tree is proportional to the
+ * space it is taking up in cache. Round to the nearest number of
+ * slots so we assign all of the slots to a tree filling 99+% of the
+ * cache (and only have to walk it once).
+ */
+ bytes_per_slot = cache_inuse / total_slots;
+ target_pages = (uint32_t)(
+ (btree_inuse + bytes_per_slot / 2) / bytes_per_slot);
+ if (target_pages == 0) {
+ /*
+ * Randomly walk trees with a tiny fraction of the cache in
+ * case there are so many trees that none of them use enough of
+ * the cache to be allocated slots.
+ */
+ if (__wt_random(&session->rnd) / (double)UINT32_MAX >
+ btree_inuse / (double)cache_inuse)
+ return (0);
+ target_pages = 10;
+ }
+
if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
- end > cache->evict_queue + cache->evict_slots)
- end = cache->evict_queue + cache->evict_slots;
+ target_pages > remaining_slots)
+ target_pages = remaining_slots;
+ end = start + target_pages;
walk_flags =
WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT;
@@ -1303,17 +1328,21 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
* Once we hit the page limit, do one more step through the walk in
* case we are appending and only the last page in the file is live.
*/
- for (evict = start, pages_walked = 0;
+ for (evict = start, pages_seen = refs_walked = 0;
evict < end && !enough && (ret == 0 || ret == WT_NOTFOUND);
ret = __wt_tree_walk_count(
- session, &btree->evict_ref, &pages_walked, walk_flags)) {
- enough = pages_walked > cache->evict_max_refs_per_file;
+ session, &btree->evict_ref, &refs_walked, walk_flags)) {
+ enough = refs_walked > cache->evict_max_refs_per_file;
if ((ref = btree->evict_ref) == NULL) {
if (++restarts == 2 || enough)
break;
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_walks_started);
continue;
}
+ ++pages_seen;
+
/* Ignore root pages entirely. */
if (__wt_ref_is_root(ref))
continue;
@@ -1341,9 +1370,13 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
}
/* Pages we no longer need (clean or dirty), are found money. */
+ if (page->read_gen == WT_READGEN_OLDEST) {
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_pages_queued_oldest);
+ goto fast;
+ }
if (__wt_page_is_empty(page) ||
- F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
- page->read_gen == WT_READGEN_OLDEST)
+ F_ISSET(session->dhandle, WT_DHANDLE_DEAD))
goto fast;
/* Skip clean pages if appropriate. */
@@ -1409,24 +1442,31 @@ fast: /* If the page can't be evicted, give up. */
WT_RET_NOTFOUND_OK(ret);
*slotp += (u_int)(evict - start);
+ WT_STAT_FAST_CONN_INCRV(
+ session, cache_eviction_pages_queued, (u_int)(evict - start));
/*
* If we happen to end up on the root page, clear it. We have to track
* hazard pointers, and the root page complicates that calculation.
*
+ * Likewise if we found no new candidates during the walk: there is no
+ * point keeping a page pinned, since it may be the only candidate in an
+ * idle tree.
+ *
* If we land on a page requiring forced eviction, move on to the next
* page: we want this page evicted as quickly as possible.
*/
if ((ref = btree->evict_ref) != NULL) {
- if (__wt_ref_is_root(ref))
+ if (__wt_ref_is_root(ref) || evict == start)
WT_RET(__evict_clear_walk(session));
else if (ref->page->read_gen == WT_READGEN_OLDEST)
WT_RET_NOTFOUND_OK(__wt_tree_walk_count(
session, &btree->evict_ref,
- &pages_walked, walk_flags));
+ &refs_walked, walk_flags));
}
- WT_STAT_FAST_CONN_INCRV(session, cache_eviction_walk, pages_walked);
+ WT_STAT_FAST_CONN_INCRV(session, cache_eviction_walk, refs_walked);
+ WT_STAT_FAST_CONN_INCRV(session, cache_eviction_pages_seen, pages_seen);
return (0);
}
@@ -1459,6 +1499,8 @@ __evict_get_ref(
return (WT_NOTFOUND);
if (__wt_spin_trylock(session, &cache->evict_lock) == 0)
break;
+ if (!F_ISSET(session, WT_SESSION_INTERNAL))
+ return (WT_NOTFOUND);
__wt_yield();
}
@@ -1472,13 +1514,14 @@ __evict_get_ref(
candidates /= 2;
/* Get the next page queued for eviction. */
- while ((evict = cache->evict_current) != NULL &&
- evict < cache->evict_queue + candidates && evict->ref != NULL) {
+ for (evict = cache->evict_current;
+ evict >= cache->evict_queue &&
+ evict < cache->evict_queue + candidates;
+ ++evict) {
+ if (evict->ref == NULL)
+ continue;
WT_ASSERT(session, evict->btree != NULL);
- /* Move to the next item. */
- ++cache->evict_current;
-
/*
* Lock the page while holding the eviction mutex to prevent
* multiple attempts to evict it. For pages that are already
@@ -1508,8 +1551,11 @@ __evict_get_ref(
}
/* Clear the current pointer if there are no more candidates. */
- if (evict >= cache->evict_queue + cache->evict_candidates)
+ if (evict == NULL || evict + 1 >=
+ cache->evict_queue + cache->evict_candidates)
cache->evict_current = NULL;
+ else
+ cache->evict_current = evict + 1;
__wt_spin_unlock(session, &cache->evict_lock);
return ((*refp == NULL) ? WT_NOTFOUND : 0);
@@ -1533,15 +1579,18 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
* An internal session flags either the server itself or an eviction
* worker thread.
*/
- if (F_ISSET(session, WT_SESSION_INTERNAL)) {
- if (is_server)
- WT_STAT_FAST_CONN_INCR(
- session, cache_eviction_server_evicting);
- else
+ if (is_server)
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_server_evicting);
+ else if (F_ISSET(session, WT_SESSION_INTERNAL))
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_worker_evicting);
+ else {
+ if (__wt_page_is_modified(ref->page))
WT_STAT_FAST_CONN_INCR(
- session, cache_eviction_worker_evicting);
- } else
+ session, cache_eviction_app_dirty);
WT_STAT_FAST_CONN_INCR(session, cache_eviction_app);
+ }
/*
* In case something goes wrong, don't pick the same set of pages every
@@ -1628,8 +1677,9 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
}
/* See if eviction is still needed. */
- if (!__wt_eviction_needed(session, NULL) ||
- cache->pages_evict > init_evict_count + max_pages_evicted)
+ if (!__wt_eviction_needed(session, &pct_full) ||
+ (pct_full < 100 &&
+ cache->pages_evict > init_evict_count + max_pages_evicted))
return (0);
/* Evict a page. */
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 2d20f53e9ae..305b81fe69e 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -420,7 +420,8 @@ __evict_review(
* fallen behind current.
*/
if (modified)
- WT_RET(__wt_txn_update_oldest(session, false));
+ WT_RET(__wt_txn_update_oldest(
+ session, WT_TXN_OLDEST_STRICT));
if (!__wt_page_can_evict(session, ref, inmem_splitp))
return (EBUSY);
diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h
index fd921677751..96097115afd 100644
--- a/src/third_party/wiredtiger/src/include/btree.h
+++ b/src/third_party/wiredtiger/src/include/btree.h
@@ -129,6 +129,8 @@ struct __wt_btree {
uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */
uint64_t write_gen; /* Write generation */
+ uint64_t bytes_inmem; /* Cache bytes in memory. */
+
WT_REF *evict_ref; /* Eviction thread's location */
uint64_t evict_priority; /* Relative priority of cached pages */
u_int evict_walk_period; /* Skip this many LRU walks */
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 6df7f87073f..03f27861e75 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -55,6 +55,27 @@ __wt_btree_block_free(
}
/*
+ * __wt_btree_bytes_inuse --
+ * Return the number of bytes in use.
+ */
+static inline uint64_t
+__wt_btree_bytes_inuse(WT_SESSION_IMPL *session)
+{
+ WT_CACHE *cache;
+ uint64_t bytes_inuse;
+
+ cache = S2C(session)->cache;
+
+ /* Adjust the cache size to take allocation overhead into account. */
+ bytes_inuse = S2BT(session)->bytes_inmem;
+ if (cache->overhead_pct != 0)
+ bytes_inuse +=
+ (bytes_inuse * (uint64_t)cache->overhead_pct) / 100;
+
+ return (bytes_inuse);
+}
+
+/*
* __wt_cache_page_inmem_incr --
* Increment a page's memory footprint in the cache.
*/
@@ -66,6 +87,7 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
WT_ASSERT(session, size < WT_EXABYTE);
cache = S2C(session)->cache;
+ (void)__wt_atomic_add64(&S2BT(session)->bytes_inmem, size);
(void)__wt_atomic_add64(&cache->bytes_inmem, size);
(void)__wt_atomic_addsize(&page->memory_footprint, size);
if (__wt_page_is_modified(page)) {
@@ -196,6 +218,8 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
WT_ASSERT(session, size < WT_EXABYTE);
__wt_cache_decr_check_uint64(
+ session, &S2BT(session)->bytes_inmem, size, "WT_BTREE.bytes_inmem");
+ __wt_cache_decr_check_uint64(
session, &cache->bytes_inmem, size, "WT_CACHE.bytes_inmem");
__wt_cache_decr_check_size(
session, &page->memory_footprint, size, "WT_PAGE.memory_footprint");
@@ -274,8 +298,9 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page)
modify = page->modify;
/* Update the bytes in-memory to reflect the eviction. */
- __wt_cache_decr_check_uint64(session,
- &cache->bytes_inmem,
+ __wt_cache_decr_check_uint64(session, &S2BT(session)->bytes_inmem,
+ page->memory_footprint, "WT_BTREE.bytes_inmem");
+ __wt_cache_decr_check_uint64(session, &cache->bytes_inmem,
page->memory_footprint, "WT_CACHE.bytes_inmem");
/* Update the bytes_internal value to reflect the eviction */
diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h
index 9184a2fe6ed..f683ed6b0f8 100644
--- a/src/third_party/wiredtiger/src/include/cache.h
+++ b/src/third_party/wiredtiger/src/include/cache.h
@@ -13,7 +13,6 @@
#define WT_EVICT_INT_SKEW (1<<20) /* Prefer leaf pages over internal
pages by this many increments of the
read generation. */
-#define WT_EVICT_WALK_PER_FILE 10 /* Pages to queue per file */
#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */
#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */
@@ -24,6 +23,7 @@
struct __wt_evict_entry {
WT_BTREE *btree; /* Enclosing btree object */
WT_REF *ref; /* Page to flush/evict */
+ uint64_t score; /* Relative eviction priority */
};
/*
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 792700555dd..f2b13023386 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -677,7 +677,7 @@ extern void __wt_stat_join_clear_all(WT_JOIN_STATS **stats);
extern void __wt_stat_join_aggregate( WT_JOIN_STATS **from, WT_JOIN_STATS *to);
extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session);
extern int __wt_txn_get_snapshot(WT_SESSION_IMPL *session);
-extern int __wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force);
+extern int __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags);
extern int __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]);
extern void __wt_txn_release(WT_SESSION_IMPL *session);
extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]);
diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h
index 3d9b0ed716b..7682af5a4b8 100644
--- a/src/third_party/wiredtiger/src/include/flags.h
+++ b/src/third_party/wiredtiger/src/include/flags.h
@@ -76,6 +76,8 @@
#define WT_TXN_LOG_CKPT_START 0x00000004
#define WT_TXN_LOG_CKPT_STOP 0x00000008
#define WT_TXN_LOG_CKPT_SYNC 0x00000010
+#define WT_TXN_OLDEST_STRICT 0x00000001
+#define WT_TXN_OLDEST_WAIT 0x00000002
#define WT_VERB_API 0x00000001
#define WT_VERB_BLOCK 0x00000002
#define WT_VERB_CHECKPOINT 0x00000004
diff --git a/src/third_party/wiredtiger/src/include/os.h b/src/third_party/wiredtiger/src/include/os.h
index 2ff41d39f46..44cceee6c40 100644
--- a/src/third_party/wiredtiger/src/include/os.h
+++ b/src/third_party/wiredtiger/src/include/os.h
@@ -17,15 +17,26 @@
#define WT_SYSCALL_RETRY(call, ret) do { \
int __retry; \
for (__retry = 0; __retry < 10; ++__retry) { \
- if ((call) == 0) { \
- (ret) = 0; \
- break; \
- } \
- switch ((ret) = __wt_errno()) { \
- case 0: \
- /* The call failed but didn't set errno. */ \
- (ret) = WT_ERROR; \
+ /* \
+ * A call returning 0 indicates success; any call where \
+ * 0 is not the only successful return must provide an \
+ * expression evaluating to 0 in all successful cases. \
+ */ \
+ if (((ret) = (call)) == 0) \
break; \
+ /* \
+ * The call's error was either returned by the call or \
+ * is in errno, and there are cases where it depends on \
+ * the software release as to which it is (for example, \
+ * posix_fadvise on FreeBSD and OS X). Failing calls \
+ * must either return a non-zero error value, or -1 if \
+ * the error value is in errno. (The WiredTiger errno \
+ * function returns WT_ERROR if errno is 0, which isn't \
+ * ideal but won't discard the failure.) \
+ */ \
+ if ((ret) == -1) \
+ (ret) = __wt_errno(); \
+ switch (ret) { \
case EAGAIN: \
case EBUSY: \
case EINTR: \
diff --git a/src/third_party/wiredtiger/src/include/serial.i b/src/third_party/wiredtiger/src/include/serial.i
index c0cd9c85ee9..67edc1c9ce1 100644
--- a/src/third_party/wiredtiger/src/include/serial.i
+++ b/src/third_party/wiredtiger/src/include/serial.i
@@ -306,7 +306,7 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
if ((txn = page->modify->obsolete_check_txn) != WT_TXN_NONE) {
if (!__wt_txn_visible_all(session, txn)) {
/* Try to move the oldest ID forward and re-check. */
- WT_RET(__wt_txn_update_oldest(session, false));
+ WT_RET(__wt_txn_update_oldest(session, 0));
if (!__wt_txn_visible_all(session, txn))
return (0);
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index f9170dc1a79..e728b634c6e 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -269,6 +269,8 @@ struct __wt_connection_stats {
int64_t cache_eviction_slow;
int64_t cache_eviction_worker_evicting;
int64_t cache_eviction_force_fail;
+ int64_t cache_eviction_walks_active;
+ int64_t cache_eviction_walks_started;
int64_t cache_eviction_hazard;
int64_t cache_inmem_splittable;
int64_t cache_inmem_split;
@@ -280,14 +282,18 @@ struct __wt_connection_stats {
int64_t cache_bytes_max;
int64_t cache_eviction_maximum_page_size;
int64_t cache_eviction_dirty;
+ int64_t cache_eviction_app_dirty;
int64_t cache_eviction_deepen;
int64_t cache_write_lookaside;
int64_t cache_pages_inuse;
int64_t cache_eviction_force;
int64_t cache_eviction_force_delete;
int64_t cache_eviction_app;
+ int64_t cache_eviction_pages_queued;
+ int64_t cache_eviction_pages_queued_oldest;
int64_t cache_read;
int64_t cache_read_lookaside;
+ int64_t cache_eviction_pages_seen;
int64_t cache_eviction_fail;
int64_t cache_eviction_walk;
int64_t cache_write;
@@ -441,6 +447,7 @@ struct __wt_dsrc_stats {
int64_t btree_compact_rewrite;
int64_t btree_row_internal;
int64_t btree_row_leaf;
+ int64_t cache_bytes_inuse;
int64_t cache_bytes_read;
int64_t cache_bytes_write;
int64_t cache_eviction_checkpoint;
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 5c2efad77e0..4f422af32d4 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -3787,257 +3787,269 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1040
/*! cache: failed eviction of pages that exceeded the in-memory maximum */
#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1041
+/*! cache: files with active eviction walks */
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1042
+/*! cache: files with new eviction walks started */
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1043
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1042
+#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1044
/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1043
+#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1045
/*! cache: in-memory page splits */
-#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1044
+#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1046
/*! cache: internal pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1045
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1047
/*! cache: internal pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1046
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1048
/*! cache: leaf pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1047
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1049
/*! cache: lookaside table insert calls */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1048
+#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1050
/*! cache: lookaside table remove calls */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1049
+#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1051
/*! cache: maximum bytes configured */
-#define WT_STAT_CONN_CACHE_BYTES_MAX 1050
+#define WT_STAT_CONN_CACHE_BYTES_MAX 1052
/*! cache: maximum page size at eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1051
+#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1053
/*! cache: modified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1052
+#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1054
+/*! cache: modified pages evicted by application threads */
+#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1055
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1053
+#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1056
/*! cache: page written requiring lookaside records */
-#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1054
+#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1057
/*! cache: pages currently held in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_INUSE 1055
+#define WT_STAT_CONN_CACHE_PAGES_INUSE 1058
/*! cache: pages evicted because they exceeded the in-memory maximum */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1056
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1059
/*! cache: pages evicted because they had chains of deleted items */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1057
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1060
/*! cache: pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP 1058
+#define WT_STAT_CONN_CACHE_EVICTION_APP 1061
+/*! cache: pages queued for eviction */
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1062
+/*! cache: pages queued for urgent eviction */
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1063
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 1059
+#define WT_STAT_CONN_CACHE_READ 1064
/*! cache: pages read into cache requiring lookaside entries */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1060
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1065
+/*! cache: pages seen by eviction walk */
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1066
/*! cache: pages selected for eviction unable to be evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1061
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1067
/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 1062
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 1068
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1063
+#define WT_STAT_CONN_CACHE_WRITE 1069
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1064
+#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1070
/*! cache: percentage overhead */
-#define WT_STAT_CONN_CACHE_OVERHEAD 1065
+#define WT_STAT_CONN_CACHE_OVERHEAD 1071
/*! cache: tracked bytes belonging to internal pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1066
+#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1072
/*! cache: tracked bytes belonging to leaf pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_LEAF 1067
+#define WT_STAT_CONN_CACHE_BYTES_LEAF 1073
/*! cache: tracked bytes belonging to overflow pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_OVERFLOW 1068
+#define WT_STAT_CONN_CACHE_BYTES_OVERFLOW 1074
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1069
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1075
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1070
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1076
/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1071
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1077
/*! connection: auto adjusting condition resets */
-#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1072
+#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1078
/*! connection: auto adjusting condition wait calls */
-#define WT_STAT_CONN_COND_AUTO_WAIT 1073
+#define WT_STAT_CONN_COND_AUTO_WAIT 1079
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1074
+#define WT_STAT_CONN_FILE_OPEN 1080
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1075
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1081
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1076
+#define WT_STAT_CONN_MEMORY_FREE 1082
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1077
+#define WT_STAT_CONN_MEMORY_GROW 1083
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1078
+#define WT_STAT_CONN_COND_WAIT 1084
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1079
+#define WT_STAT_CONN_RWLOCK_READ 1085
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1080
+#define WT_STAT_CONN_RWLOCK_WRITE 1086
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1081
+#define WT_STAT_CONN_READ_IO 1087
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1082
+#define WT_STAT_CONN_WRITE_IO 1088
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1083
+#define WT_STAT_CONN_CURSOR_CREATE 1089
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1084
+#define WT_STAT_CONN_CURSOR_INSERT 1090
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1085
+#define WT_STAT_CONN_CURSOR_NEXT 1091
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1086
+#define WT_STAT_CONN_CURSOR_PREV 1092
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1087
+#define WT_STAT_CONN_CURSOR_REMOVE 1093
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1088
+#define WT_STAT_CONN_CURSOR_RESET 1094
/*! cursor: cursor restarted searches */
-#define WT_STAT_CONN_CURSOR_RESTART 1089
+#define WT_STAT_CONN_CURSOR_RESTART 1095
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1090
+#define WT_STAT_CONN_CURSOR_SEARCH 1096
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1091
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1097
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1092
+#define WT_STAT_CONN_CURSOR_UPDATE 1098
/*! cursor: truncate calls */
-#define WT_STAT_CONN_CURSOR_TRUNCATE 1093
+#define WT_STAT_CONN_CURSOR_TRUNCATE 1099
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1094
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1100
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1095
+#define WT_STAT_CONN_DH_SWEEP_REF 1101
/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1096
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1102
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1097
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1103
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1098
+#define WT_STAT_CONN_DH_SWEEP_TOD 1104
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1099
+#define WT_STAT_CONN_DH_SWEEPS 1105
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1100
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1106
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1101
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1107
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1102
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1108
/*! log: consolidated slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1103
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1109
/*! log: consolidated slot join races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1104
+#define WT_STAT_CONN_LOG_SLOT_RACES 1110
/*! log: consolidated slot join transitions */
-#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1105
+#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1111
/*! log: consolidated slot joins */
-#define WT_STAT_CONN_LOG_SLOT_JOINS 1106
+#define WT_STAT_CONN_LOG_SLOT_JOINS 1112
/*! log: consolidated slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1107
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1113
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1108
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1114
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1109
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1115
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1110
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1116
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1111
+#define WT_STAT_CONN_LOG_FLUSH 1117
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1112
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1118
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1113
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1119
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1114
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1120
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1115
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1121
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1116
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1122
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1117
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1123
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1118
+#define WT_STAT_CONN_LOG_SCANS 1124
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1119
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1125
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1120
+#define WT_STAT_CONN_LOG_WRITE_LSN 1126
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1121
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1127
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1122
+#define WT_STAT_CONN_LOG_SYNC 1128
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1123
+#define WT_STAT_CONN_LOG_SYNC_DIR 1129
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1124
+#define WT_STAT_CONN_LOG_WRITES 1130
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1125
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1131
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1126
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1132
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1127
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1133
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1128
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1134
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1129
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1135
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1130
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1136
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1131
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1137
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1132
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1138
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1133
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1139
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1134
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1140
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1135
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1141
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1136
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1142
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1137
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1143
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1138
+#define WT_STAT_CONN_REC_PAGES 1144
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1139
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1145
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1140
+#define WT_STAT_CONN_REC_PAGE_DELETE 1146
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1141
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1147
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1142
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1148
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1143
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1149
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1144
+#define WT_STAT_CONN_SESSION_OPEN 1150
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1145
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1151
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1146
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1152
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1147
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1153
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1148
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1154
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1149
+#define WT_STAT_CONN_PAGE_SLEEP 1155
/*! transaction: number of named snapshots created */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1150
+#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1156
/*! transaction: number of named snapshots dropped */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1151
+#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1157
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1152
+#define WT_STAT_CONN_TXN_BEGIN 1158
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1153
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1159
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1154
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1160
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1155
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1161
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1156
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1162
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1157
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1163
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1158
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1164
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1159
+#define WT_STAT_CONN_TXN_CHECKPOINT 1165
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1160
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1166
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1161
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1167
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1162
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1168
/*! transaction: transaction range of IDs currently pinned by named
* snapshots */
-#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1163
+#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1169
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1164
+#define WT_STAT_CONN_TXN_SYNC 1170
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1165
+#define WT_STAT_CONN_TXN_COMMIT 1171
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1166
+#define WT_STAT_CONN_TXN_ROLLBACK 1172
/*!
* @}
@@ -4126,125 +4138,127 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2038
/*! btree: row-store leaf pages */
#define WT_STAT_DSRC_BTREE_ROW_LEAF 2039
+/*! cache: bytes currently in the cache */
+#define WT_STAT_DSRC_CACHE_BYTES_INUSE 2040
/*! cache: bytes read into cache */
-#define WT_STAT_DSRC_CACHE_BYTES_READ 2040
+#define WT_STAT_DSRC_CACHE_BYTES_READ 2041
/*! cache: bytes written from cache */
-#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2041
+#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2042
/*! cache: checkpoint blocked page eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2042
+#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2043
/*! cache: data source pages selected for eviction unable to be evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2043
+#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2044
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2044
+#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2045
/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2045
+#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2046
/*! cache: in-memory page splits */
-#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2046
+#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2047
/*! cache: internal pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2047
+#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2048
/*! cache: internal pages split during eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2048
+#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2049
/*! cache: leaf pages split during eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2049
+#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2050
/*! cache: modified pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2050
+#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2051
/*! cache: overflow pages read into cache */
-#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2051
+#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2052
/*! cache: overflow values cached in memory */
-#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2052
+#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2053
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2053
+#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2054
/*! cache: page written requiring lookaside records */
-#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2054
+#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2055
/*! cache: pages read into cache */
-#define WT_STAT_DSRC_CACHE_READ 2055
+#define WT_STAT_DSRC_CACHE_READ 2056
/*! cache: pages read into cache requiring lookaside entries */
-#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2056
+#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2057
/*! cache: pages written from cache */
-#define WT_STAT_DSRC_CACHE_WRITE 2057
+#define WT_STAT_DSRC_CACHE_WRITE 2058
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2058
+#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2059
/*! cache: unmodified pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2059
+#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2060
/*! compression: compressed pages read */
-#define WT_STAT_DSRC_COMPRESS_READ 2060
+#define WT_STAT_DSRC_COMPRESS_READ 2061
/*! compression: compressed pages written */
-#define WT_STAT_DSRC_COMPRESS_WRITE 2061
+#define WT_STAT_DSRC_COMPRESS_WRITE 2062
/*! compression: page written failed to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2062
+#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2063
/*! compression: page written was too small to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2063
+#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2064
/*! compression: raw compression call failed, additional data available */
-#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2064
+#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2065
/*! compression: raw compression call failed, no additional data available */
-#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2065
+#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2066
/*! compression: raw compression call succeeded */
-#define WT_STAT_DSRC_COMPRESS_RAW_OK 2066
+#define WT_STAT_DSRC_COMPRESS_RAW_OK 2067
/*! cursor: bulk-loaded cursor-insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2067
+#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2068
/*! cursor: create calls */
-#define WT_STAT_DSRC_CURSOR_CREATE 2068
+#define WT_STAT_DSRC_CURSOR_CREATE 2069
/*! cursor: cursor-insert key and value bytes inserted */
-#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2069
+#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2070
/*! cursor: cursor-remove key bytes removed */
-#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2070
+#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2071
/*! cursor: cursor-update value bytes updated */
-#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2071
+#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2072
/*! cursor: insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT 2072
+#define WT_STAT_DSRC_CURSOR_INSERT 2073
/*! cursor: next calls */
-#define WT_STAT_DSRC_CURSOR_NEXT 2073
+#define WT_STAT_DSRC_CURSOR_NEXT 2074
/*! cursor: prev calls */
-#define WT_STAT_DSRC_CURSOR_PREV 2074
+#define WT_STAT_DSRC_CURSOR_PREV 2075
/*! cursor: remove calls */
-#define WT_STAT_DSRC_CURSOR_REMOVE 2075
+#define WT_STAT_DSRC_CURSOR_REMOVE 2076
/*! cursor: reset calls */
-#define WT_STAT_DSRC_CURSOR_RESET 2076
+#define WT_STAT_DSRC_CURSOR_RESET 2077
/*! cursor: restarted searches */
-#define WT_STAT_DSRC_CURSOR_RESTART 2077
+#define WT_STAT_DSRC_CURSOR_RESTART 2078
/*! cursor: search calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH 2078
+#define WT_STAT_DSRC_CURSOR_SEARCH 2079
/*! cursor: search near calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2079
+#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2080
/*! cursor: truncate calls */
-#define WT_STAT_DSRC_CURSOR_TRUNCATE 2080
+#define WT_STAT_DSRC_CURSOR_TRUNCATE 2081
/*! cursor: update calls */
-#define WT_STAT_DSRC_CURSOR_UPDATE 2081
+#define WT_STAT_DSRC_CURSOR_UPDATE 2082
/*! reconciliation: dictionary matches */
-#define WT_STAT_DSRC_REC_DICTIONARY 2082
+#define WT_STAT_DSRC_REC_DICTIONARY 2083
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2083
+#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2084
/*! reconciliation: internal page key bytes discarded using suffix
* compression */
-#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2084
+#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2085
/*! reconciliation: internal page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2085
+#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2086
/*! reconciliation: internal-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2086
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2087
/*! reconciliation: leaf page key bytes discarded using prefix compression */
-#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2087
+#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2088
/*! reconciliation: leaf page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2088
+#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2089
/*! reconciliation: leaf-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2089
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2090
/*! reconciliation: maximum blocks required for a page */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2090
+#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2091
/*! reconciliation: overflow values written */
-#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2091
+#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2092
/*! reconciliation: page checksum matches */
-#define WT_STAT_DSRC_REC_PAGE_MATCH 2092
+#define WT_STAT_DSRC_REC_PAGE_MATCH 2093
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_DSRC_REC_PAGES 2093
+#define WT_STAT_DSRC_REC_PAGES 2094
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_DSRC_REC_PAGES_EVICTION 2094
+#define WT_STAT_DSRC_REC_PAGES_EVICTION 2095
/*! reconciliation: pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE 2095
+#define WT_STAT_DSRC_REC_PAGE_DELETE 2096
/*! session: object compaction */
-#define WT_STAT_DSRC_SESSION_COMPACT 2096
+#define WT_STAT_DSRC_SESSION_COMPACT 2097
/*! session: open cursor count */
-#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2097
+#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2098
/*! transaction: update conflicts */
-#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2098
+#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2099
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c
index b7efb1d9018..47071211450 100644
--- a/src/third_party/wiredtiger/src/log/log_slot.c
+++ b/src/third_party/wiredtiger/src/log/log_slot.c
@@ -94,6 +94,17 @@ retry:
if (WT_LOG_SLOT_DONE(new_state))
*releasep = 1;
slot->slot_end_lsn = slot->slot_start_lsn;
+ /*
+ * A thread setting the unbuffered flag sets the unbuffered size after
+ * setting the flag. There could be a delay between a thread setting
+ * the flag, a thread closing the slot, and the original thread setting
+ * that value. If the state is unbuffered, wait for the unbuffered
+ * size to be set.
+ */
+ while (WT_LOG_SLOT_UNBUFFERED_ISSET(old_state) &&
+ slot->slot_unbuffered == 0)
+ __wt_yield();
+
end_offset =
WT_LOG_SLOT_JOINED_BUFFERED(old_state) + slot->slot_unbuffered;
slot->slot_end_lsn.l.offset += (uint32_t)end_offset;
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
index 51cf2e981de..f5bb4cfd337 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
@@ -289,7 +289,8 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
}
/* Stop if a running transaction needs the chunk. */
- WT_RET(__wt_txn_update_oldest(session, true));
+ WT_RET(__wt_txn_update_oldest(
+ session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
if (chunk->switch_txn == WT_TXN_NONE ||
!__wt_txn_visible_all(session, chunk->switch_txn)) {
WT_RET(__wt_verbose(session, WT_VERB_LSM,
diff --git a/src/third_party/wiredtiger/src/os_posix/os_dir.c b/src/third_party/wiredtiger/src/os_posix/os_dir.c
index 78ae5f8edd4..02f12ec7311 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_dir.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_dir.c
@@ -36,7 +36,7 @@ __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir,
dirsz = 0;
entries = NULL;
- WT_SYSCALL_RETRY(((dirp = opendir(path)) == NULL ? 1 : 0), ret);
+ WT_SYSCALL_RETRY(((dirp = opendir(path)) == NULL ? -1 : 0), ret);
if (ret != 0)
WT_ERR_MSG(session, ret, "%s: directory-list: opendir", path);
diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c
index 86aa8db8f4f..7d8f3b937b6 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_fs.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c
@@ -52,7 +52,7 @@ __posix_sync(WT_SESSION_IMPL *session,
* "This is currently implemented on HFS, MS-DOS (FAT), and Universal
* Disk Format (UDF) file systems."
*/
- WT_SYSCALL_RETRY(fcntl(fd, F_FULLFSYNC, 0), ret);
+ WT_SYSCALL_RETRY(fcntl(fd, F_FULLFSYNC, 0) == -1 ? -1 : 0, ret);
if (ret == 0)
return (0);
/*
@@ -107,7 +107,7 @@ __posix_directory_sync(WT_SESSION_IMPL *session, const char *path)
}
WT_SYSCALL_RETRY((
- (fd = open(path, O_RDONLY, 0444)) == -1 ? 1 : 0), ret);
+ (fd = open(path, O_RDONLY, 0444)) == -1 ? -1 : 0), ret);
if (ret != 0)
WT_ERR_MSG(session, ret, "%s: directory-sync: open", path);
@@ -172,14 +172,19 @@ __posix_file_remove(WT_SESSION_IMPL *session, const char *name)
#endif
WT_RET(__wt_filename(session, name, &path));
- name = path;
-
- WT_SYSCALL_RETRY(remove(name), ret);
- if (ret != 0)
- __wt_err(session, ret, "%s: file-remove: remove", name);
+ /*
+ * ISO C doesn't require remove return -1 on failure or set errno (note
+ * POSIX 1003.1 extends C with those requirements). Regardless, use the
+ * unlink system call, instead of remove, to simplify error handling;
+ * where we're not doing any special checking for standards compliance,
+ * using unlink may be marginally safer.
+ */
+ WT_SYSCALL_RETRY(unlink(path), ret);
__wt_free(session, path);
- return (ret);
+ if (ret == 0)
+ return (0);
+ WT_RET_MSG(session, ret, "%s: file-remove: unlink", name);
}
/*
@@ -203,18 +208,22 @@ __posix_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
from_path = to_path = NULL;
WT_ERR(__wt_filename(session, from, &from_path));
- from = from_path;
WT_ERR(__wt_filename(session, to, &to_path));
- to = to_path;
- WT_SYSCALL_RETRY(rename(from, to), ret);
- if (ret != 0)
- __wt_err(session, ret,
- "%s to %s: file-rename: rename", from, to);
+ /*
+ * ISO C doesn't require rename return -1 on failure or set errno (note
+ * POSIX 1003.1 extends C with those requirements). Be cautious, force
+ * any non-zero return to -1 so we'll check errno. We can still end up
+ * with the wrong errno (if errno is garbage), or the generic WT_ERROR
+ * return (if errno is 0), but we've done the best we can.
+ */
+ WT_SYSCALL_RETRY(rename(from_path, to_path) != 0 ? -1 : 0, ret);
err: __wt_free(session, from_path);
__wt_free(session, to_path);
- return (ret);
+ if (ret == 0)
+ return (0);
+ WT_RET_MSG(session, ret, "%s to %s: file-rename: rename", from, to);
}
/*
@@ -360,7 +369,7 @@ __posix_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
fl.l_type = lock ? F_WRLCK : F_UNLCK;
fl.l_whence = SEEK_SET;
- WT_SYSCALL_RETRY(fcntl(fh->fd, F_SETLK, &fl), ret);
+ WT_SYSCALL_RETRY(fcntl(fh->fd, F_SETLK, &fl) == -1 ? -1 : 0, ret);
if (ret == 0)
return (0);
WT_RET_MSG(session, ret, "%s: handle-lock: fcntl", fh->name);
@@ -560,7 +569,7 @@ __posix_handle_open(WT_SESSION_IMPL *session,
f |= O_CLOEXEC;
#endif
WT_SYSCALL_RETRY((
- (fd = open(name, f, 0444)) == -1 ? 1 : 0), ret);
+ (fd = open(name, f, 0444)) == -1 ? -1 : 0), ret);
if (ret != 0)
WT_ERR_MSG(session, ret, "%s: handle-open: open", name);
WT_ERR(__posix_handle_open_cloexec(session, fd, name));
@@ -622,7 +631,7 @@ __posix_handle_open(WT_SESSION_IMPL *session,
#endif
}
- WT_SYSCALL_RETRY(((fd = open(name, f, mode)) == -1 ? 1 : 0), ret);
+ WT_SYSCALL_RETRY(((fd = open(name, f, mode)) == -1 ? -1 : 0), ret);
if (ret != 0)
WT_ERR_MSG(session, ret,
direct_io ?
diff --git a/src/third_party/wiredtiger/src/os_posix/os_map.c b/src/third_party/wiredtiger/src/os_posix/os_map.c
index de28891ffd1..e161e268f6d 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_map.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_map.c
@@ -98,6 +98,7 @@ __posix_map_preload_madvise(
if (size <= (size_t)conn->page_size ||
(ret = posix_madvise(blk, size, POSIX_MADV_WILLNEED)) == 0)
return (0);
+
WT_RET_MSG(session, ret,
"%s: memory-map preload: posix_madvise: POSIX_MADV_WILLNEED",
fh->name);
@@ -145,6 +146,7 @@ __posix_map_discard_madvise(
if ((ret = posix_madvise(blk, size, POSIX_MADV_DONTNEED)) == 0)
return (0);
+
WT_RET_MSG(session, ret,
"%s: memory-map discard: posix_madvise: POSIX_MADV_DONTNEED",
fh->name);
diff --git a/src/third_party/wiredtiger/src/os_win/os_fs.c b/src/third_party/wiredtiger/src/os_win/os_fs.c
index 95c0ea40ce6..4ac613fc9f9 100644
--- a/src/third_party/wiredtiger/src/os_win/os_fs.c
+++ b/src/third_party/wiredtiger/src/os_win/os_fs.c
@@ -286,11 +286,6 @@ __win_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock)
* WiredTiger requires this function be able to acquire locks past
* the end of file.
*
- * Note we're using fcntl(2) locking: all fcntl locks associated with a
- * file for a given process are removed when any file descriptor for the
- * file is closed by the process, even if a lock was never requested for
- * that file descriptor.
- *
* http://msdn.microsoft.com/
* en-us/library/windows/desktop/aa365202%28v=vs.85%29.aspx
*
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 2a826eda962..2f5609567da 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -43,6 +43,7 @@ static const char * const __stats_dsrc_desc[] = {
"btree: pages rewritten by compaction",
"btree: row-store internal pages",
"btree: row-store leaf pages",
+ "cache: bytes currently in the cache",
"cache: bytes read into cache",
"cache: bytes written from cache",
"cache: checkpoint blocked page eviction",
@@ -172,6 +173,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->btree_compact_rewrite = 0;
stats->btree_row_internal = 0;
stats->btree_row_leaf = 0;
+ /* not clearing cache_bytes_inuse */
stats->cache_bytes_read = 0;
stats->cache_bytes_write = 0;
stats->cache_eviction_checkpoint = 0;
@@ -298,6 +300,7 @@ __wt_stat_dsrc_aggregate_single(
to->btree_compact_rewrite += from->btree_compact_rewrite;
to->btree_row_internal += from->btree_row_internal;
to->btree_row_leaf += from->btree_row_leaf;
+ to->cache_bytes_inuse += from->cache_bytes_inuse;
to->cache_bytes_read += from->cache_bytes_read;
to->cache_bytes_write += from->cache_bytes_write;
to->cache_eviction_checkpoint += from->cache_eviction_checkpoint;
@@ -430,6 +433,7 @@ __wt_stat_dsrc_aggregate(
WT_STAT_READ(from, btree_compact_rewrite);
to->btree_row_internal += WT_STAT_READ(from, btree_row_internal);
to->btree_row_leaf += WT_STAT_READ(from, btree_row_leaf);
+ to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse);
to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read);
to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write);
to->cache_eviction_checkpoint +=
@@ -551,6 +555,8 @@ static const char * const __stats_connection_desc[] = {
"cache: eviction server unable to reach eviction goal",
"cache: eviction worker thread evicting pages",
"cache: failed eviction of pages that exceeded the in-memory maximum",
+ "cache: files with active eviction walks",
+ "cache: files with new eviction walks started",
"cache: hazard pointer blocked page eviction",
"cache: in-memory page passed criteria to be split",
"cache: in-memory page splits",
@@ -562,14 +568,18 @@ static const char * const __stats_connection_desc[] = {
"cache: maximum bytes configured",
"cache: maximum page size at eviction",
"cache: modified pages evicted",
+ "cache: modified pages evicted by application threads",
"cache: page split during eviction deepened the tree",
"cache: page written requiring lookaside records",
"cache: pages currently held in the cache",
"cache: pages evicted because they exceeded the in-memory maximum",
"cache: pages evicted because they had chains of deleted items",
"cache: pages evicted by application threads",
+ "cache: pages queued for eviction",
+ "cache: pages queued for urgent eviction",
"cache: pages read into cache",
"cache: pages read into cache requiring lookaside entries",
+ "cache: pages seen by eviction walk",
"cache: pages selected for eviction unable to be evicted",
"cache: pages walked for eviction",
"cache: pages written from cache",
@@ -748,6 +758,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_eviction_slow = 0;
stats->cache_eviction_worker_evicting = 0;
stats->cache_eviction_force_fail = 0;
+ /* not clearing cache_eviction_walks_active */
+ stats->cache_eviction_walks_started = 0;
stats->cache_eviction_hazard = 0;
stats->cache_inmem_splittable = 0;
stats->cache_inmem_split = 0;
@@ -759,14 +771,18 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing cache_bytes_max */
/* not clearing cache_eviction_maximum_page_size */
stats->cache_eviction_dirty = 0;
+ stats->cache_eviction_app_dirty = 0;
stats->cache_eviction_deepen = 0;
stats->cache_write_lookaside = 0;
/* not clearing cache_pages_inuse */
stats->cache_eviction_force = 0;
stats->cache_eviction_force_delete = 0;
stats->cache_eviction_app = 0;
+ stats->cache_eviction_pages_queued = 0;
+ stats->cache_eviction_pages_queued_oldest = 0;
stats->cache_read = 0;
stats->cache_read_lookaside = 0;
+ stats->cache_eviction_pages_seen = 0;
stats->cache_eviction_fail = 0;
stats->cache_eviction_walk = 0;
stats->cache_write = 0;
@@ -943,6 +959,10 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, cache_eviction_worker_evicting);
to->cache_eviction_force_fail +=
WT_STAT_READ(from, cache_eviction_force_fail);
+ to->cache_eviction_walks_active +=
+ WT_STAT_READ(from, cache_eviction_walks_active);
+ to->cache_eviction_walks_started +=
+ WT_STAT_READ(from, cache_eviction_walks_started);
to->cache_eviction_hazard +=
WT_STAT_READ(from, cache_eviction_hazard);
to->cache_inmem_splittable +=
@@ -962,6 +982,8 @@ __wt_stat_connection_aggregate(
to->cache_eviction_maximum_page_size +=
WT_STAT_READ(from, cache_eviction_maximum_page_size);
to->cache_eviction_dirty += WT_STAT_READ(from, cache_eviction_dirty);
+ to->cache_eviction_app_dirty +=
+ WT_STAT_READ(from, cache_eviction_app_dirty);
to->cache_eviction_deepen +=
WT_STAT_READ(from, cache_eviction_deepen);
to->cache_write_lookaside +=
@@ -971,8 +993,14 @@ __wt_stat_connection_aggregate(
to->cache_eviction_force_delete +=
WT_STAT_READ(from, cache_eviction_force_delete);
to->cache_eviction_app += WT_STAT_READ(from, cache_eviction_app);
+ to->cache_eviction_pages_queued +=
+ WT_STAT_READ(from, cache_eviction_pages_queued);
+ to->cache_eviction_pages_queued_oldest +=
+ WT_STAT_READ(from, cache_eviction_pages_queued_oldest);
to->cache_read += WT_STAT_READ(from, cache_read);
to->cache_read_lookaside += WT_STAT_READ(from, cache_read_lookaside);
+ to->cache_eviction_pages_seen +=
+ WT_STAT_READ(from, cache_eviction_pages_seen);
to->cache_eviction_fail += WT_STAT_READ(from, cache_eviction_fail);
to->cache_eviction_walk += WT_STAT_READ(from, cache_eviction_walk);
to->cache_write += WT_STAT_READ(from, cache_write);
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index ab1cd622057..9d5975b2bc5 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -257,7 +257,7 @@ __txn_oldest_scan(WT_SESSION_IMPL *session,
* Sweep the running transactions to update the oldest ID required.
*/
int
-__wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force)
+__wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
@@ -265,9 +265,12 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force)
WT_TXN_GLOBAL *txn_global;
uint64_t current_id, last_running, oldest_id;
uint64_t prev_last_running, prev_oldest_id;
+ bool strict, wait;
conn = S2C(session);
txn_global = &conn->txn_global;
+ strict = LF_ISSET(WT_TXN_OLDEST_STRICT);
+ wait = LF_ISSET(WT_TXN_OLDEST_WAIT);
current_id = last_running = txn_global->current;
prev_last_running = txn_global->last_running;
@@ -278,11 +281,11 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force)
* oldest ID isn't too far behind, avoid scanning.
*/
if (prev_oldest_id == current_id ||
- (!force && WT_TXNID_LT(current_id, prev_oldest_id + 100)))
+ (!strict && WT_TXNID_LT(current_id, prev_oldest_id + 100)))
return (0);
/* First do a read-only scan. */
- if (force)
+ if (wait)
WT_RET(__wt_readlock(session, txn_global->scan_rwlock));
else if ((ret =
__wt_try_readlock(session, txn_global->scan_rwlock)) != 0)
@@ -295,13 +298,13 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force)
* non-forced updates), give up.
*/
if ((oldest_id == prev_oldest_id ||
- (!force && WT_TXNID_LT(oldest_id, prev_oldest_id + 100))) &&
+ (!strict && WT_TXNID_LT(oldest_id, prev_oldest_id + 100))) &&
((last_running == prev_last_running) ||
- (!force && WT_TXNID_LT(last_running, prev_last_running + 100))))
+ (!strict && WT_TXNID_LT(last_running, prev_last_running + 100))))
return (0);
/* It looks like an update is necessary, wait for exclusive access. */
- if (force)
+ if (wait)
WT_RET(__wt_writelock(session, txn_global->scan_rwlock));
else if ((ret =
__wt_try_writelock(session, txn_global->scan_rwlock)) != 0)
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index c1b435d9897..5c0c55963a3 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -404,7 +404,8 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* This is particularly important for compact, so that all dirty pages
* can be fully written.
*/
- WT_ERR(__wt_txn_update_oldest(session, true));
+ WT_ERR(__wt_txn_update_oldest(
+ session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
/* Flush data-sources before we start the checkpoint. */
WT_ERR(__checkpoint_data_source(session, cfg));
@@ -1284,7 +1285,8 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final)
* for active readers.
*/
if (!btree->modified && !bulk) {
- WT_RET(__wt_txn_update_oldest(session, true));
+ WT_RET(__wt_txn_update_oldest(
+ session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
return (__wt_txn_visible_all(session, btree->rec_max_txn) ?
__wt_cache_op(session, WT_SYNC_DISCARD) : EBUSY);
}
diff --git a/src/third_party/wiredtiger/test/recovery/random-abort.c b/src/third_party/wiredtiger/test/recovery/random-abort.c
index cd7d1b08708..2454aa6056b 100644
--- a/src/third_party/wiredtiger/test/recovery/random-abort.c
+++ b/src/third_party/wiredtiger/test/recovery/random-abort.c
@@ -36,7 +36,7 @@
#include <unistd.h>
#endif
-#include <wiredtiger.h>
+#include <wt_internal.h>
#include "test_util.i"
@@ -44,7 +44,8 @@ static char home[512]; /* Program working dir */
static const char *progname; /* Program name */
static const char * const uri = "table:main";
-#define RECORDS_FILE "records"
+#define NTHREADS 5
+#define RECORDS_FILE "records-%u"
#define ENV_CONFIG \
"create,log=(file_max=10M,archive=false,enabled)," \
@@ -55,71 +56,66 @@ static const char * const uri = "table:main";
static void
usage(void)
{
- fprintf(stderr, "usage: %s [-h dir]\n", progname);
+ fprintf(stderr, "usage: %s [-h dir] [-T threads]\n", progname);
exit(EXIT_FAILURE);
}
+typedef struct {
+ WT_CONNECTION *conn;
+ uint64_t start;
+ uint32_t id;
+} WT_THREAD_DATA;
+
/*
* Child process creates the database and table, and then writes data into
* the table until it is killed by the parent.
*/
-static void
-fill_db(void)
+static void *
+thread_run(void *arg)
{
FILE *fp;
- WT_CONNECTION *conn;
WT_CURSOR *cursor;
WT_ITEM data;
WT_RAND_STATE rnd;
WT_SESSION *session;
+ WT_THREAD_DATA *td;
uint64_t i;
int ret;
- uint8_t buf[MAX_VAL];
+ char buf[MAX_VAL], kname[64];
__wt_random_init(&rnd);
memset(buf, 0, sizeof(buf));
- /*
- * Initialize the first 25% to random values. Leave a bunch of data
- * space at the end to emphasize zero data.
- */
- for (i = 0; i < MAX_VAL/4; i++)
- buf[i] = (uint8_t)__wt_random(&rnd);
+ memset(kname, 0, sizeof(kname));
+ td = (WT_THREAD_DATA *)arg;
/*
- * Run in the home directory so that the records file is in there too.
+ * The value is the name of the record file with our id appended.
*/
- if (chdir(home) != 0)
- testutil_die(errno, "chdir: %s", home);
- if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn)) != 0)
- testutil_die(ret, "wiredtiger_open");
- if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
- testutil_die(ret, "WT_CONNECTION:open_session");
- if ((ret = session->create(session,
- uri, "key_format=Q,value_format=u")) != 0)
- testutil_die(ret, "WT_SESSION.create: %s", uri);
- if ((ret =
- session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
- testutil_die(ret, "WT_SESSION.open_cursor: %s", uri);
-
+ snprintf(buf, sizeof(buf), RECORDS_FILE, td->id);
/*
* Keep a separate file with the records we wrote for checking.
*/
- (void)unlink(RECORDS_FILE);
- if ((fp = fopen(RECORDS_FILE, "w")) == NULL)
+ (void)unlink(buf);
+ if ((fp = fopen(buf, "w")) == NULL)
testutil_die(errno, "fopen");
/*
* Set to no buffering.
*/
__wt_stream_set_no_buffer(fp);
-
+ if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0)
+ testutil_die(ret, "WT_CONNECTION:open_session");
+ if ((ret =
+ session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
+ testutil_die(ret, "WT_SESSION.open_cursor: %s", uri);
+ data.data = buf;
+ data.size = sizeof(buf);
/*
- * Write data into the table until we are killed by the parent.
- * The data in the buffer is already set to random content.
+ * Write our portion of the key space until we're killed.
*/
- data.data = buf;
- for (i = 0;; ++i) {
+ for (i = td->start; ; ++i) {
+ snprintf(kname, sizeof(kname), "%" PRIu64, i);
data.size = __wt_random(&rnd) % MAX_VAL;
- cursor->set_key(cursor, i);
+ cursor->set_key(cursor, kname);
cursor->set_value(cursor, &data);
if ((ret = cursor->insert(cursor)) != 0)
testutil_die(ret, "WT_CURSOR.insert");
@@ -128,9 +124,62 @@ fill_db(void)
*/
if (fprintf(fp, "%" PRIu64 "\n", i) == -1)
testutil_die(errno, "fprintf");
- if (i % 5000)
- __wt_yield();
}
+ return (NULL);
+}
+
+/*
+ * Child process creates the database and table, and then creates worker
+ * threads to add data until it is killed by the parent.
+ */
+static void fill_db(uint32_t)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void
+fill_db(uint32_t nth)
+{
+ pthread_t *thr;
+ WT_CONNECTION *conn;
+ WT_SESSION *session;
+ WT_THREAD_DATA *td;
+ uint32_t i;
+ int ret;
+
+ thr = calloc(nth, sizeof(pthread_t));
+ td = calloc(nth, sizeof(WT_THREAD_DATA));
+ if (chdir(home) != 0)
+ testutil_die(errno, "Child chdir: %s", home);
+ if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn)) != 0)
+ testutil_die(ret, "wiredtiger_open");
+ if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+ testutil_die(ret, "WT_CONNECTION:open_session");
+ if ((ret = session->create(session,
+ uri, "key_format=S,value_format=u")) != 0)
+ testutil_die(ret, "WT_SESSION.create: %s", uri);
+ if ((ret = session->close(session, NULL)) != 0)
+ testutil_die(ret, "WT_SESSION:close");
+
+ for (i = 0; i < nth; ++i) {
+ td[i].conn = conn;
+ td[i].start = (UINT64_MAX / nth) * i;
+ td[i].id = i;
+ if ((ret = pthread_create(
+ &thr[i], NULL, thread_run, &td[i])) != 0)
+ testutil_die(ret, "pthread_create");
+ }
+ printf("Spawned %" PRIu32 " writer threads\n", nth);
+ fflush(stdout);
+ /*
+ * The threads never exit, so the child will just wait here until
+ * it is killed.
+ */
+ for (i = 0; i < nth; ++i)
+ pthread_join(thr[i], NULL);
+ /*
+ * NOTREACHED
+ */
+ free(thr);
+ free(td);
+ exit(EXIT_SUCCESS);
}
extern int __wt_optind;
@@ -147,23 +196,28 @@ main(int argc, char *argv[])
WT_SESSION *session;
WT_RAND_STATE rnd;
uint64_t key;
- uint32_t absent, count, timeout;
+ uint32_t absent, count, i, nth, timeout;
int ch, status, ret;
pid_t pid;
const char *working_dir;
+ char fname[64], kname[64];
if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL)
progname = argv[0];
else
++progname;
- working_dir = "WT_TEST.random-abort";
+ working_dir = "WT_TEST.random-abort-many";
timeout = 10;
- while ((ch = __wt_getopt(progname, argc, argv, "h:t:")) != EOF)
+ nth = NTHREADS;
+ while ((ch = __wt_getopt(progname, argc, argv, "h:T:t:")) != EOF)
switch (ch) {
case 'h':
working_dir = __wt_optarg;
break;
+ case 'T':
+ nth = (uint32_t)atoi(__wt_optarg);
+ break;
case 't':
timeout = (uint32_t)atoi(__wt_optarg);
break;
@@ -187,7 +241,7 @@ main(int argc, char *argv[])
testutil_die(errno, "fork");
if (pid == 0) { /* child */
- fill_db();
+ fill_db(nth);
return (EXIT_SUCCESS);
}
@@ -212,7 +266,7 @@ main(int argc, char *argv[])
* this is the place to do it.
*/
if (chdir(home) != 0)
- testutil_die(errno, "chdir: %s", home);
+ testutil_die(errno, "parent chdir: %s", home);
printf("Open database, run recovery and verify content\n");
if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn)) != 0)
testutil_die(ret, "wiredtiger_open");
@@ -222,30 +276,35 @@ main(int argc, char *argv[])
session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
testutil_die(ret, "WT_SESSION.open_cursor: %s", uri);
- if ((fp = fopen(RECORDS_FILE, "r")) == NULL)
- testutil_die(errno, "fopen");
+ absent = count = 0;
+ for (i = 0; i < nth; ++i) {
+ snprintf(fname, sizeof(fname), RECORDS_FILE, i);
+ if ((fp = fopen(fname, "r")) == NULL)
+ testutil_die(errno, "fopen");
- /*
- * For every key in the saved file, verify that the key exists
- * in the table after recovery. Since we did write-no-sync, we
- * expect every key to have been recovered.
- */
- for (absent = count = 0;; ++count) {
- ret = fscanf(fp, "%" SCNu64 "\n", &key);
- if (ret != EOF && ret != 1)
- testutil_die(errno, "fscanf");
- if (ret == EOF)
- break;
- cursor->set_key(cursor, key);
- if ((ret = cursor->search(cursor)) != 0) {
- if (ret != WT_NOTFOUND)
- testutil_die(ret, "search");
- printf("no record with key %" PRIu64 "\n", key);
- ++absent;
+ /*
+ * For every key in the saved file, verify that the key exists
+ * in the table after recovery. Since we did write-no-sync, we
+ * expect every key to have been recovered.
+ */
+ for (count = 0;; ++count) {
+ ret = fscanf(fp, "%" SCNu64 "\n", &key);
+ if (ret != EOF && ret != 1)
+ testutil_die(errno, "fscanf");
+ if (ret == EOF)
+ break;
+ snprintf(kname, sizeof(kname), "%" PRIu64, key);
+ cursor->set_key(cursor, kname);
+ if ((ret = cursor->search(cursor)) != 0) {
+ if (ret != WT_NOTFOUND)
+ testutil_die(ret, "search");
+ printf("no record with key %" PRIu64 "\n", key);
+ ++absent;
+ }
}
+ if (fclose(fp) != 0)
+ testutil_die(errno, "fclose");
}
- if (fclose(fp) != 0)
- testutil_die(errno, "fclose");
if ((ret = conn->close(conn, NULL)) != 0)
testutil_die(ret, "WT_CONNECTION:close");
if (absent) {
diff --git a/src/third_party/wiredtiger/tools/wtstats/stat_data.py b/src/third_party/wiredtiger/tools/wtstats/stat_data.py
index c75e4f194dd..7c42ab4d926 100644
--- a/src/third_party/wiredtiger/tools/wtstats/stat_data.py
+++ b/src/third_party/wiredtiger/tools/wtstats/stat_data.py
@@ -5,6 +5,7 @@ no_scale_per_second_list = [
'async: maximum work queue length',
'cache: bytes currently in the cache',
'cache: eviction currently operating in aggressive mode',
+ 'cache: files with active eviction walks',
'cache: maximum bytes configured',
'cache: maximum page size at eviction',
'cache: pages currently held in the cache',
@@ -59,6 +60,7 @@ no_scale_per_second_list = [
'btree: overflow pages',
'btree: row-store internal pages',
'btree: row-store leaf pages',
+ 'cache: bytes currently in the cache',
'cache: overflow values cached in memory',
'LSM: bloom filters in the LSM tree',
'LSM: chunks in the LSM tree',
@@ -71,6 +73,7 @@ no_clear_list = [
'async: maximum work queue length',
'cache: bytes currently in the cache',
'cache: eviction currently operating in aggressive mode',
+ 'cache: files with active eviction walks',
'cache: maximum bytes configured',
'cache: maximum page size at eviction',
'cache: pages currently held in the cache',
@@ -102,6 +105,7 @@ no_clear_list = [
'transaction: transaction range of IDs currently pinned by a checkpoint',
'transaction: transaction range of IDs currently pinned by named snapshots',
'btree: btree checkpoint generation',
+ 'cache: bytes currently in the cache',
'session: open cursor count',
]
prefix_list = [