diff options
author | Ramon Fernandez <ramon@mongodb.com> | 2016-06-28 15:03:54 -0400 |
---|---|---|
committer | Ramon Fernandez <ramon@mongodb.com> | 2016-06-28 15:04:02 -0400 |
commit | e8dc6b98c1c91727f7def84f2fb4b57bf67ccc88 (patch) | |
tree | d42e295804d3c8247cbde5feed070c242b63dee0 | |
parent | 30162fa8bbb9d7e7f7a789361aed7e046995f7b3 (diff) | |
download | mongo-e8dc6b98c1c91727f7def84f2fb4b57bf67ccc88.tar.gz |
Import wiredtiger-wiredtiger-2.8.0-219-gf4954f6.tar.gz from wiredtiger branch mongodb-3.2
ref: a6a64e9..f4954f6
SERVER-24580 Performance is poor when WiredTiger cache is full
WT-2672 Handle system calls that don't set errno
WT-2696 Missing log records with large updates
WT-2702 Under high thread load, WiredTiger exceeds cache size
WT-2708 split child-update race with reconciliation/eviction
WT-2729 Focus eviction walks in largest trees
32 files changed, 639 insertions, 377 deletions
diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py index 8f7827ad160..806fac2137d 100644 --- a/src/third_party/wiredtiger/dist/flags.py +++ b/src/third_party/wiredtiger/dist/flags.py @@ -57,6 +57,10 @@ flags = { 'TXN_LOG_CKPT_STOP', 'TXN_LOG_CKPT_SYNC', ], + 'txn_update_oldest' : [ + 'TXN_OLDEST_STRICT', + 'TXN_OLDEST_WAIT', + ], 'verbose' : [ 'VERB_API', 'VERB_BLOCK', diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index eed034abb47..631f2a5c909 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -439,6 +439,7 @@ bzDecompressInit bzalloc bzfree bzip +call's calloc cas catfmt @@ -1067,6 +1068,7 @@ unescaped unicode uninstantiated unistd +unlink unlinked unmap unmarshall diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index bd951e64999..483e0bd3ef2 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -162,6 +162,7 @@ connection_stats = [ CacheStat('cache_bytes_write', 'bytes written from cache', 'size'), CacheStat('cache_eviction_aggressive_set', 'eviction currently operating in aggressive mode', 'no_clear,no_scale'), CacheStat('cache_eviction_app', 'pages evicted by application threads'), + CacheStat('cache_eviction_app_dirty', 'modified pages evicted by application threads'), CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'), CacheStat('cache_eviction_clean', 'unmodified pages evicted'), CacheStat('cache_eviction_deepen', 'page split during eviction deepened the tree'), @@ -173,6 +174,9 @@ connection_stats = [ CacheStat('cache_eviction_hazard', 'hazard pointer blocked page eviction'), CacheStat('cache_eviction_internal', 'internal pages evicted'), CacheStat('cache_eviction_maximum_page_size', 'maximum page size at eviction', 'no_clear,no_scale,size'), + CacheStat('cache_eviction_pages_queued', 'pages queued for eviction'), + CacheStat('cache_eviction_pages_queued_oldest', 'pages queued for urgent eviction'), + CacheStat('cache_eviction_pages_seen', 'pages seen by eviction walk'), CacheStat('cache_eviction_queue_empty', 'eviction server candidate queue empty when topping up'), CacheStat('cache_eviction_queue_not_empty', 'eviction server candidate queue not empty when topping up'), CacheStat('cache_eviction_server_evicting', 'eviction server evicting pages'), @@ -181,6 +185,8 @@ connection_stats = [ CacheStat('cache_eviction_split_internal', 'internal pages split during eviction'), CacheStat('cache_eviction_split_leaf', 'leaf pages split during eviction'), CacheStat('cache_eviction_walk', 'pages walked for eviction'), + CacheStat('cache_eviction_walks_active', 'files with active eviction walks', 'no_clear,no_scale,size'), + CacheStat('cache_eviction_walks_started', 'files with new eviction walks started'), CacheStat('cache_eviction_worker_evicting', 'eviction worker thread evicting pages'), CacheStat('cache_inmem_split', 'in-memory page splits'), CacheStat('cache_inmem_splittable', 'in-memory page passed criteria to be split'), @@ -408,6 +414,7 @@ dsrc_stats = [ ########################################## # Cache and eviction statistics ########################################## + CacheStat('cache_bytes_inuse', 'bytes currently in the cache', 'no_clear,no_scale,size'), CacheStat('cache_bytes_read', 'bytes read into cache', 'size'), CacheStat('cache_bytes_write', 'bytes written from cache', 'size'), CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'), diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c index 6a1203628a9..df5f5cc2df8 100644 --- a/src/third_party/wiredtiger/src/btree/bt_read.c +++ b/src/third_party/wiredtiger/src/btree/bt_read.c @@ -326,7 +326,7 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref) __wt_page_evict_soon(page); /* Bump the oldest ID, we're about to do some visibility checks. */ - WT_RET(__wt_txn_update_oldest(session, false)); + WT_RET(__wt_txn_update_oldest(session, 0)); /* If eviction cannot succeed, don't try. */ return (__wt_page_can_evict(session, ref, NULL)); diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index 4f16a290958..00bea5a6773 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -298,7 +298,7 @@ static int __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, WT_REF **from_refp, size_t *decrp, WT_REF **to_refp, size_t *incrp) { - WT_ADDR *addr; + WT_ADDR *addr, *ref_addr; WT_CELL_UNPACK unpack; WT_DECL_RET; WT_IKEY *ikey; @@ -345,13 +345,18 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, } /* - * If there's no address (the page has never been written), or the - * address has been instantiated, there's no work to do. Otherwise, - * instantiate the address in-memory, from the on-page cell. + * If there's no address at all (the page has never been written), or + * the address has already been instantiated, there's no work to do. + * Otherwise, the address still references a split page on-page cell, + * instantiate it. We can race with reconciliation and/or eviction of + * the child pages, be cautious: read the address and verify it, and + * only update it if the value is unchanged from the original. In the + * case of a race, the address must no longer reference the split page, + * we're done. */ - addr = ref->addr; - if (addr != NULL && !__wt_off_page(from_home, addr)) { - __wt_cell_unpack((WT_CELL *)ref->addr, &unpack); + WT_ORDERED_READ(ref_addr, ref->addr); + if (ref_addr != NULL && !__wt_off_page(from_home, ref_addr)) { + __wt_cell_unpack((WT_CELL *)ref_addr, &unpack); WT_RET(__wt_calloc_one(session, &addr)); if ((ret = __wt_strndup( session, unpack.data, unpack.size, &addr->addr)) != 0) { @@ -371,7 +376,10 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, break; WT_ILLEGAL_VALUE(session); } - ref->addr = addr; + if (!__wt_atomic_cas_ptr(&ref->addr, ref_addr, addr)) { + __wt_free(session, addr->addr); + __wt_free(session, addr); + } } /* And finally, copy the WT_REF pointer itself. */ diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c index 3d5abf34147..f1e3c0b40d5 100644 --- a/src/third_party/wiredtiger/src/btree/bt_stat.c +++ b/src/third_party/wiredtiger/src/btree/bt_stat.c @@ -41,6 +41,9 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) WT_STAT_SET(session, stats, btree_maxleafpage, btree->maxleafpage); WT_STAT_SET(session, stats, btree_maxleafvalue, btree->maxleafvalue); + WT_STAT_SET(session, stats, cache_bytes_inuse, + __wt_btree_bytes_inuse(session)); + /* Everything else is really, really expensive. */ if (!F_ISSET(cst, WT_CONN_STAT_ALL)) return (0); diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c index 5d60c436a08..df4ceea8ffa 100644 --- a/src/third_party/wiredtiger/src/btree/bt_sync.c +++ b/src/third_party/wiredtiger/src/btree/bt_sync.c @@ -26,12 +26,14 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages; uint64_t oldest_id, saved_snap_min; uint32_t flags; + u_int saved_evict_walk_period; conn = S2C(session); btree = S2BT(session); walk = NULL; txn = &session->txn; saved_snap_min = WT_SESSION_TXN_STATE(session)->snap_min; + saved_evict_walk_period = btree->evict_walk_period; flags = WT_READ_CACHE | WT_READ_NO_GEN; internal_bytes = leaf_bytes = 0; @@ -236,10 +238,10 @@ err: /* On error, clear any left-over tree walk. */ WT_FULL_BARRIER(); /* - * If this tree was being skipped by the eviction server during - * the checkpoint, clear the wait. + * In case this tree was being skipped by the eviction server + * during the checkpoint, restore the previous state. */ - btree->evict_walk_period = 0; + btree->evict_walk_period = saved_evict_walk_period; /* * Wake the eviction server, in case application threads have diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c index 9a2c394e9a6..9dfd1cdcbfa 100644 --- a/src/third_party/wiredtiger/src/conn/conn_cache.c +++ b/src/third_party/wiredtiger/src/conn/conn_cache.c @@ -217,6 +217,14 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session) WT_STAT_SET( session, stats, cache_bytes_overflow, cache->bytes_overflow); WT_STAT_SET(session, stats, cache_bytes_leaf, leaf); + + /* + * The number of files with active walks ~= number of hazard pointers + * in the walk session. Note: reading without locking. + */ + if (conn->evict_session != NULL) + WT_STAT_SET(session, stats, cache_eviction_walks_active, + conn->evict_session->nhazard); } /* diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c index f5722d343f7..9c978fed843 100644 --- a/src/third_party/wiredtiger/src/conn/conn_open.c +++ b/src/third_party/wiredtiger/src/conn/conn_open.c @@ -93,7 +93,8 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) * transaction ID will catch up with the current ID. */ for (;;) { - WT_TRET(__wt_txn_update_oldest(session, true)); + WT_TRET(__wt_txn_update_oldest(session, + WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT)); if (txn_global->oldest_id == txn_global->current) break; __wt_yield(); diff --git a/src/third_party/wiredtiger/src/evict/evict_file.c b/src/third_party/wiredtiger/src/evict/evict_file.c index 1da2e959b6a..4b9e2442f32 100644 --- a/src/third_party/wiredtiger/src/evict/evict_file.c +++ b/src/third_party/wiredtiger/src/evict/evict_file.c @@ -26,7 +26,8 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) WT_RET(__wt_evict_file_exclusive_on(session)); /* Make sure the oldest transaction ID is up-to-date. */ - WT_RET(__wt_txn_update_oldest(session, true)); + WT_RET(__wt_txn_update_oldest( + session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT)); /* Walk the tree, discarding pages. */ next_ref = NULL; diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index be8cc1df956..360a3f69cd2 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -16,7 +16,7 @@ static int __evict_lru_walk(WT_SESSION_IMPL *); static int __evict_page(WT_SESSION_IMPL *, bool); static int __evict_pass(WT_SESSION_IMPL *); static int __evict_walk(WT_SESSION_IMPL *); -static int __evict_walk_file(WT_SESSION_IMPL *, u_int *); +static int __evict_walk_file(WT_SESSION_IMPL *, u_int, u_int *); static WT_THREAD_RET __evict_worker(void *); static int __evict_server_work(WT_SESSION_IMPL *); @@ -32,11 +32,6 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry) uint64_t read_gen; btree = entry->btree; - - /* Never prioritize empty slots. */ - if (entry->ref == NULL) - return (UINT64_MAX); - page = entry->ref->page; /* Any page set to the oldest generation should be discarded. */ @@ -71,14 +66,15 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry) * Qsort function: sort the eviction array. */ static int WT_CDECL -__evict_lru_cmp(const void *a, const void *b) +__evict_lru_cmp(const void *a_arg, const void *b_arg) { - uint64_t a_lru, b_lru; + const WT_EVICT_ENTRY *a = a_arg, *b = b_arg; + uint64_t a_score, b_score; - a_lru = __evict_read_gen(a); - b_lru = __evict_read_gen(b); + a_score = (a->ref == NULL ? UINT64_MAX : a->score); + b_score = (b->ref == NULL ? UINT64_MAX : b->score); - return ((a_lru < b_lru) ? -1 : (a_lru == b_lru) ? 0 : 1); + return ((a_score < b_score) ? -1 : (a_score == b_score) ? 0 : 1); } /* @@ -592,9 +588,10 @@ __evict_pass(WT_SESSION_IMPL *session) * * Do this every time the eviction server wakes up, regardless * of whether the cache is full, to prevent the oldest ID - * falling too far behind. + * falling too far behind. Don't wait to lock the table: with + * highly threaded workloads, that creates a bottleneck. */ - WT_RET(__wt_txn_update_oldest(session, loop > 0)); + WT_RET(__wt_txn_update_oldest(session, WT_TXN_OLDEST_STRICT)); if (!__evict_update_work(session)) break; @@ -900,7 +897,7 @@ __evict_lru_walk(WT_SESSION_IMPL *session) { WT_CACHE *cache; WT_DECL_RET; - uint64_t cutoff, read_gen_oldest; + uint64_t read_gen_oldest; uint32_t candidates, entries; cache = S2C(session)->cache; @@ -958,7 +955,7 @@ __evict_lru_walk(WT_SESSION_IMPL *session) read_gen_oldest = WT_READGEN_OLDEST; for (candidates = 0; candidates < entries; ++candidates) { read_gen_oldest = - __evict_read_gen(&cache->evict_queue[candidates]); + cache->evict_queue[candidates].score; if (read_gen_oldest != WT_READGEN_OLDEST) break; } @@ -967,35 +964,29 @@ __evict_lru_walk(WT_SESSION_IMPL *session) * Take all candidates if we only gathered pages with an oldest * read generation set. * - * We normally never take more than 50% of the entries; if 50% - * of the entries were at the oldest read generation, take them. + * We normally never take more than 50% of the entries but if + * 50% of the entries were at the oldest read generation, take + * all of them. */ if (read_gen_oldest == WT_READGEN_OLDEST) cache->evict_candidates = entries; else if (candidates >= entries / 2) cache->evict_candidates = candidates; else { - /* Save the calculated oldest generation. */ - cache->read_gen_oldest = read_gen_oldest; - - /* Find the bottom 25% of read generations. */ - cutoff = - (3 * read_gen_oldest + __evict_read_gen( - &cache->evict_queue[entries - 1])) / 4; - /* - * Don't take less than 10% or more than 50% of entries, - * regardless. That said, if there is only one entry, - * which is normal when populating an empty file, don't - * exclude it. + * Take all of the urgent pages plus a third of + * ordinary candidates (which could be expressed as + * WT_EVICT_WALK_INCR / WT_EVICT_WALK_BASE). In the + * steady state, we want to get as many candidates as + * the eviction walk adds to the queue. + * + * That said, if there is only one entry, which is + * normal when populating an empty file, don't exclude + * it. */ - for (candidates = 1 + entries / 10; - candidates < entries / 2; - candidates++) - if (__evict_read_gen( - &cache->evict_queue[candidates]) > cutoff) - break; - cache->evict_candidates = candidates; + cache->evict_candidates = + 1 + candidates + ((entries - candidates) - 1) / 3; + cache->read_gen_oldest = read_gen_oldest; } } @@ -1071,7 +1062,7 @@ __evict_walk(WT_SESSION_IMPL *session) * per walk. */ start_slot = slot = cache->evict_entries; - max_entries = slot + WT_EVICT_WALK_INCR; + max_entries = WT_MIN(slot + WT_EVICT_WALK_INCR, cache->evict_slots); retry: while (slot < max_entries && ret == 0) { /* @@ -1154,7 +1145,6 @@ retry: while (slot < max_entries && ret == 0) { * useful in the past. */ if (btree->evict_walk_period != 0 && - cache->evict_entries >= WT_EVICT_WALK_INCR && btree->evict_walk_skips++ < btree->evict_walk_period) continue; btree->evict_walk_skips = 0; @@ -1180,7 +1170,8 @@ retry: while (slot < max_entries && ret == 0) { if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) { cache->evict_file_next = dhandle; WT_WITH_DHANDLE(session, dhandle, - ret = __evict_walk_file(session, &slot)); + ret = __evict_walk_file( + session, max_entries, &slot)); WT_ASSERT(session, session->split_gen == 0); } __wt_spin_unlock(session, &cache->evict_walk_lock); @@ -1247,8 +1238,9 @@ __evict_init_candidate( if (evict->ref != NULL) __evict_list_clear(session, evict); - evict->ref = ref; evict->btree = S2BT(session); + evict->ref = ref; + evict->score = __evict_read_gen(evict); /* Mark the page on the list; set last to flush the other updates. */ F_SET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU); @@ -1259,7 +1251,7 @@ __evict_init_candidate( * Get a few page eviction candidates from a single underlying file. */ static int -__evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp) +__evict_walk_file(WT_SESSION_IMPL *session, u_int max_entries, u_int *slotp) { WT_BTREE *btree; WT_CACHE *cache; @@ -1269,8 +1261,9 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp) WT_PAGE *page; WT_PAGE_MODIFY *mod; WT_REF *ref; - uint64_t pages_walked; - uint32_t walk_flags; + uint64_t btree_inuse, bytes_per_slot, cache_inuse; + uint64_t pages_seen, refs_walked; + uint32_t remaining_slots, target_pages, total_slots, walk_flags; int internal_pages, restarts; bool enough, modified; @@ -1280,11 +1273,43 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp) internal_pages = restarts = 0; enough = false; + /* + * Figure out how many slots to fill from this tree. + * Note that some care is taken in the calculation to avoid overflow. + */ start = cache->evict_queue + *slotp; - end = start + WT_EVICT_WALK_PER_FILE; + btree_inuse = __wt_btree_bytes_inuse(session); + cache_inuse = __wt_cache_bytes_inuse(cache); + remaining_slots = max_entries - *slotp; + total_slots = max_entries - cache->evict_entries; + target_pages = (uint32_t)(btree_inuse / + (cache_inuse / total_slots)); + + /* + * The target number of pages for this tree is proportional to the + * space it is taking up in cache. Round to the nearest number of + * slots so we assign all of the slots to a tree filling 99+% of the + * cache (and only have to walk it once). + */ + bytes_per_slot = cache_inuse / total_slots; + target_pages = (uint32_t)( + (btree_inuse + bytes_per_slot / 2) / bytes_per_slot); + if (target_pages == 0) { + /* + * Randomly walk trees with a tiny fraction of the cache in + * case there are so many trees that none of them use enough of + * the cache to be allocated slots. + */ + if (__wt_random(&session->rnd) / (double)UINT32_MAX > + btree_inuse / (double)cache_inuse) + return (0); + target_pages = 10; + } + if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || - end > cache->evict_queue + cache->evict_slots) - end = cache->evict_queue + cache->evict_slots; + target_pages > remaining_slots) + target_pages = remaining_slots; + end = start + target_pages; walk_flags = WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT; @@ -1303,17 +1328,21 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp) * Once we hit the page limit, do one more step through the walk in * case we are appending and only the last page in the file is live. */ - for (evict = start, pages_walked = 0; + for (evict = start, pages_seen = refs_walked = 0; evict < end && !enough && (ret == 0 || ret == WT_NOTFOUND); ret = __wt_tree_walk_count( - session, &btree->evict_ref, &pages_walked, walk_flags)) { - enough = pages_walked > cache->evict_max_refs_per_file; + session, &btree->evict_ref, &refs_walked, walk_flags)) { + enough = refs_walked > cache->evict_max_refs_per_file; if ((ref = btree->evict_ref) == NULL) { if (++restarts == 2 || enough) break; + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_walks_started); continue; } + ++pages_seen; + /* Ignore root pages entirely. */ if (__wt_ref_is_root(ref)) continue; @@ -1341,9 +1370,13 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp) } /* Pages we no longer need (clean or dirty), are found money. */ + if (page->read_gen == WT_READGEN_OLDEST) { + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_pages_queued_oldest); + goto fast; + } if (__wt_page_is_empty(page) || - F_ISSET(session->dhandle, WT_DHANDLE_DEAD) || - page->read_gen == WT_READGEN_OLDEST) + F_ISSET(session->dhandle, WT_DHANDLE_DEAD)) goto fast; /* Skip clean pages if appropriate. */ @@ -1409,24 +1442,31 @@ fast: /* If the page can't be evicted, give up. */ WT_RET_NOTFOUND_OK(ret); *slotp += (u_int)(evict - start); + WT_STAT_FAST_CONN_INCRV( + session, cache_eviction_pages_queued, (u_int)(evict - start)); /* * If we happen to end up on the root page, clear it. We have to track * hazard pointers, and the root page complicates that calculation. * + * Likewise if we found no new candidates during the walk: there is no + * point keeping a page pinned, since it may be the only candidate in an + * idle tree. + * * If we land on a page requiring forced eviction, move on to the next * page: we want this page evicted as quickly as possible. */ if ((ref = btree->evict_ref) != NULL) { - if (__wt_ref_is_root(ref)) + if (__wt_ref_is_root(ref) || evict == start) WT_RET(__evict_clear_walk(session)); else if (ref->page->read_gen == WT_READGEN_OLDEST) WT_RET_NOTFOUND_OK(__wt_tree_walk_count( session, &btree->evict_ref, - &pages_walked, walk_flags)); + &refs_walked, walk_flags)); } - WT_STAT_FAST_CONN_INCRV(session, cache_eviction_walk, pages_walked); + WT_STAT_FAST_CONN_INCRV(session, cache_eviction_walk, refs_walked); + WT_STAT_FAST_CONN_INCRV(session, cache_eviction_pages_seen, pages_seen); return (0); } @@ -1459,6 +1499,8 @@ __evict_get_ref( return (WT_NOTFOUND); if (__wt_spin_trylock(session, &cache->evict_lock) == 0) break; + if (!F_ISSET(session, WT_SESSION_INTERNAL)) + return (WT_NOTFOUND); __wt_yield(); } @@ -1472,13 +1514,14 @@ __evict_get_ref( candidates /= 2; /* Get the next page queued for eviction. */ - while ((evict = cache->evict_current) != NULL && - evict < cache->evict_queue + candidates && evict->ref != NULL) { + for (evict = cache->evict_current; + evict >= cache->evict_queue && + evict < cache->evict_queue + candidates; + ++evict) { + if (evict->ref == NULL) + continue; WT_ASSERT(session, evict->btree != NULL); - /* Move to the next item. */ - ++cache->evict_current; - /* * Lock the page while holding the eviction mutex to prevent * multiple attempts to evict it. For pages that are already @@ -1508,8 +1551,11 @@ __evict_get_ref( } /* Clear the current pointer if there are no more candidates. */ - if (evict >= cache->evict_queue + cache->evict_candidates) + if (evict == NULL || evict + 1 >= + cache->evict_queue + cache->evict_candidates) cache->evict_current = NULL; + else + cache->evict_current = evict + 1; __wt_spin_unlock(session, &cache->evict_lock); return ((*refp == NULL) ? WT_NOTFOUND : 0); @@ -1533,15 +1579,18 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server) * An internal session flags either the server itself or an eviction * worker thread. */ - if (F_ISSET(session, WT_SESSION_INTERNAL)) { - if (is_server) - WT_STAT_FAST_CONN_INCR( - session, cache_eviction_server_evicting); - else + if (is_server) + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_server_evicting); + else if (F_ISSET(session, WT_SESSION_INTERNAL)) + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_worker_evicting); + else { + if (__wt_page_is_modified(ref->page)) WT_STAT_FAST_CONN_INCR( - session, cache_eviction_worker_evicting); - } else + session, cache_eviction_app_dirty); WT_STAT_FAST_CONN_INCR(session, cache_eviction_app); + } /* * In case something goes wrong, don't pick the same set of pages every @@ -1628,8 +1677,9 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full) } /* See if eviction is still needed. */ - if (!__wt_eviction_needed(session, NULL) || - cache->pages_evict > init_evict_count + max_pages_evicted) + if (!__wt_eviction_needed(session, &pct_full) || + (pct_full < 100 && + cache->pages_evict > init_evict_count + max_pages_evicted)) return (0); /* Evict a page. */ diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c index 2d20f53e9ae..305b81fe69e 100644 --- a/src/third_party/wiredtiger/src/evict/evict_page.c +++ b/src/third_party/wiredtiger/src/evict/evict_page.c @@ -420,7 +420,8 @@ __evict_review( * fallen behind current. */ if (modified) - WT_RET(__wt_txn_update_oldest(session, false)); + WT_RET(__wt_txn_update_oldest( + session, WT_TXN_OLDEST_STRICT)); if (!__wt_page_can_evict(session, ref, inmem_splitp)) return (EBUSY); diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h index fd921677751..96097115afd 100644 --- a/src/third_party/wiredtiger/src/include/btree.h +++ b/src/third_party/wiredtiger/src/include/btree.h @@ -129,6 +129,8 @@ struct __wt_btree { uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */ uint64_t write_gen; /* Write generation */ + uint64_t bytes_inmem; /* Cache bytes in memory. */ + WT_REF *evict_ref; /* Eviction thread's location */ uint64_t evict_priority; /* Relative priority of cached pages */ u_int evict_walk_period; /* Skip this many LRU walks */ diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 6df7f87073f..03f27861e75 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -55,6 +55,27 @@ __wt_btree_block_free( } /* + * __wt_btree_bytes_inuse -- + * Return the number of bytes in use. + */ +static inline uint64_t +__wt_btree_bytes_inuse(WT_SESSION_IMPL *session) +{ + WT_CACHE *cache; + uint64_t bytes_inuse; + + cache = S2C(session)->cache; + + /* Adjust the cache size to take allocation overhead into account. */ + bytes_inuse = S2BT(session)->bytes_inmem; + if (cache->overhead_pct != 0) + bytes_inuse += + (bytes_inuse * (uint64_t)cache->overhead_pct) / 100; + + return (bytes_inuse); +} + +/* * __wt_cache_page_inmem_incr -- * Increment a page's memory footprint in the cache. */ @@ -66,6 +87,7 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) WT_ASSERT(session, size < WT_EXABYTE); cache = S2C(session)->cache; + (void)__wt_atomic_add64(&S2BT(session)->bytes_inmem, size); (void)__wt_atomic_add64(&cache->bytes_inmem, size); (void)__wt_atomic_addsize(&page->memory_footprint, size); if (__wt_page_is_modified(page)) { @@ -196,6 +218,8 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) WT_ASSERT(session, size < WT_EXABYTE); __wt_cache_decr_check_uint64( + session, &S2BT(session)->bytes_inmem, size, "WT_BTREE.bytes_inmem"); + __wt_cache_decr_check_uint64( session, &cache->bytes_inmem, size, "WT_CACHE.bytes_inmem"); __wt_cache_decr_check_size( session, &page->memory_footprint, size, "WT_PAGE.memory_footprint"); @@ -274,8 +298,9 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page) modify = page->modify; /* Update the bytes in-memory to reflect the eviction. */ - __wt_cache_decr_check_uint64(session, - &cache->bytes_inmem, + __wt_cache_decr_check_uint64(session, &S2BT(session)->bytes_inmem, + page->memory_footprint, "WT_BTREE.bytes_inmem"); + __wt_cache_decr_check_uint64(session, &cache->bytes_inmem, page->memory_footprint, "WT_CACHE.bytes_inmem"); /* Update the bytes_internal value to reflect the eviction */ diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h index 9184a2fe6ed..f683ed6b0f8 100644 --- a/src/third_party/wiredtiger/src/include/cache.h +++ b/src/third_party/wiredtiger/src/include/cache.h @@ -13,7 +13,6 @@ #define WT_EVICT_INT_SKEW (1<<20) /* Prefer leaf pages over internal pages by this many increments of the read generation. */ -#define WT_EVICT_WALK_PER_FILE 10 /* Pages to queue per file */ #define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */ #define WT_EVICT_WALK_INCR 100 /* Pages added each walk */ @@ -24,6 +23,7 @@ struct __wt_evict_entry { WT_BTREE *btree; /* Enclosing btree object */ WT_REF *ref; /* Page to flush/evict */ + uint64_t score; /* Relative eviction priority */ }; /* diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 792700555dd..f2b13023386 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -677,7 +677,7 @@ extern void __wt_stat_join_clear_all(WT_JOIN_STATS **stats); extern void __wt_stat_join_aggregate( WT_JOIN_STATS **from, WT_JOIN_STATS *to); extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session); extern int __wt_txn_get_snapshot(WT_SESSION_IMPL *session); -extern int __wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force); +extern int __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags); extern int __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[]); extern void __wt_txn_release(WT_SESSION_IMPL *session); extern int __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[]); diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h index 3d9b0ed716b..7682af5a4b8 100644 --- a/src/third_party/wiredtiger/src/include/flags.h +++ b/src/third_party/wiredtiger/src/include/flags.h @@ -76,6 +76,8 @@ #define WT_TXN_LOG_CKPT_START 0x00000004 #define WT_TXN_LOG_CKPT_STOP 0x00000008 #define WT_TXN_LOG_CKPT_SYNC 0x00000010 +#define WT_TXN_OLDEST_STRICT 0x00000001 +#define WT_TXN_OLDEST_WAIT 0x00000002 #define WT_VERB_API 0x00000001 #define WT_VERB_BLOCK 0x00000002 #define WT_VERB_CHECKPOINT 0x00000004 diff --git a/src/third_party/wiredtiger/src/include/os.h b/src/third_party/wiredtiger/src/include/os.h index 2ff41d39f46..44cceee6c40 100644 --- a/src/third_party/wiredtiger/src/include/os.h +++ b/src/third_party/wiredtiger/src/include/os.h @@ -17,15 +17,26 @@ #define WT_SYSCALL_RETRY(call, ret) do { \ int __retry; \ for (__retry = 0; __retry < 10; ++__retry) { \ - if ((call) == 0) { \ - (ret) = 0; \ - break; \ - } \ - switch ((ret) = __wt_errno()) { \ - case 0: \ - /* The call failed but didn't set errno. */ \ - (ret) = WT_ERROR; \ + /* \ + * A call returning 0 indicates success; any call where \ + * 0 is not the only successful return must provide an \ + * expression evaluating to 0 in all successful cases. \ + */ \ + if (((ret) = (call)) == 0) \ break; \ + /* \ + * The call's error was either returned by the call or \ + * is in errno, and there are cases where it depends on \ + * the software release as to which it is (for example, \ + * posix_fadvise on FreeBSD and OS X). Failing calls \ + * must either return a non-zero error value, or -1 if \ + * the error value is in errno. (The WiredTiger errno \ + * function returns WT_ERROR if errno is 0, which isn't \ + * ideal but won't discard the failure.) \ + */ \ + if ((ret) == -1) \ + (ret) = __wt_errno(); \ + switch (ret) { \ case EAGAIN: \ case EBUSY: \ case EINTR: \ diff --git a/src/third_party/wiredtiger/src/include/serial.i b/src/third_party/wiredtiger/src/include/serial.i index c0cd9c85ee9..67edc1c9ce1 100644 --- a/src/third_party/wiredtiger/src/include/serial.i +++ b/src/third_party/wiredtiger/src/include/serial.i @@ -306,7 +306,7 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page, if ((txn = page->modify->obsolete_check_txn) != WT_TXN_NONE) { if (!__wt_txn_visible_all(session, txn)) { /* Try to move the oldest ID forward and re-check. */ - WT_RET(__wt_txn_update_oldest(session, false)); + WT_RET(__wt_txn_update_oldest(session, 0)); if (!__wt_txn_visible_all(session, txn)) return (0); diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index f9170dc1a79..e728b634c6e 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -269,6 +269,8 @@ struct __wt_connection_stats { int64_t cache_eviction_slow; int64_t cache_eviction_worker_evicting; int64_t cache_eviction_force_fail; + int64_t cache_eviction_walks_active; + int64_t cache_eviction_walks_started; int64_t cache_eviction_hazard; int64_t cache_inmem_splittable; int64_t cache_inmem_split; @@ -280,14 +282,18 @@ struct __wt_connection_stats { int64_t cache_bytes_max; int64_t cache_eviction_maximum_page_size; int64_t cache_eviction_dirty; + int64_t cache_eviction_app_dirty; int64_t cache_eviction_deepen; int64_t cache_write_lookaside; int64_t cache_pages_inuse; int64_t cache_eviction_force; int64_t cache_eviction_force_delete; int64_t cache_eviction_app; + int64_t cache_eviction_pages_queued; + int64_t cache_eviction_pages_queued_oldest; int64_t cache_read; int64_t cache_read_lookaside; + int64_t cache_eviction_pages_seen; int64_t cache_eviction_fail; int64_t cache_eviction_walk; int64_t cache_write; @@ -441,6 +447,7 @@ struct __wt_dsrc_stats { int64_t btree_compact_rewrite; int64_t btree_row_internal; int64_t btree_row_leaf; + int64_t cache_bytes_inuse; int64_t cache_bytes_read; int64_t cache_bytes_write; int64_t cache_eviction_checkpoint; diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index 5c2efad77e0..4f422af32d4 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -3787,257 +3787,269 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1040 /*! cache: failed eviction of pages that exceeded the in-memory maximum */ #define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1041 +/*! cache: files with active eviction walks */ +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1042 +/*! cache: files with new eviction walks started */ +#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1043 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1042 +#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1044 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1043 +#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1045 /*! cache: in-memory page splits */ -#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1044 +#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1046 /*! cache: internal pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1045 +#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1047 /*! cache: internal pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1046 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1048 /*! cache: leaf pages split during eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1047 +#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1049 /*! cache: lookaside table insert calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1048 +#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1050 /*! cache: lookaside table remove calls */ -#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1049 +#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1051 /*! cache: maximum bytes configured */ -#define WT_STAT_CONN_CACHE_BYTES_MAX 1050 +#define WT_STAT_CONN_CACHE_BYTES_MAX 1052 /*! cache: maximum page size at eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1051 +#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1053 /*! cache: modified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1052 +#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1054 +/*! cache: modified pages evicted by application threads */ +#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1055 /*! cache: page split during eviction deepened the tree */ -#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1053 +#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1056 /*! cache: page written requiring lookaside records */ -#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1054 +#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1057 /*! cache: pages currently held in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_INUSE 1055 +#define WT_STAT_CONN_CACHE_PAGES_INUSE 1058 /*! cache: pages evicted because they exceeded the in-memory maximum */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1056 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1059 /*! cache: pages evicted because they had chains of deleted items */ -#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1057 +#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1060 /*! cache: pages evicted by application threads */ -#define WT_STAT_CONN_CACHE_EVICTION_APP 1058 +#define WT_STAT_CONN_CACHE_EVICTION_APP 1061 +/*! cache: pages queued for eviction */ +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1062 +/*! cache: pages queued for urgent eviction */ +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1063 /*! cache: pages read into cache */ -#define WT_STAT_CONN_CACHE_READ 1059 +#define WT_STAT_CONN_CACHE_READ 1064 /*! cache: pages read into cache requiring lookaside entries */ -#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1060 +#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1065 +/*! cache: pages seen by eviction walk */ +#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1066 /*! cache: pages selected for eviction unable to be evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1061 +#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1067 /*! cache: pages walked for eviction */ -#define WT_STAT_CONN_CACHE_EVICTION_WALK 1062 +#define WT_STAT_CONN_CACHE_EVICTION_WALK 1068 /*! cache: pages written from cache */ -#define WT_STAT_CONN_CACHE_WRITE 1063 +#define WT_STAT_CONN_CACHE_WRITE 1069 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1064 +#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1070 /*! cache: percentage overhead */ -#define WT_STAT_CONN_CACHE_OVERHEAD 1065 +#define WT_STAT_CONN_CACHE_OVERHEAD 1071 /*! cache: tracked bytes belonging to internal pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1066 +#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1072 /*! cache: tracked bytes belonging to leaf pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_LEAF 1067 +#define WT_STAT_CONN_CACHE_BYTES_LEAF 1073 /*! cache: tracked bytes belonging to overflow pages in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_OVERFLOW 1068 +#define WT_STAT_CONN_CACHE_BYTES_OVERFLOW 1074 /*! cache: tracked dirty bytes in the cache */ -#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1069 +#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1075 /*! cache: tracked dirty pages in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1070 +#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1076 /*! cache: unmodified pages evicted */ -#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1071 +#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1077 /*! connection: auto adjusting condition resets */ -#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1072 +#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1078 /*! connection: auto adjusting condition wait calls */ -#define WT_STAT_CONN_COND_AUTO_WAIT 1073 +#define WT_STAT_CONN_COND_AUTO_WAIT 1079 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1074 +#define WT_STAT_CONN_FILE_OPEN 1080 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1075 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1081 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1076 +#define WT_STAT_CONN_MEMORY_FREE 1082 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1077 +#define WT_STAT_CONN_MEMORY_GROW 1083 /*! connection: pthread mutex condition wait calls */ -#define WT_STAT_CONN_COND_WAIT 1078 +#define WT_STAT_CONN_COND_WAIT 1084 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1079 +#define WT_STAT_CONN_RWLOCK_READ 1085 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1080 +#define WT_STAT_CONN_RWLOCK_WRITE 1086 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1081 +#define WT_STAT_CONN_READ_IO 1087 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1082 +#define WT_STAT_CONN_WRITE_IO 1088 /*! cursor: cursor create calls */ -#define WT_STAT_CONN_CURSOR_CREATE 1083 +#define WT_STAT_CONN_CURSOR_CREATE 1089 /*! cursor: cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT 1084 +#define WT_STAT_CONN_CURSOR_INSERT 1090 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1085 +#define WT_STAT_CONN_CURSOR_NEXT 1091 /*! cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1086 +#define WT_STAT_CONN_CURSOR_PREV 1092 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1087 +#define WT_STAT_CONN_CURSOR_REMOVE 1093 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1088 +#define WT_STAT_CONN_CURSOR_RESET 1094 /*! cursor: cursor restarted searches */ -#define WT_STAT_CONN_CURSOR_RESTART 1089 +#define WT_STAT_CONN_CURSOR_RESTART 1095 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1090 +#define WT_STAT_CONN_CURSOR_SEARCH 1096 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1091 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1097 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1092 +#define WT_STAT_CONN_CURSOR_UPDATE 1098 /*! cursor: truncate calls */ -#define WT_STAT_CONN_CURSOR_TRUNCATE 1093 +#define WT_STAT_CONN_CURSOR_TRUNCATE 1099 /*! data-handle: connection data handles currently active */ -#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1094 +#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1100 /*! data-handle: connection sweep candidate became referenced */ -#define WT_STAT_CONN_DH_SWEEP_REF 1095 +#define WT_STAT_CONN_DH_SWEEP_REF 1101 /*! data-handle: connection sweep dhandles closed */ -#define WT_STAT_CONN_DH_SWEEP_CLOSE 1096 +#define WT_STAT_CONN_DH_SWEEP_CLOSE 1102 /*! data-handle: connection sweep dhandles removed from hash list */ -#define WT_STAT_CONN_DH_SWEEP_REMOVE 1097 +#define WT_STAT_CONN_DH_SWEEP_REMOVE 1103 /*! data-handle: connection sweep time-of-death sets */ -#define WT_STAT_CONN_DH_SWEEP_TOD 1098 +#define WT_STAT_CONN_DH_SWEEP_TOD 1104 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_SWEEPS 1099 +#define WT_STAT_CONN_DH_SWEEPS 1105 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1100 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1106 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1101 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1107 /*! log: busy returns attempting to switch slots */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1102 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1108 /*! log: consolidated slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1103 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1109 /*! log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1104 +#define WT_STAT_CONN_LOG_SLOT_RACES 1110 /*! log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1105 +#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1111 /*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1106 +#define WT_STAT_CONN_LOG_SLOT_JOINS 1112 /*! log: consolidated slot unbuffered writes */ -#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1107 +#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1113 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1108 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1114 /*! log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1109 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1115 /*! log: log files manually zero-filled */ -#define WT_STAT_CONN_LOG_ZERO_FILLS 1110 +#define WT_STAT_CONN_LOG_ZERO_FILLS 1116 /*! log: log flush operations */ -#define WT_STAT_CONN_LOG_FLUSH 1111 +#define WT_STAT_CONN_LOG_FLUSH 1117 /*! log: log force write operations */ -#define WT_STAT_CONN_LOG_FORCE_WRITE 1112 +#define WT_STAT_CONN_LOG_FORCE_WRITE 1118 /*! log: log force write operations skipped */ -#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1113 +#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1119 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1114 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1120 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1115 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1121 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1116 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1122 /*! log: log release advances write LSN */ -#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1117 +#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1123 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1118 +#define WT_STAT_CONN_LOG_SCANS 1124 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1119 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1125 /*! log: log server thread advances write LSN */ -#define WT_STAT_CONN_LOG_WRITE_LSN 1120 +#define WT_STAT_CONN_LOG_WRITE_LSN 1126 /*! log: log server thread write LSN walk skipped */ -#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1121 +#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1127 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1122 +#define WT_STAT_CONN_LOG_SYNC 1128 /*! log: log sync_dir operations */ -#define WT_STAT_CONN_LOG_SYNC_DIR 1123 +#define WT_STAT_CONN_LOG_SYNC_DIR 1129 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1124 +#define WT_STAT_CONN_LOG_WRITES 1130 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1125 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1131 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1126 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1132 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1127 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1133 /*! log: pre-allocated log files not ready and missed */ -#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1128 +#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1134 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1129 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1135 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1130 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1136 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1131 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1137 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1132 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1138 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1133 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1139 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1134 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1140 /*! log: written slots coalesced */ -#define WT_STAT_CONN_LOG_SLOT_COALESCED 1135 +#define WT_STAT_CONN_LOG_SLOT_COALESCED 1141 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1136 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1142 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1137 +#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1143 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1138 +#define WT_STAT_CONN_REC_PAGES 1144 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1139 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1145 /*! reconciliation: pages deleted */ -#define WT_STAT_CONN_REC_PAGE_DELETE 1140 +#define WT_STAT_CONN_REC_PAGE_DELETE 1146 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1141 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1147 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1142 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1148 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1143 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1149 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1144 +#define WT_STAT_CONN_SESSION_OPEN 1150 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1145 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1151 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1146 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1152 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1147 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1153 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1148 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1154 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1149 +#define WT_STAT_CONN_PAGE_SLEEP 1155 /*! transaction: number of named snapshots created */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1150 +#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1156 /*! transaction: number of named snapshots dropped */ -#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1151 +#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1157 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1152 +#define WT_STAT_CONN_TXN_BEGIN 1158 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1153 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1159 /*! transaction: transaction checkpoint generation */ -#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1154 +#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1160 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1155 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1161 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1156 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1162 /*! transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1157 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1163 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1158 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1164 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1159 +#define WT_STAT_CONN_TXN_CHECKPOINT 1165 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1160 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1166 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1161 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1167 /*! transaction: transaction range of IDs currently pinned by a checkpoint */ -#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1162 +#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1168 /*! transaction: transaction range of IDs currently pinned by named * snapshots */ -#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1163 +#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1169 /*! transaction: transaction sync calls */ -#define WT_STAT_CONN_TXN_SYNC 1164 +#define WT_STAT_CONN_TXN_SYNC 1170 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1165 +#define WT_STAT_CONN_TXN_COMMIT 1171 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1166 +#define WT_STAT_CONN_TXN_ROLLBACK 1172 /*! * @} @@ -4126,125 +4138,127 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2038 /*! btree: row-store leaf pages */ #define WT_STAT_DSRC_BTREE_ROW_LEAF 2039 +/*! cache: bytes currently in the cache */ +#define WT_STAT_DSRC_CACHE_BYTES_INUSE 2040 /*! cache: bytes read into cache */ -#define WT_STAT_DSRC_CACHE_BYTES_READ 2040 +#define WT_STAT_DSRC_CACHE_BYTES_READ 2041 /*! cache: bytes written from cache */ -#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2041 +#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2042 /*! cache: checkpoint blocked page eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2042 +#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2043 /*! cache: data source pages selected for eviction unable to be evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2043 +#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2044 /*! cache: hazard pointer blocked page eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2044 +#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2045 /*! cache: in-memory page passed criteria to be split */ -#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2045 +#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2046 /*! cache: in-memory page splits */ -#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2046 +#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2047 /*! cache: internal pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2047 +#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2048 /*! cache: internal pages split during eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2048 +#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2049 /*! cache: leaf pages split during eviction */ -#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2049 +#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2050 /*! cache: modified pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2050 +#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2051 /*! cache: overflow pages read into cache */ -#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2051 +#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2052 /*! cache: overflow values cached in memory */ -#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2052 +#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2053 /*! cache: page split during eviction deepened the tree */ -#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2053 +#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2054 /*! cache: page written requiring lookaside records */ -#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2054 +#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2055 /*! cache: pages read into cache */ -#define WT_STAT_DSRC_CACHE_READ 2055 +#define WT_STAT_DSRC_CACHE_READ 2056 /*! cache: pages read into cache requiring lookaside entries */ -#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2056 +#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2057 /*! cache: pages written from cache */ -#define WT_STAT_DSRC_CACHE_WRITE 2057 +#define WT_STAT_DSRC_CACHE_WRITE 2058 /*! cache: pages written requiring in-memory restoration */ -#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2058 +#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2059 /*! cache: unmodified pages evicted */ -#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2059 +#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2060 /*! compression: compressed pages read */ -#define WT_STAT_DSRC_COMPRESS_READ 2060 +#define WT_STAT_DSRC_COMPRESS_READ 2061 /*! compression: compressed pages written */ -#define WT_STAT_DSRC_COMPRESS_WRITE 2061 +#define WT_STAT_DSRC_COMPRESS_WRITE 2062 /*! compression: page written failed to compress */ -#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2062 +#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2063 /*! compression: page written was too small to compress */ -#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2063 +#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2064 /*! compression: raw compression call failed, additional data available */ -#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2064 +#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2065 /*! compression: raw compression call failed, no additional data available */ -#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2065 +#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2066 /*! compression: raw compression call succeeded */ -#define WT_STAT_DSRC_COMPRESS_RAW_OK 2066 +#define WT_STAT_DSRC_COMPRESS_RAW_OK 2067 /*! cursor: bulk-loaded cursor-insert calls */ -#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2067 +#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2068 /*! cursor: create calls */ -#define WT_STAT_DSRC_CURSOR_CREATE 2068 +#define WT_STAT_DSRC_CURSOR_CREATE 2069 /*! cursor: cursor-insert key and value bytes inserted */ -#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2069 +#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2070 /*! cursor: cursor-remove key bytes removed */ -#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2070 +#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2071 /*! cursor: cursor-update value bytes updated */ -#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2071 +#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2072 /*! cursor: insert calls */ -#define WT_STAT_DSRC_CURSOR_INSERT 2072 +#define WT_STAT_DSRC_CURSOR_INSERT 2073 /*! cursor: next calls */ -#define WT_STAT_DSRC_CURSOR_NEXT 2073 +#define WT_STAT_DSRC_CURSOR_NEXT 2074 /*! cursor: prev calls */ -#define WT_STAT_DSRC_CURSOR_PREV 2074 +#define WT_STAT_DSRC_CURSOR_PREV 2075 /*! cursor: remove calls */ -#define WT_STAT_DSRC_CURSOR_REMOVE 2075 +#define WT_STAT_DSRC_CURSOR_REMOVE 2076 /*! cursor: reset calls */ -#define WT_STAT_DSRC_CURSOR_RESET 2076 +#define WT_STAT_DSRC_CURSOR_RESET 2077 /*! cursor: restarted searches */ -#define WT_STAT_DSRC_CURSOR_RESTART 2077 +#define WT_STAT_DSRC_CURSOR_RESTART 2078 /*! cursor: search calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH 2078 +#define WT_STAT_DSRC_CURSOR_SEARCH 2079 /*! cursor: search near calls */ -#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2079 +#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2080 /*! cursor: truncate calls */ -#define WT_STAT_DSRC_CURSOR_TRUNCATE 2080 +#define WT_STAT_DSRC_CURSOR_TRUNCATE 2081 /*! cursor: update calls */ -#define WT_STAT_DSRC_CURSOR_UPDATE 2081 +#define WT_STAT_DSRC_CURSOR_UPDATE 2082 /*! reconciliation: dictionary matches */ -#define WT_STAT_DSRC_REC_DICTIONARY 2082 +#define WT_STAT_DSRC_REC_DICTIONARY 2083 /*! reconciliation: fast-path pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2083 +#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2084 /*! reconciliation: internal page key bytes discarded using suffix * compression */ -#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2084 +#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2085 /*! reconciliation: internal page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2085 +#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2086 /*! reconciliation: internal-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2086 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2087 /*! reconciliation: leaf page key bytes discarded using prefix compression */ -#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2087 +#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2088 /*! reconciliation: leaf page multi-block writes */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2088 +#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2089 /*! reconciliation: leaf-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2089 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2090 /*! reconciliation: maximum blocks required for a page */ -#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2090 +#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2091 /*! reconciliation: overflow values written */ -#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2091 +#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2092 /*! reconciliation: page checksum matches */ -#define WT_STAT_DSRC_REC_PAGE_MATCH 2092 +#define WT_STAT_DSRC_REC_PAGE_MATCH 2093 /*! reconciliation: page reconciliation calls */ -#define WT_STAT_DSRC_REC_PAGES 2093 +#define WT_STAT_DSRC_REC_PAGES 2094 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_DSRC_REC_PAGES_EVICTION 2094 +#define WT_STAT_DSRC_REC_PAGES_EVICTION 2095 /*! reconciliation: pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE 2095 +#define WT_STAT_DSRC_REC_PAGE_DELETE 2096 /*! session: object compaction */ -#define WT_STAT_DSRC_SESSION_COMPACT 2096 +#define WT_STAT_DSRC_SESSION_COMPACT 2097 /*! session: open cursor count */ -#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2097 +#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2098 /*! transaction: update conflicts */ -#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2098 +#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2099 /*! * @} diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c index b7efb1d9018..47071211450 100644 --- a/src/third_party/wiredtiger/src/log/log_slot.c +++ b/src/third_party/wiredtiger/src/log/log_slot.c @@ -94,6 +94,17 @@ retry: if (WT_LOG_SLOT_DONE(new_state)) *releasep = 1; slot->slot_end_lsn = slot->slot_start_lsn; + /* + * A thread setting the unbuffered flag sets the unbuffered size after + * setting the flag. There could be a delay between a thread setting + * the flag, a thread closing the slot, and the original thread setting + * that value. If the state is unbuffered, wait for the unbuffered + * size to be set. + */ + while (WT_LOG_SLOT_UNBUFFERED_ISSET(old_state) && + slot->slot_unbuffered == 0) + __wt_yield(); + end_offset = WT_LOG_SLOT_JOINED_BUFFERED(old_state) + slot->slot_unbuffered; slot->slot_end_lsn.l.offset += (uint32_t)end_offset; diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c index 51cf2e981de..f5bb4cfd337 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c @@ -289,7 +289,8 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, } /* Stop if a running transaction needs the chunk. */ - WT_RET(__wt_txn_update_oldest(session, true)); + WT_RET(__wt_txn_update_oldest( + session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT)); if (chunk->switch_txn == WT_TXN_NONE || !__wt_txn_visible_all(session, chunk->switch_txn)) { WT_RET(__wt_verbose(session, WT_VERB_LSM, diff --git a/src/third_party/wiredtiger/src/os_posix/os_dir.c b/src/third_party/wiredtiger/src/os_posix/os_dir.c index 78ae5f8edd4..02f12ec7311 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_dir.c +++ b/src/third_party/wiredtiger/src/os_posix/os_dir.c @@ -36,7 +36,7 @@ __wt_posix_directory_list(WT_SESSION_IMPL *session, const char *dir, dirsz = 0; entries = NULL; - WT_SYSCALL_RETRY(((dirp = opendir(path)) == NULL ? 1 : 0), ret); + WT_SYSCALL_RETRY(((dirp = opendir(path)) == NULL ? -1 : 0), ret); if (ret != 0) WT_ERR_MSG(session, ret, "%s: directory-list: opendir", path); diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c index 86aa8db8f4f..7d8f3b937b6 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_fs.c +++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c @@ -52,7 +52,7 @@ __posix_sync(WT_SESSION_IMPL *session, * "This is currently implemented on HFS, MS-DOS (FAT), and Universal * Disk Format (UDF) file systems." */ - WT_SYSCALL_RETRY(fcntl(fd, F_FULLFSYNC, 0), ret); + WT_SYSCALL_RETRY(fcntl(fd, F_FULLFSYNC, 0) == -1 ? -1 : 0, ret); if (ret == 0) return (0); /* @@ -107,7 +107,7 @@ __posix_directory_sync(WT_SESSION_IMPL *session, const char *path) } WT_SYSCALL_RETRY(( - (fd = open(path, O_RDONLY, 0444)) == -1 ? 1 : 0), ret); + (fd = open(path, O_RDONLY, 0444)) == -1 ? -1 : 0), ret); if (ret != 0) WT_ERR_MSG(session, ret, "%s: directory-sync: open", path); @@ -172,14 +172,19 @@ __posix_file_remove(WT_SESSION_IMPL *session, const char *name) #endif WT_RET(__wt_filename(session, name, &path)); - name = path; - - WT_SYSCALL_RETRY(remove(name), ret); - if (ret != 0) - __wt_err(session, ret, "%s: file-remove: remove", name); + /* + * ISO C doesn't require remove return -1 on failure or set errno (note + * POSIX 1003.1 extends C with those requirements). Regardless, use the + * unlink system call, instead of remove, to simplify error handling; + * where we're not doing any special checking for standards compliance, + * using unlink may be marginally safer. + */ + WT_SYSCALL_RETRY(unlink(path), ret); __wt_free(session, path); - return (ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s: file-remove: unlink", name); } /* @@ -203,18 +208,22 @@ __posix_file_rename(WT_SESSION_IMPL *session, const char *from, const char *to) from_path = to_path = NULL; WT_ERR(__wt_filename(session, from, &from_path)); - from = from_path; WT_ERR(__wt_filename(session, to, &to_path)); - to = to_path; - WT_SYSCALL_RETRY(rename(from, to), ret); - if (ret != 0) - __wt_err(session, ret, - "%s to %s: file-rename: rename", from, to); + /* + * ISO C doesn't require rename return -1 on failure or set errno (note + * POSIX 1003.1 extends C with those requirements). Be cautious, force + * any non-zero return to -1 so we'll check errno. We can still end up + * with the wrong errno (if errno is garbage), or the generic WT_ERROR + * return (if errno is 0), but we've done the best we can. + */ + WT_SYSCALL_RETRY(rename(from_path, to_path) != 0 ? -1 : 0, ret); err: __wt_free(session, from_path); __wt_free(session, to_path); - return (ret); + if (ret == 0) + return (0); + WT_RET_MSG(session, ret, "%s to %s: file-rename: rename", from, to); } /* @@ -360,7 +369,7 @@ __posix_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) fl.l_type = lock ? F_WRLCK : F_UNLCK; fl.l_whence = SEEK_SET; - WT_SYSCALL_RETRY(fcntl(fh->fd, F_SETLK, &fl), ret); + WT_SYSCALL_RETRY(fcntl(fh->fd, F_SETLK, &fl) == -1 ? -1 : 0, ret); if (ret == 0) return (0); WT_RET_MSG(session, ret, "%s: handle-lock: fcntl", fh->name); @@ -560,7 +569,7 @@ __posix_handle_open(WT_SESSION_IMPL *session, f |= O_CLOEXEC; #endif WT_SYSCALL_RETRY(( - (fd = open(name, f, 0444)) == -1 ? 1 : 0), ret); + (fd = open(name, f, 0444)) == -1 ? -1 : 0), ret); if (ret != 0) WT_ERR_MSG(session, ret, "%s: handle-open: open", name); WT_ERR(__posix_handle_open_cloexec(session, fd, name)); @@ -622,7 +631,7 @@ __posix_handle_open(WT_SESSION_IMPL *session, #endif } - WT_SYSCALL_RETRY(((fd = open(name, f, mode)) == -1 ? 1 : 0), ret); + WT_SYSCALL_RETRY(((fd = open(name, f, mode)) == -1 ? -1 : 0), ret); if (ret != 0) WT_ERR_MSG(session, ret, direct_io ? diff --git a/src/third_party/wiredtiger/src/os_posix/os_map.c b/src/third_party/wiredtiger/src/os_posix/os_map.c index de28891ffd1..e161e268f6d 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_map.c +++ b/src/third_party/wiredtiger/src/os_posix/os_map.c @@ -98,6 +98,7 @@ __posix_map_preload_madvise( if (size <= (size_t)conn->page_size || (ret = posix_madvise(blk, size, POSIX_MADV_WILLNEED)) == 0) return (0); + WT_RET_MSG(session, ret, "%s: memory-map preload: posix_madvise: POSIX_MADV_WILLNEED", fh->name); @@ -145,6 +146,7 @@ __posix_map_discard_madvise( if ((ret = posix_madvise(blk, size, POSIX_MADV_DONTNEED)) == 0) return (0); + WT_RET_MSG(session, ret, "%s: memory-map discard: posix_madvise: POSIX_MADV_DONTNEED", fh->name); diff --git a/src/third_party/wiredtiger/src/os_win/os_fs.c b/src/third_party/wiredtiger/src/os_win/os_fs.c index 95c0ea40ce6..4ac613fc9f9 100644 --- a/src/third_party/wiredtiger/src/os_win/os_fs.c +++ b/src/third_party/wiredtiger/src/os_win/os_fs.c @@ -286,11 +286,6 @@ __win_handle_lock(WT_SESSION_IMPL *session, WT_FH *fh, bool lock) * WiredTiger requires this function be able to acquire locks past * the end of file. * - * Note we're using fcntl(2) locking: all fcntl locks associated with a - * file for a given process are removed when any file descriptor for the - * file is closed by the process, even if a lock was never requested for - * that file descriptor. - * * http://msdn.microsoft.com/ * en-us/library/windows/desktop/aa365202%28v=vs.85%29.aspx * diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index 2a826eda962..2f5609567da 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -43,6 +43,7 @@ static const char * const __stats_dsrc_desc[] = { "btree: pages rewritten by compaction", "btree: row-store internal pages", "btree: row-store leaf pages", + "cache: bytes currently in the cache", "cache: bytes read into cache", "cache: bytes written from cache", "cache: checkpoint blocked page eviction", @@ -172,6 +173,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats) stats->btree_compact_rewrite = 0; stats->btree_row_internal = 0; stats->btree_row_leaf = 0; + /* not clearing cache_bytes_inuse */ stats->cache_bytes_read = 0; stats->cache_bytes_write = 0; stats->cache_eviction_checkpoint = 0; @@ -298,6 +300,7 @@ __wt_stat_dsrc_aggregate_single( to->btree_compact_rewrite += from->btree_compact_rewrite; to->btree_row_internal += from->btree_row_internal; to->btree_row_leaf += from->btree_row_leaf; + to->cache_bytes_inuse += from->cache_bytes_inuse; to->cache_bytes_read += from->cache_bytes_read; to->cache_bytes_write += from->cache_bytes_write; to->cache_eviction_checkpoint += from->cache_eviction_checkpoint; @@ -430,6 +433,7 @@ __wt_stat_dsrc_aggregate( WT_STAT_READ(from, btree_compact_rewrite); to->btree_row_internal += WT_STAT_READ(from, btree_row_internal); to->btree_row_leaf += WT_STAT_READ(from, btree_row_leaf); + to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse); to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read); to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write); to->cache_eviction_checkpoint += @@ -551,6 +555,8 @@ static const char * const __stats_connection_desc[] = { "cache: eviction server unable to reach eviction goal", "cache: eviction worker thread evicting pages", "cache: failed eviction of pages that exceeded the in-memory maximum", + "cache: files with active eviction walks", + "cache: files with new eviction walks started", "cache: hazard pointer blocked page eviction", "cache: in-memory page passed criteria to be split", "cache: in-memory page splits", @@ -562,14 +568,18 @@ static const char * const __stats_connection_desc[] = { "cache: maximum bytes configured", "cache: maximum page size at eviction", "cache: modified pages evicted", + "cache: modified pages evicted by application threads", "cache: page split during eviction deepened the tree", "cache: page written requiring lookaside records", "cache: pages currently held in the cache", "cache: pages evicted because they exceeded the in-memory maximum", "cache: pages evicted because they had chains of deleted items", "cache: pages evicted by application threads", + "cache: pages queued for eviction", + "cache: pages queued for urgent eviction", "cache: pages read into cache", "cache: pages read into cache requiring lookaside entries", + "cache: pages seen by eviction walk", "cache: pages selected for eviction unable to be evicted", "cache: pages walked for eviction", "cache: pages written from cache", @@ -748,6 +758,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) stats->cache_eviction_slow = 0; stats->cache_eviction_worker_evicting = 0; stats->cache_eviction_force_fail = 0; + /* not clearing cache_eviction_walks_active */ + stats->cache_eviction_walks_started = 0; stats->cache_eviction_hazard = 0; stats->cache_inmem_splittable = 0; stats->cache_inmem_split = 0; @@ -759,14 +771,18 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats) /* not clearing cache_bytes_max */ /* not clearing cache_eviction_maximum_page_size */ stats->cache_eviction_dirty = 0; + stats->cache_eviction_app_dirty = 0; stats->cache_eviction_deepen = 0; stats->cache_write_lookaside = 0; /* not clearing cache_pages_inuse */ stats->cache_eviction_force = 0; stats->cache_eviction_force_delete = 0; stats->cache_eviction_app = 0; + stats->cache_eviction_pages_queued = 0; + stats->cache_eviction_pages_queued_oldest = 0; stats->cache_read = 0; stats->cache_read_lookaside = 0; + stats->cache_eviction_pages_seen = 0; stats->cache_eviction_fail = 0; stats->cache_eviction_walk = 0; stats->cache_write = 0; @@ -943,6 +959,10 @@ __wt_stat_connection_aggregate( WT_STAT_READ(from, cache_eviction_worker_evicting); to->cache_eviction_force_fail += WT_STAT_READ(from, cache_eviction_force_fail); + to->cache_eviction_walks_active += + WT_STAT_READ(from, cache_eviction_walks_active); + to->cache_eviction_walks_started += + WT_STAT_READ(from, cache_eviction_walks_started); to->cache_eviction_hazard += WT_STAT_READ(from, cache_eviction_hazard); to->cache_inmem_splittable += @@ -962,6 +982,8 @@ __wt_stat_connection_aggregate( to->cache_eviction_maximum_page_size += WT_STAT_READ(from, cache_eviction_maximum_page_size); to->cache_eviction_dirty += WT_STAT_READ(from, cache_eviction_dirty); + to->cache_eviction_app_dirty += + WT_STAT_READ(from, cache_eviction_app_dirty); to->cache_eviction_deepen += WT_STAT_READ(from, cache_eviction_deepen); to->cache_write_lookaside += @@ -971,8 +993,14 @@ __wt_stat_connection_aggregate( to->cache_eviction_force_delete += WT_STAT_READ(from, cache_eviction_force_delete); to->cache_eviction_app += WT_STAT_READ(from, cache_eviction_app); + to->cache_eviction_pages_queued += + WT_STAT_READ(from, cache_eviction_pages_queued); + to->cache_eviction_pages_queued_oldest += + WT_STAT_READ(from, cache_eviction_pages_queued_oldest); to->cache_read += WT_STAT_READ(from, cache_read); to->cache_read_lookaside += WT_STAT_READ(from, cache_read_lookaside); + to->cache_eviction_pages_seen += + WT_STAT_READ(from, cache_eviction_pages_seen); to->cache_eviction_fail += WT_STAT_READ(from, cache_eviction_fail); to->cache_eviction_walk += WT_STAT_READ(from, cache_eviction_walk); to->cache_write += WT_STAT_READ(from, cache_write); diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index ab1cd622057..9d5975b2bc5 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -257,7 +257,7 @@ __txn_oldest_scan(WT_SESSION_IMPL *session, * Sweep the running transactions to update the oldest ID required. */ int -__wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force) +__wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags) { WT_CONNECTION_IMPL *conn; WT_DECL_RET; @@ -265,9 +265,12 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force) WT_TXN_GLOBAL *txn_global; uint64_t current_id, last_running, oldest_id; uint64_t prev_last_running, prev_oldest_id; + bool strict, wait; conn = S2C(session); txn_global = &conn->txn_global; + strict = LF_ISSET(WT_TXN_OLDEST_STRICT); + wait = LF_ISSET(WT_TXN_OLDEST_WAIT); current_id = last_running = txn_global->current; prev_last_running = txn_global->last_running; @@ -278,11 +281,11 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force) * oldest ID isn't too far behind, avoid scanning. */ if (prev_oldest_id == current_id || - (!force && WT_TXNID_LT(current_id, prev_oldest_id + 100))) + (!strict && WT_TXNID_LT(current_id, prev_oldest_id + 100))) return (0); /* First do a read-only scan. */ - if (force) + if (wait) WT_RET(__wt_readlock(session, txn_global->scan_rwlock)); else if ((ret = __wt_try_readlock(session, txn_global->scan_rwlock)) != 0) @@ -295,13 +298,13 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force) * non-forced updates), give up. */ if ((oldest_id == prev_oldest_id || - (!force && WT_TXNID_LT(oldest_id, prev_oldest_id + 100))) && + (!strict && WT_TXNID_LT(oldest_id, prev_oldest_id + 100))) && ((last_running == prev_last_running) || - (!force && WT_TXNID_LT(last_running, prev_last_running + 100)))) + (!strict && WT_TXNID_LT(last_running, prev_last_running + 100)))) return (0); /* It looks like an update is necessary, wait for exclusive access. */ - if (force) + if (wait) WT_RET(__wt_writelock(session, txn_global->scan_rwlock)); else if ((ret = __wt_try_writelock(session, txn_global->scan_rwlock)) != 0) diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index c1b435d9897..5c0c55963a3 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -404,7 +404,8 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) * This is particularly important for compact, so that all dirty pages * can be fully written. */ - WT_ERR(__wt_txn_update_oldest(session, true)); + WT_ERR(__wt_txn_update_oldest( + session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT)); /* Flush data-sources before we start the checkpoint. */ WT_ERR(__checkpoint_data_source(session, cfg)); @@ -1284,7 +1285,8 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) * for active readers. */ if (!btree->modified && !bulk) { - WT_RET(__wt_txn_update_oldest(session, true)); + WT_RET(__wt_txn_update_oldest( + session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT)); return (__wt_txn_visible_all(session, btree->rec_max_txn) ? __wt_cache_op(session, WT_SYNC_DISCARD) : EBUSY); } diff --git a/src/third_party/wiredtiger/test/recovery/random-abort.c b/src/third_party/wiredtiger/test/recovery/random-abort.c index cd7d1b08708..2454aa6056b 100644 --- a/src/third_party/wiredtiger/test/recovery/random-abort.c +++ b/src/third_party/wiredtiger/test/recovery/random-abort.c @@ -36,7 +36,7 @@ #include <unistd.h> #endif -#include <wiredtiger.h> +#include <wt_internal.h> #include "test_util.i" @@ -44,7 +44,8 @@ static char home[512]; /* Program working dir */ static const char *progname; /* Program name */ static const char * const uri = "table:main"; -#define RECORDS_FILE "records" +#define NTHREADS 5 +#define RECORDS_FILE "records-%u" #define ENV_CONFIG \ "create,log=(file_max=10M,archive=false,enabled)," \ @@ -55,71 +56,66 @@ static const char * const uri = "table:main"; static void usage(void) { - fprintf(stderr, "usage: %s [-h dir]\n", progname); + fprintf(stderr, "usage: %s [-h dir] [-T threads]\n", progname); exit(EXIT_FAILURE); } +typedef struct { + WT_CONNECTION *conn; + uint64_t start; + uint32_t id; +} WT_THREAD_DATA; + /* * Child process creates the database and table, and then writes data into * the table until it is killed by the parent. */ -static void -fill_db(void) +static void * +thread_run(void *arg) { FILE *fp; - WT_CONNECTION *conn; WT_CURSOR *cursor; WT_ITEM data; WT_RAND_STATE rnd; WT_SESSION *session; + WT_THREAD_DATA *td; uint64_t i; int ret; - uint8_t buf[MAX_VAL]; + char buf[MAX_VAL], kname[64]; __wt_random_init(&rnd); memset(buf, 0, sizeof(buf)); - /* - * Initialize the first 25% to random values. Leave a bunch of data - * space at the end to emphasize zero data. - */ - for (i = 0; i < MAX_VAL/4; i++) - buf[i] = (uint8_t)__wt_random(&rnd); + memset(kname, 0, sizeof(kname)); + td = (WT_THREAD_DATA *)arg; /* - * Run in the home directory so that the records file is in there too. + * The value is the name of the record file with our id appended. */ - if (chdir(home) != 0) - testutil_die(errno, "chdir: %s", home); - if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn)) != 0) - testutil_die(ret, "wiredtiger_open"); - if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) - testutil_die(ret, "WT_CONNECTION:open_session"); - if ((ret = session->create(session, - uri, "key_format=Q,value_format=u")) != 0) - testutil_die(ret, "WT_SESSION.create: %s", uri); - if ((ret = - session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) - testutil_die(ret, "WT_SESSION.open_cursor: %s", uri); - + snprintf(buf, sizeof(buf), RECORDS_FILE, td->id); /* * Keep a separate file with the records we wrote for checking. */ - (void)unlink(RECORDS_FILE); - if ((fp = fopen(RECORDS_FILE, "w")) == NULL) + (void)unlink(buf); + if ((fp = fopen(buf, "w")) == NULL) testutil_die(errno, "fopen"); /* * Set to no buffering. */ __wt_stream_set_no_buffer(fp); - + if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0) + testutil_die(ret, "WT_CONNECTION:open_session"); + if ((ret = + session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) + testutil_die(ret, "WT_SESSION.open_cursor: %s", uri); + data.data = buf; + data.size = sizeof(buf); /* - * Write data into the table until we are killed by the parent. - * The data in the buffer is already set to random content. + * Write our portion of the key space until we're killed. */ - data.data = buf; - for (i = 0;; ++i) { + for (i = td->start; ; ++i) { + snprintf(kname, sizeof(kname), "%" PRIu64, i); data.size = __wt_random(&rnd) % MAX_VAL; - cursor->set_key(cursor, i); + cursor->set_key(cursor, kname); cursor->set_value(cursor, &data); if ((ret = cursor->insert(cursor)) != 0) testutil_die(ret, "WT_CURSOR.insert"); @@ -128,9 +124,62 @@ fill_db(void) */ if (fprintf(fp, "%" PRIu64 "\n", i) == -1) testutil_die(errno, "fprintf"); - if (i % 5000) - __wt_yield(); } + return (NULL); +} + +/* + * Child process creates the database and table, and then creates worker + * threads to add data until it is killed by the parent. + */ +static void fill_db(uint32_t) + WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); +static void +fill_db(uint32_t nth) +{ + pthread_t *thr; + WT_CONNECTION *conn; + WT_SESSION *session; + WT_THREAD_DATA *td; + uint32_t i; + int ret; + + thr = calloc(nth, sizeof(pthread_t)); + td = calloc(nth, sizeof(WT_THREAD_DATA)); + if (chdir(home) != 0) + testutil_die(errno, "Child chdir: %s", home); + if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn)) != 0) + testutil_die(ret, "wiredtiger_open"); + if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) + testutil_die(ret, "WT_CONNECTION:open_session"); + if ((ret = session->create(session, + uri, "key_format=S,value_format=u")) != 0) + testutil_die(ret, "WT_SESSION.create: %s", uri); + if ((ret = session->close(session, NULL)) != 0) + testutil_die(ret, "WT_SESSION:close"); + + for (i = 0; i < nth; ++i) { + td[i].conn = conn; + td[i].start = (UINT64_MAX / nth) * i; + td[i].id = i; + if ((ret = pthread_create( + &thr[i], NULL, thread_run, &td[i])) != 0) + testutil_die(ret, "pthread_create"); + } + printf("Spawned %" PRIu32 " writer threads\n", nth); + fflush(stdout); + /* + * The threads never exit, so the child will just wait here until + * it is killed. + */ + for (i = 0; i < nth; ++i) + pthread_join(thr[i], NULL); + /* + * NOTREACHED + */ + free(thr); + free(td); + exit(EXIT_SUCCESS); } extern int __wt_optind; @@ -147,23 +196,28 @@ main(int argc, char *argv[]) WT_SESSION *session; WT_RAND_STATE rnd; uint64_t key; - uint32_t absent, count, timeout; + uint32_t absent, count, i, nth, timeout; int ch, status, ret; pid_t pid; const char *working_dir; + char fname[64], kname[64]; if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL) progname = argv[0]; else ++progname; - working_dir = "WT_TEST.random-abort"; + working_dir = "WT_TEST.random-abort-many"; timeout = 10; - while ((ch = __wt_getopt(progname, argc, argv, "h:t:")) != EOF) + nth = NTHREADS; + while ((ch = __wt_getopt(progname, argc, argv, "h:T:t:")) != EOF) switch (ch) { case 'h': working_dir = __wt_optarg; break; + case 'T': + nth = (uint32_t)atoi(__wt_optarg); + break; case 't': timeout = (uint32_t)atoi(__wt_optarg); break; @@ -187,7 +241,7 @@ main(int argc, char *argv[]) testutil_die(errno, "fork"); if (pid == 0) { /* child */ - fill_db(); + fill_db(nth); return (EXIT_SUCCESS); } @@ -212,7 +266,7 @@ main(int argc, char *argv[]) * this is the place to do it. */ if (chdir(home) != 0) - testutil_die(errno, "chdir: %s", home); + testutil_die(errno, "parent chdir: %s", home); printf("Open database, run recovery and verify content\n"); if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn)) != 0) testutil_die(ret, "wiredtiger_open"); @@ -222,30 +276,35 @@ main(int argc, char *argv[]) session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) testutil_die(ret, "WT_SESSION.open_cursor: %s", uri); - if ((fp = fopen(RECORDS_FILE, "r")) == NULL) - testutil_die(errno, "fopen"); + absent = count = 0; + for (i = 0; i < nth; ++i) { + snprintf(fname, sizeof(fname), RECORDS_FILE, i); + if ((fp = fopen(fname, "r")) == NULL) + testutil_die(errno, "fopen"); - /* - * For every key in the saved file, verify that the key exists - * in the table after recovery. Since we did write-no-sync, we - * expect every key to have been recovered. - */ - for (absent = count = 0;; ++count) { - ret = fscanf(fp, "%" SCNu64 "\n", &key); - if (ret != EOF && ret != 1) - testutil_die(errno, "fscanf"); - if (ret == EOF) - break; - cursor->set_key(cursor, key); - if ((ret = cursor->search(cursor)) != 0) { - if (ret != WT_NOTFOUND) - testutil_die(ret, "search"); - printf("no record with key %" PRIu64 "\n", key); - ++absent; + /* + * For every key in the saved file, verify that the key exists + * in the table after recovery. Since we did write-no-sync, we + * expect every key to have been recovered. + */ + for (count = 0;; ++count) { + ret = fscanf(fp, "%" SCNu64 "\n", &key); + if (ret != EOF && ret != 1) + testutil_die(errno, "fscanf"); + if (ret == EOF) + break; + snprintf(kname, sizeof(kname), "%" PRIu64, key); + cursor->set_key(cursor, kname); + if ((ret = cursor->search(cursor)) != 0) { + if (ret != WT_NOTFOUND) + testutil_die(ret, "search"); + printf("no record with key %" PRIu64 "\n", key); + ++absent; + } } + if (fclose(fp) != 0) + testutil_die(errno, "fclose"); } - if (fclose(fp) != 0) - testutil_die(errno, "fclose"); if ((ret = conn->close(conn, NULL)) != 0) testutil_die(ret, "WT_CONNECTION:close"); if (absent) { diff --git a/src/third_party/wiredtiger/tools/wtstats/stat_data.py b/src/third_party/wiredtiger/tools/wtstats/stat_data.py index c75e4f194dd..7c42ab4d926 100644 --- a/src/third_party/wiredtiger/tools/wtstats/stat_data.py +++ b/src/third_party/wiredtiger/tools/wtstats/stat_data.py @@ -5,6 +5,7 @@ no_scale_per_second_list = [ 'async: maximum work queue length', 'cache: bytes currently in the cache', 'cache: eviction currently operating in aggressive mode', + 'cache: files with active eviction walks', 'cache: maximum bytes configured', 'cache: maximum page size at eviction', 'cache: pages currently held in the cache', @@ -59,6 +60,7 @@ no_scale_per_second_list = [ 'btree: overflow pages', 'btree: row-store internal pages', 'btree: row-store leaf pages', + 'cache: bytes currently in the cache', 'cache: overflow values cached in memory', 'LSM: bloom filters in the LSM tree', 'LSM: chunks in the LSM tree', @@ -71,6 +73,7 @@ no_clear_list = [ 'async: maximum work queue length', 'cache: bytes currently in the cache', 'cache: eviction currently operating in aggressive mode', + 'cache: files with active eviction walks', 'cache: maximum bytes configured', 'cache: maximum page size at eviction', 'cache: pages currently held in the cache', @@ -102,6 +105,7 @@ no_clear_list = [ 'transaction: transaction range of IDs currently pinned by a checkpoint', 'transaction: transaction range of IDs currently pinned by named snapshots', 'btree: btree checkpoint generation', + 'cache: bytes currently in the cache', 'session: open cursor count', ] prefix_list = [ |