diff options
author | Dan Pasette <dan@10gen.com> | 2015-01-27 05:48:08 -0500 |
---|---|---|
committer | Dan Pasette <dan@mongodb.com> | 2015-01-27 05:48:08 -0500 |
commit | e926b20df8bcb14985817d2b37c61e2f8889fb27 (patch) | |
tree | 9b717bb3965470297112fcdbf26dc0ece70721e2 /src/third_party | |
parent | ae258579edb7a9c6185a515bff5b57f8e80d0088 (diff) | |
download | mongo-e926b20df8bcb14985817d2b37c61e2f8889fb27.tar.gz |
Import wiredtiger-wiredtiger-mongodb-2.8-rc6-47-g5b3283e.tar.gz from wiredtiger branch mongodb-2.8
Diffstat (limited to 'src/third_party')
39 files changed, 509 insertions, 317 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py index 0141526285c..7754a3a1d13 100644 --- a/src/third_party/wiredtiger/dist/api_data.py +++ b/src/third_party/wiredtiger/dist/api_data.py @@ -309,8 +309,18 @@ connection_runtime_config = [ ]), Config('cache_size', '100MB', r''' maximum heap memory to allocate for the cache. A database should - configure either a cache_size or a shared_cache not both''', + configure either \c cache_size or \c shared_cache but not both''', min='1MB', max='10TB'), + Config('cache_overhead', '8', r''' + assume the heap allocator overhead is the specified percentage, and + adjust the cache size by that amount (for example, if the cache size is + 100GB, a percentage of 10 means WiredTiger limits itself to allocating + 90GB of memory). This value is configurable because different heap + allocators have different overhead and different workloads will have + different heap allocation sizes and patterns, therefore applications + may need to adjust this value based on allocator choice and behavior + in measured workloads''', + min='0', max='30'), Config('checkpoint', '', r''' periodically checkpoint the database''', type='category', subconfig=[ diff --git a/src/third_party/wiredtiger/dist/s_all b/src/third_party/wiredtiger/dist/s_all index 1b171bdeafd..0e584b938b9 100644 --- a/src/third_party/wiredtiger/dist/s_all +++ b/src/third_party/wiredtiger/dist/s_all @@ -69,6 +69,7 @@ run "sh ./s_copyright" "checking copyright notices" run "sh ./s_define" "checking for unused #defines" run "sh ./s_funcs" "checking for unused functions" run "sh ./s_getopt" "checking for incorrect getopt usage" +run "sh ./s_lang" "checking for SWIG generated name conflicts" run "sh ./s_longlines" "checking for long lines" run "sh ./s_stat" "checking for unused statistics fields" run "sh ./s_string" "checking string spelling" diff --git a/src/third_party/wiredtiger/dist/s_lang b/src/third_party/wiredtiger/dist/s_lang new 
file mode 100755 index 00000000000..0f0519f87e4 --- /dev/null +++ b/src/third_party/wiredtiger/dist/s_lang @@ -0,0 +1,25 @@ +#! /bin/sh + +# Check lang directories for potential name conflicts +t=__wt.$$ +trap 'rm -f $t; exit 0' 0 1 2 3 13 15 + +cd ../lang + +for d in *; do + f=`find $d -name 'wiredtiger_wrap.c'` + test -z "$f" && continue + + sed -e '/SWIGINTERN.*__wt_[a-z][a-z]*_[a-z]/!d' \ + -e '/__wt_[^(]*__.*(/d' \ + -e '/_wrap/d' \ + -e "/_${d}_/d" \ + $f > $t + + test -s $t && { + echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=" + echo "$l: potential SWIG naming conflict" + echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=" + cat $t + } +done diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok index 6c658df8bf0..cea96db2848 100644 --- a/src/third_party/wiredtiger/dist/s_string.ok +++ b/src/third_party/wiredtiger/dist/s_string.ok @@ -73,6 +73,7 @@ Crummey CustomersPhone DATAITEMs DECL +DECR DESC DHANDLE DLFCN @@ -397,6 +398,7 @@ agc alfred alloc allocator +allocators allocfile allocsize amd diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py index 69e8d2ed21e..bad65528521 100644 --- a/src/third_party/wiredtiger/dist/stat_data.py +++ b/src/third_party/wiredtiger/dist/stat_data.py @@ -182,7 +182,10 @@ connection_stats = [ 'eviction server unable to reach eviction goal'), CacheStat('cache_eviction_split', 'pages split during eviction'), CacheStat('cache_eviction_walk', 'pages walked for eviction'), + CacheStat('cache_eviction_worker_evicting', + 'eviction worker thread evicting pages'), CacheStat('cache_inmem_split', 'in-memory page splits'), + CacheStat('cache_overhead', 'percentage overhead', 'no_clear,no_scale'), CacheStat('cache_pages_dirty', 'tracked dirty pages in the cache', 'no_scale'), CacheStat('cache_pages_inuse', diff --git a/src/third_party/wiredtiger/lang/python/wiredtiger.i b/src/third_party/wiredtiger/lang/python/wiredtiger.i 
index bf726ceac0a..d35d7d5c456 100644 --- a/src/third_party/wiredtiger/lang/python/wiredtiger.i +++ b/src/third_party/wiredtiger/lang/python/wiredtiger.i @@ -157,7 +157,7 @@ from packing import pack, unpack %define DESTRUCTOR(class, method) %feature("shadow") class::method %{ def method(self, *args): - '''close(self, config) -> int + '''method(self, config) -> int @copydoc class::method''' try: @@ -171,6 +171,26 @@ DESTRUCTOR(__wt_connection, close) DESTRUCTOR(__wt_cursor, close) DESTRUCTOR(__wt_session, close) +/* + * OVERRIDE_METHOD must be used when overriding or extending an existing + * method in the C interface. It creates Python method() that calls + * _method(), which is the extended version of the method. This works + * around potential naming conflicts. Without this technique, for example, + * defining __wt_cursor::equals() creates the wrapper function + * __wt_cursor_equals(), which may be defined in the WT library. + */ +%define OVERRIDE_METHOD(cclass, pyclass, method, pyargs) +%extend cclass { +%pythoncode %{ + def method(self, *args): + '''method pyargs -> int + + @copydoc class::method''' + return self._##method(*args) +%} +}; +%enddef + /* Don't require empty config strings. */ %typemap(default) const char *config { $1 = NULL; } %typemap(default) WT_CURSOR *to_dup { $1 = NULL; } @@ -389,9 +409,9 @@ NOTFOUND_OK(__wt_cursor::remove) NOTFOUND_OK(__wt_cursor::search) NOTFOUND_OK(__wt_cursor::update) -COMPARE_OK(__wt_cursor::compare) -COMPARE_OK(__wt_cursor::equals) -COMPARE_NOTFOUND_OK(__wt_cursor::search_near) +COMPARE_OK(__wt_cursor::_compare) +COMPARE_OK(__wt_cursor::_equals) +COMPARE_NOTFOUND_OK(__wt_cursor::_search_near) /* Lastly, some methods need no (additional) error checking. 
*/ %exception __wt_connection::get_home; @@ -428,6 +448,10 @@ COMPARE_NOTFOUND_OK(__wt_cursor::search_near) %ignore __wt_cursor::equals(WT_CURSOR *, WT_CURSOR *, int *); %ignore __wt_cursor::search_near(WT_CURSOR *, int *); +OVERRIDE_METHOD(__wt_cursor, WT_CURSOR, compare, (self, other)) +OVERRIDE_METHOD(__wt_cursor, WT_CURSOR, equals, (self, other)) +OVERRIDE_METHOD(__wt_cursor, WT_CURSOR, search_near, (self)) + /* SWIG magic to turn Python byte strings into data / size. */ %apply (char *STRING, int LENGTH) { (char *data, int size) }; @@ -685,7 +709,7 @@ typedef int int_void; } /* compare: special handling. */ - int compare(WT_CURSOR *other) { + int _compare(WT_CURSOR *other) { int cmp = 0; int ret = 0; if (other == NULL) { @@ -709,7 +733,7 @@ typedef int int_void; } /* equals: special handling. */ - int equals(WT_CURSOR *other) { + int _equals(WT_CURSOR *other) { int cmp = 0; int ret = 0; if (other == NULL) { @@ -728,7 +752,7 @@ typedef int int_void; } /* search_near: special handling. 
*/ - int search_near() { + int _search_near() { int cmp = 0; int ret = $self->search_near($self, &cmp); /* @@ -828,7 +852,7 @@ typedef int int_void; }; %extend __wt_session { - int log_printf(const char *msg) { + int _log_printf(const char *msg) { return self->log_printf(self, "%s", msg); } @@ -892,6 +916,8 @@ int verbose_build(); %ignore __wt_connection::get_extension_api; %ignore __wt_session::log_printf; +OVERRIDE_METHOD(__wt_session, WT_SESSION, log_printf, (self, msg)) + %ignore wiredtiger_struct_pack; %ignore wiredtiger_struct_size; %ignore wiredtiger_struct_unpack; diff --git a/src/third_party/wiredtiger/src/block/block_compact.c b/src/third_party/wiredtiger/src/block/block_compact.c index 79494a274a9..a28b6d7a71e 100644 --- a/src/third_party/wiredtiger/src/block/block_compact.c +++ b/src/third_party/wiredtiger/src/block/block_compact.c @@ -50,6 +50,8 @@ __wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block) block->allocfirst = block->allocfirst_save; __wt_spin_unlock(session, &block->live_lock); + block->compact_pct_tenths = 0; + return (0); } @@ -64,7 +66,7 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, int *skipp) WT_EXT *ext; WT_EXTLIST *el; WT_FH *fh; - wt_off_t avail, ninety; + wt_off_t avail_eighty, avail_ninety, eighty, ninety; *skipp = 1; /* Return a default skip. */ @@ -84,31 +86,53 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, int *skipp) if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT)) WT_ERR(__block_dump_avail(session, block)); - /* Sum the number of available bytes in the first 90% of the file. */ - avail = 0; + /* Sum the available bytes in the first 80% and 90% of the file. 
+ */ + avail_eighty = avail_ninety = 0; ninety = fh->size - fh->size / 10; + eighty = fh->size - ((fh->size / 10) * 2); el = &block->live.avail; WT_EXT_FOREACH(ext, el->off) - if (ext->off < ninety) - avail += ext->size; - - /* - * If at least 10% of the total file is available and in the first 90% - * of the file, we'll try compaction. - */ - if (avail >= fh->size / 10) - *skipp = 0; + if (ext->off < ninety) { + avail_ninety += ext->size; + if (ext->off < eighty) + avail_eighty += ext->size; + } WT_ERR(__wt_verbose(session, WT_VERB_COMPACT, "%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first " - "90%% of the file, require 10%% or %" PRIuMAX "MB (%" PRIuMAX - ") to perform compaction, compaction %s", + "80%% of the file", + block->name, + (uintmax_t)avail_eighty / WT_MEGABYTE, (uintmax_t)avail_eighty)); + WT_ERR(__wt_verbose(session, WT_VERB_COMPACT, + "%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first " + "90%% of the file", + block->name, + (uintmax_t)avail_ninety / WT_MEGABYTE, (uintmax_t)avail_ninety)); + WT_ERR(__wt_verbose(session, WT_VERB_COMPACT, + "%s: require 10%% or %" PRIuMAX "MB (%" PRIuMAX ") in the first " + "90%% of the file to perform compaction, compaction %s", block->name, - (uintmax_t)avail / WT_MEGABYTE, (uintmax_t)avail, (uintmax_t)(fh->size / 10) / WT_MEGABYTE, (uintmax_t)fh->size / 10, *skipp ? "skipped" : "proceeding")); + /* + * If at least 20% of the total file is available and in the first 80% + * of the file, we'll try compaction on the last 20% of the file; else, + * if at least 10% of the total file is available and in the first 90% + * of the file, we'll try compaction on the last 10% of the file. + * + * We could push this further, but there are diminishing returns: a mostly + * empty file can be processed quickly, so more aggressive compaction is + * less useful. 
+ */ + if (avail_ninety >= fh->size / 10) { + *skipp = 0; + block->compact_pct_tenths = 1; + if (avail_eighty >= ((fh->size / 10) * 2)) + block->compact_pct_tenths = 2; + } + err: __wt_spin_unlock(session, &block->live_lock); return (ret); @@ -126,7 +150,7 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session, WT_EXT *ext; WT_EXTLIST *el; WT_FH *fh; - wt_off_t ninety, offset; + wt_off_t limit, offset; uint32_t size, cksum; WT_UNUSED(addr_size); @@ -138,21 +162,24 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session, WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum)); /* - * If this block is in the last 10% of the file and there's a block on - * the available list that's in the first 90% of the file, rewrite the - * block. Checking the available list is necessary (otherwise writing - * the block would extend the file), but there's an obvious race if the - * file is sufficiently busy. + * If this block is in the chosen percentage of the file and there's a + * block on the available list that appears before that percentage of + * the file, rewrite the block. Checking the available list is + * necessary (otherwise writing the block would extend the file), but + * there's an obvious race if the file is sufficiently busy. 
*/ __wt_spin_lock(session, &block->live_lock); - ninety = fh->size - fh->size / 10; - if (offset > ninety) { + limit = fh->size - ((fh->size / 10) * block->compact_pct_tenths); + if (offset > limit) { el = &block->live.avail; - WT_EXT_FOREACH(ext, el->off) - if (ext->off < ninety && ext->size >= size) { + WT_EXT_FOREACH(ext, el->off) { + if (ext->off >= limit) + break; + if (ext->size >= size) { *skipp = 0; break; } + } } __wt_spin_unlock(session, &block->live_lock); diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c index 943a8573f29..f0414c4e855 100644 --- a/src/third_party/wiredtiger/src/btree/bt_handle.c +++ b/src/third_party/wiredtiger/src/btree/bt_handle.c @@ -659,7 +659,8 @@ __btree_page_sizes(WT_SESSION_IMPL *session) * cache, it may not have been set. */ WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval)); - btree->maxmempage = WT_MAX((uint64_t)cval.val, 50 * btree->maxleafpage); + btree->maxmempage = + WT_MAX((uint64_t)cval.val, 50 * (uint64_t)btree->maxleafpage); cache_size = S2C(session)->cache_size; if (cache_size > 0) btree->maxmempage = WT_MIN(btree->maxmempage, cache_size / 2); diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c index 94c73106d70..10112d46289 100644 --- a/src/third_party/wiredtiger/src/btree/bt_split.c +++ b/src/third_party/wiredtiger/src/btree/bt_split.c @@ -13,7 +13,7 @@ * estimate of allocation overhead to every object. */ #define WT_MEMSIZE_ADD(total, len) do { \ - total += (len) + WT_ALLOC_OVERHEAD; \ + total += (len); \ } while (0) #define WT_MEMSIZE_TRANSFER(from_decr, to_incr, len) do { \ WT_MEMSIZE_ADD(from_decr, len); \ @@ -728,14 +728,6 @@ __split_multi_inmem( */ page->modify->first_dirty_txn = WT_TXN_FIRST; - /* - * XXX Don't allow this page to be evicted immediately. - * - * In some cases involving forced eviction during truncates, a reader - * ends up looking at an evicted page. 
This is a temporary workaround. - */ - page->modify->inmem_split_txn = __wt_txn_new_id(session); - err: /* Free any resources that may have been cached in the cursor. */ WT_TRET(__wt_btcur_close(&cbt)); @@ -889,8 +881,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, for (i = 0, deleted_entries = 0; i < parent_entries; ++i) { next_ref = pindex->index[i]; WT_ASSERT(session, next_ref->state != WT_REF_SPLIT); - if (next_ref->state == WT_REF_DELETED && - next_ref->page_del == NULL && + if (__wt_delete_page_skip(session, next_ref) && WT_ATOMIC_CAS4(next_ref->state, WT_REF_DELETED, WT_REF_SPLIT)) deleted_entries++; @@ -986,6 +977,18 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, session, 0, ikey, size)); WT_MEMSIZE_ADD(parent_decr, size); } + /* + * The page_del structure can be freed + * immediately: it is only read when the ref + * state is WT_REF_DELETED. The size of the + * structures wasn't added to the parent: don't + * decrement. + */ + if (next_ref->page_del != NULL) { + __wt_free(session, + next_ref->page_del->update_list); + __wt_free(session, next_ref->page_del); + } } WT_TRET(__split_safe_free( diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c index ec44e8839b0..646551cdd38 100644 --- a/src/third_party/wiredtiger/src/config/config_def.c +++ b/src/third_party/wiredtiger/src/config/config_def.c @@ -82,6 +82,7 @@ static const WT_CONFIG_CHECK confchk_statistics_log_subconfigs[] = { static const WT_CONFIG_CHECK confchk_connection_reconfigure[] = { { "async", "category", NULL, confchk_async_subconfigs }, + { "cache_overhead", "int", "min=0,max=30", NULL }, { "cache_size", "int", "min=1MB,max=10TB", NULL }, { "checkpoint", "category", NULL, confchk_checkpoint_subconfigs }, @@ -326,6 +327,7 @@ static const WT_CONFIG_CHECK confchk_transaction_sync_subconfigs[] = { static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { { "async", "category", 
NULL, confchk_async_subconfigs }, { "buffer_alignment", "int", "min=-1,max=1MB", NULL }, + { "cache_overhead", "int", "min=0,max=30", NULL }, { "cache_size", "int", "min=1MB,max=10TB", NULL }, { "checkpoint", "category", NULL, confchk_checkpoint_subconfigs }, @@ -375,6 +377,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { { "async", "category", NULL, confchk_async_subconfigs }, { "buffer_alignment", "int", "min=-1,max=1MB", NULL }, + { "cache_overhead", "int", "min=0,max=30", NULL }, { "cache_size", "int", "min=1MB,max=10TB", NULL }, { "checkpoint", "category", NULL, confchk_checkpoint_subconfigs }, @@ -425,6 +428,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = { static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { { "async", "category", NULL, confchk_async_subconfigs }, { "buffer_alignment", "int", "min=-1,max=1MB", NULL }, + { "cache_overhead", "int", "min=0,max=30", NULL }, { "cache_size", "int", "min=1MB,max=10TB", NULL }, { "checkpoint", "category", NULL, confchk_checkpoint_subconfigs }, @@ -471,6 +475,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = { static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = { { "async", "category", NULL, confchk_async_subconfigs }, { "buffer_alignment", "int", "min=-1,max=1MB", NULL }, + { "cache_overhead", "int", "min=0,max=30", NULL }, { "cache_size", "int", "min=1MB,max=10TB", NULL }, { "checkpoint", "category", NULL, confchk_checkpoint_subconfigs }, @@ -552,14 +557,14 @@ static const WT_CONFIG_ENTRY config_entries[] = { confchk_connection_open_session }, { "connection.reconfigure", - "async=(enabled=0,ops_max=1024,threads=2),cache_size=100MB," - "checkpoint=(log_size=0,name=\"WiredTigerCheckpoint\",wait=0)," - "error_prefix=,eviction=(threads_max=1,threads_min=1)," - "eviction_dirty_target=80,eviction_target=80,eviction_trigger=95," - 
"lsm_manager=(merge=,worker_thread_max=4),lsm_merge=," - "shared_cache=(chunk=10MB,name=,reserve=0,size=500MB)," - "statistics=none,statistics_log=(on_close=0," - "path=\"WiredTigerStat.%d.%H\",sources=," + "async=(enabled=0,ops_max=1024,threads=2),cache_overhead=8," + "cache_size=100MB,checkpoint=(log_size=0," + "name=\"WiredTigerCheckpoint\",wait=0),error_prefix=," + "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80," + "eviction_target=80,eviction_trigger=95,lsm_manager=(merge=," + "worker_thread_max=4),lsm_merge=,shared_cache=(chunk=10MB,name=," + "reserve=0,size=500MB),statistics=none,statistics_log=(on_close=0" + ",path=\"WiredTigerStat.%d.%H\",sources=," "timestamp=\"%b %d %H:%M:%S\",wait=0),verbose=", confchk_connection_reconfigure }, @@ -672,7 +677,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "wiredtiger_open", "async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1," - "cache_size=100MB,checkpoint=(log_size=0," + "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0," "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=," "config_base=,create=0,direct_io=,error_prefix=," "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80," @@ -690,7 +695,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "wiredtiger_open_all", "async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1," - "cache_size=100MB,checkpoint=(log_size=0," + "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0," "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=," "config_base=,create=0,direct_io=,error_prefix=," "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80," @@ -708,7 +713,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "wiredtiger_open_basecfg", "async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1," - "cache_size=100MB,checkpoint=(log_size=0," + "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0," "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=," 
"direct_io=,error_prefix=,eviction=(threads_max=1,threads_min=1)," "eviction_dirty_target=80,eviction_target=80,eviction_trigger=95," @@ -725,7 +730,7 @@ static const WT_CONFIG_ENTRY config_entries[] = { }, { "wiredtiger_open_usercfg", "async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1," - "cache_size=100MB,checkpoint=(log_size=0," + "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0," "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=," "direct_io=,error_prefix=,eviction=(threads_max=1,threads_min=1)," "eviction_dirty_target=80,eviction_target=80,eviction_trigger=95," diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c index 91f82a5105b..b278d7a6b8a 100644 --- a/src/third_party/wiredtiger/src/conn/conn_cache.c +++ b/src/third_party/wiredtiger/src/conn/conn_cache.c @@ -29,6 +29,8 @@ __wt_cache_config(WT_SESSION_IMPL *session, const char *cfg[]) if (!F_ISSET(conn, WT_CONN_CACHE_POOL)) { WT_RET(__wt_config_gets(session, cfg, "cache_size", &cval)); conn->cache_size = (uint64_t)cval.val; + WT_RET(__wt_config_gets(session, cfg, "cache_overhead", &cval)); + conn->cache_overhead = (int)cval.val; } else { WT_RET(__wt_config_gets( session, cfg, "shared_cache.reserve", &cval)); @@ -142,6 +144,8 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session) WT_STAT_SET(stats, cache_bytes_max, conn->cache_size); WT_STAT_SET(stats, cache_bytes_inuse, __wt_cache_bytes_inuse(cache)); + + WT_STAT_SET(stats, cache_overhead, conn->cache_overhead); WT_STAT_SET(stats, cache_pages_inuse, __wt_cache_pages_inuse(cache)); WT_STAT_SET(stats, cache_bytes_dirty, cache->bytes_dirty); WT_STAT_SET(stats, diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c index f70a9a4a60c..abf21408d2d 100644 --- a/src/third_party/wiredtiger/src/conn/conn_log.c +++ b/src/third_party/wiredtiger/src/conn/conn_log.c @@ -444,6 +444,7 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const 
char *cfg[]) WT_INIT_LSN(&log->write_lsn); log->fileid = 0; WT_RET(__wt_cond_alloc(session, "log sync", 0, &log->log_sync_cond)); + WT_RET(__wt_cond_alloc(session, "log write", 0, &log->log_write_cond)); WT_RET(__wt_log_open(session)); WT_RET(__wt_log_slot_init(session)); @@ -565,6 +566,7 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) WT_TRET(__wt_log_slot_destroy(session)); WT_TRET(__wt_cond_destroy(session, &conn->log->log_sync_cond)); + WT_TRET(__wt_cond_destroy(session, &conn->log->log_write_cond)); WT_TRET(__wt_rwlock_destroy(session, &conn->log->log_archive_lock)); __wt_spin_destroy(session, &conn->log->log_lock); __wt_spin_destroy(session, &conn->log->log_slot_lock); diff --git a/src/third_party/wiredtiger/src/cursor/cur_ds.c b/src/third_party/wiredtiger/src/cursor/cur_ds.c index cc3e23570d5..c58d6899150 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_ds.c +++ b/src/third_party/wiredtiger/src/cursor/cur_ds.c @@ -454,7 +454,7 @@ __wt_curds_open( __wt_cursor_set_key, /* set-key */ __wt_cursor_set_value, /* set-value */ __curds_compare, /* compare */ - __wt_cursor_equal, /* equals */ + __wt_cursor_equals, /* equals */ __curds_next, /* next */ __curds_prev, /* prev */ __curds_reset, /* reset */ diff --git a/src/third_party/wiredtiger/src/cursor/cur_log.c b/src/third_party/wiredtiger/src/cursor/cur_log.c index 2e2a2530df6..5de3762217f 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_log.c +++ b/src/third_party/wiredtiger/src/cursor/cur_log.c @@ -344,7 +344,7 @@ __wt_curlog_open(WT_SESSION_IMPL *session, __wt_cursor_set_key, /* set-key */ __wt_cursor_set_value, /* set-value */ __curlog_compare, /* compare */ - __wt_cursor_equal, /* equals */ + __wt_cursor_equals, /* equals */ __curlog_next, /* next */ __wt_cursor_notsup, /* prev */ __curlog_reset, /* reset */ diff --git a/src/third_party/wiredtiger/src/cursor/cur_metadata.c b/src/third_party/wiredtiger/src/cursor/cur_metadata.c index 31c96e3087a..9860eb65a55 100644 --- 
a/src/third_party/wiredtiger/src/cursor/cur_metadata.c +++ b/src/third_party/wiredtiger/src/cursor/cur_metadata.c @@ -409,7 +409,7 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session, __wt_cursor_set_key, /* set-key */ __wt_cursor_set_value, /* set-value */ __curmetadata_compare, /* compare */ - __wt_cursor_equal, /* equals */ + __wt_cursor_equals, /* equals */ __curmetadata_next, /* next */ __curmetadata_prev, /* prev */ __curmetadata_reset, /* reset */ diff --git a/src/third_party/wiredtiger/src/cursor/cur_std.c b/src/third_party/wiredtiger/src/cursor/cur_std.c index df38eb9e57d..858c6af6853 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_std.c +++ b/src/third_party/wiredtiger/src/cursor/cur_std.c @@ -480,11 +480,11 @@ __wt_cursor_close(WT_CURSOR *cursor) } /* - * __wt_cursor_equal -- + * __wt_cursor_equals -- * WT_CURSOR->equals default implementation. */ int -__wt_cursor_equal(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) +__wt_cursor_equals(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) { WT_DECL_RET; WT_SESSION_IMPL *session; diff --git a/src/third_party/wiredtiger/src/cursor/cur_table.c b/src/third_party/wiredtiger/src/cursor/cur_table.c index f8e8625b0bd..a046b2b1ed9 100644 --- a/src/third_party/wiredtiger/src/cursor/cur_table.c +++ b/src/third_party/wiredtiger/src/cursor/cur_table.c @@ -839,7 +839,7 @@ __wt_curtable_open(WT_SESSION_IMPL *session, __wt_curtable_set_key, /* set-key */ __wt_curtable_set_value, /* set-value */ __curtable_compare, /* compare */ - __wt_cursor_equal, /* equals */ + __wt_cursor_equals, /* equals */ __curtable_next, /* next */ __curtable_prev, /* prev */ __curtable_reset, /* reset */ diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index a4ae0aaf55b..384ec9be5b3 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -362,7 +362,7 @@ __evict_worker(void *arg) WT_ERR(__wt_cond_wait( session, 
cache->evict_waiter_cond, 10000)); else - WT_ERR(__evict_lru_pages(session, 1)); + WT_ERR(__evict_lru_pages(session, 0)); } WT_ERR(__wt_verbose( session, WT_VERB_EVICTSERVER, "cache eviction worker exiting")); @@ -704,7 +704,7 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session) * Get pages from the LRU queue to evict. */ static int -__evict_lru_pages(WT_SESSION_IMPL *session, int is_app) +__evict_lru_pages(WT_SESSION_IMPL *session, int is_server) { WT_DECL_RET; @@ -712,7 +712,7 @@ __evict_lru_pages(WT_SESSION_IMPL *session, int is_app) * Reconcile and discard some pages: EBUSY is returned if a page fails * eviction because it's unavailable, continue in that case. */ - while ((ret = __wt_evict_lru_page(session, is_app)) == 0 || + while ((ret = __wt_evict_lru_page(session, is_server)) == 0 || ret == EBUSY) ; return (ret == WT_NOTFOUND ? 0 : ret); @@ -822,10 +822,8 @@ __evict_server_work(WT_SESSION_IMPL *session) if (cache->evict_candidates > 10 && cache->evict_current != NULL) __wt_yield(); - } else { - WT_STAT_FAST_CONN_INCR(session, cache_eviction_server_evicting); - WT_RET(__evict_lru_pages(session, 0)); - } + } else + WT_RET(__evict_lru_pages(session, 1)); return (0); } @@ -1185,7 +1183,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags) */ static int __evict_get_ref( - WT_SESSION_IMPL *session, int is_app, WT_BTREE **btreep, WT_REF **refp) + WT_SESSION_IMPL *session, int is_server, WT_BTREE **btreep, WT_REF **refp) { WT_CACHE *cache; WT_EVICT_ENTRY *evict; @@ -1197,18 +1195,6 @@ __evict_get_ref( *refp = NULL; /* - * A pathological case: if we're the oldest transaction in the system - * and the eviction server is stuck trying to find space, abort the - * transaction to give up all hazard pointers before trying again. 
- */ - if (is_app && F_ISSET(cache, WT_EVICT_STUCK) && - __wt_txn_am_oldest(session)) { - F_CLR(cache, WT_EVICT_STUCK); - WT_STAT_FAST_CONN_INCR(session, txn_fail_cache); - return (WT_ROLLBACK); - } - - /* * Avoid the LRU lock if no pages are available. If there are pages * available, spin until we get the lock. If this function returns * without getting a page to evict, application threads assume there @@ -1228,7 +1214,7 @@ __evict_get_ref( * looking for more. */ candidates = cache->evict_candidates; - if (!is_app && candidates > 1) + if (is_server && candidates > 1) candidates /= 2; /* Get the next page queued for eviction. */ @@ -1280,7 +1266,7 @@ __evict_get_ref( * Called by both eviction and application threads to evict a page. */ int -__wt_evict_lru_page(WT_SESSION_IMPL *session, int is_app) +__wt_evict_lru_page(WT_SESSION_IMPL *session, int is_server) { WT_BTREE *btree; WT_CACHE *cache; @@ -1288,13 +1274,24 @@ __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_app) WT_PAGE *page; WT_REF *ref; - if (is_app) - WT_STAT_FAST_CONN_INCR(session, cache_eviction_app); - - WT_RET(__evict_get_ref(session, is_app, &btree, &ref)); + WT_RET(__evict_get_ref(session, is_server, &btree, &ref)); WT_ASSERT(session, ref->state == WT_REF_LOCKED); /* + * An internal session flags either the server itself or an eviction + * worker thread. + */ + if (F_ISSET(session, WT_SESSION_INTERNAL)) { + if (is_server) + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_server_evicting); + else + WT_STAT_FAST_CONN_INCR( + session, cache_eviction_worker_evicting); + } else + WT_STAT_FAST_CONN_INCR(session, cache_eviction_app); + + /* * In case something goes wrong, don't pick the same set of pages every * time. 
* @@ -1308,7 +1305,6 @@ __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_app) page->read_gen = __wt_cache_read_gen_set(session); WT_WITH_BTREE(session, btree, ret = __wt_evict_page(session, ref)); - WT_ASSERT(session, is_app || session->split_gen == 0); (void)WT_ATOMIC_SUB4(btree->evict_busy, 1); @@ -1321,6 +1317,94 @@ __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_app) return (ret); } +/* + * __wt_cache_wait -- + * Wait for space in the cache. + */ +int +__wt_cache_wait(WT_SESSION_IMPL *session, int full) +{ + WT_CACHE *cache; + WT_DECL_RET; + WT_TXN_GLOBAL *txn_global; + WT_TXN_STATE *txn_state; + int busy, count; + + cache = S2C(session)->cache; + + /* + * If the current transaction is keeping the oldest ID pinned, it is in + * the middle of an operation. This may prevent the oldest ID from + * moving forward, leading to deadlock, so only evict what we can. + * Otherwise, we are at a transaction boundary and we can work harder + * to make sure there is free space in the cache. + */ + txn_global = &S2C(session)->txn_global; + txn_state = &txn_global->states[session->id]; + busy = txn_state->id != WT_TXN_NONE || + session->nhazard > 0 || + (txn_state->snap_min != WT_TXN_NONE && + txn_global->current != txn_global->oldest_id); + if (busy && full < 100) + return (0); + count = busy ? 1 : 10; + + for (;;) { + /* + * A pathological case: if we're the oldest transaction in the + * system and the eviction server is stuck trying to find space, + * abort the transaction to give up all hazard pointers before + * trying again. 
+ */ + if (F_ISSET(cache, WT_EVICT_STUCK) && + __wt_txn_am_oldest(session)) { + F_CLR(cache, WT_EVICT_STUCK); + WT_STAT_FAST_CONN_INCR(session, txn_fail_cache); + return (WT_ROLLBACK); + } + + switch (ret = __wt_evict_lru_page(session, 0)) { + case 0: + if (--count == 0) + return (0); + break; + case EBUSY: + continue; + case WT_NOTFOUND: + break; + default: + return (ret); + } + + WT_RET(__wt_eviction_check(session, &full, 0)); + if (full < 100) + return (0); + else if (ret == 0) + continue; + + /* + * The cache is still full and no pages were found in the queue + * to evict. If this transaction is the one holding back the + * oldest ID, we can't wait forever. We'll block next time we + * are not busy. + */ + if (busy) { + __wt_txn_update_oldest(session); + if (txn_state->id == txn_global->oldest_id || + txn_state->snap_min == txn_global->oldest_id) + return (0); + } + + /* Wait for the queue to re-populate before trying again. */ + WT_RET(__wt_cond_wait(session, + S2C(session)->cache->evict_waiter_cond, 100000)); + + /* Check if things have changed so that we are busy. */ + if (!busy && txn_state->snap_min != WT_TXN_NONE && + txn_global->current != txn_global->oldest_id) + busy = count = 1; + } +} #ifdef HAVE_DIAGNOSTIC /* * __wt_cache_dump -- diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c index 99e2a6751be..5bbf3b891f7 100644 --- a/src/third_party/wiredtiger/src/evict/evict_page.c +++ b/src/third_party/wiredtiger/src/evict/evict_page.c @@ -413,12 +413,11 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, * Check for an append-only workload needing an in-memory split. * * We can't do this earlier because in-memory splits require exclusive - * access. If an in-memory split completes, the page stays in memory - * and the tree is left in the desired state: avoid the usual cleanup. 
+ * access, and we can't split if a checkpoint is in progress because + * the checkpoint could be walking the parent page. * - * Attempt the split before checking whether a checkpoint is running - - * that's not a problem here because we aren't evicting any dirty - * pages. + * If an in-memory split completes, the page stays in memory and the + * tree is left in the desired state: avoid the usual cleanup. */ if (top && !exclusive) { WT_RET(__wt_split_insert(session, ref, inmem_splitp)); diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h index f4677236d8b..9e737dc81d2 100644 --- a/src/third_party/wiredtiger/src/include/block.h +++ b/src/third_party/wiredtiger/src/include/block.h @@ -217,7 +217,6 @@ struct __wt_block { /* Configuration information, set when the file is opened. */ int allocfirst; /* Allocation is first-fit */ - int allocfirst_save; /* Allocation is first-fit, saved */ uint32_t allocsize; /* Allocation size */ size_t os_cache; /* System buffer cache flush max */ size_t os_cache_max; @@ -237,6 +236,10 @@ struct __wt_block { WT_BLOCK_CKPT live; /* Live checkpoint */ int ckpt_inprogress;/* Live checkpoint in progress */ + /* Compaction support */ + int allocfirst_save; /* Saved: allocation is first-fit */ + int compact_pct_tenths; /* Percent to compact */ + /* Salvage support */ wt_off_t slvg_off; /* Salvage file offset */ diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 7b3f6a10403..f64e80e7d12 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -27,39 +27,6 @@ __wt_page_is_modified(WT_PAGE *page) } /* - * Estimate the per-allocation overhead. All implementations of malloc / free - * have some kind of header and pad for alignment. We can't know for sure what - * that adds up to, but this is an estimate based on some measurements of heap - * size versus bytes in use. 
- */ -#define WT_ALLOC_OVERHEAD 32U - -/* - * Track a field in the cache. Use atomic CAS so that we can reliably avoid - * decrementing the cache below zero - since we use an unsigned number. - * Track if we would go below zero in a diagnostic build - something has gone - * wrong. - */ -#ifdef HAVE_DIAGNOSTIC -#define WT_CACHE_DECR(session, f, sz) do { \ - uint64_t __val = f; \ - uint64_t __sz = WT_MIN(__val, sz); \ - if (__sz < sz) \ - __wt_errx(session, "%s underflow: decrementing %" \ - WT_SIZET_FMT, #f, sz); \ - while (!WT_ATOMIC_CAS8(f, __val, __val - __sz)) \ - __val = f, __sz = WT_MIN(__val, __sz); \ -} while (0) -#else -#define WT_CACHE_DECR(session, f, sz) do { \ - uint64_t __val = f; \ - uint64_t __sz = WT_MIN(__val, sz); \ - while (!WT_ATOMIC_CAS8(f, __val, __val - __sz)) \ - __val = f, __sz = WT_MIN(__val, __sz); \ -} while (0) -#endif - -/* * __wt_cache_page_inmem_incr -- * Increment a page's memory footprint in the cache. */ @@ -68,8 +35,6 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) { WT_CACHE *cache; - size += WT_ALLOC_OVERHEAD; - cache = S2C(session)->cache; (void)WT_ATOMIC_ADD8(cache->bytes_inmem, size); (void)WT_ATOMIC_ADD8(page->memory_footprint, size); @@ -79,6 +44,34 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) } } +/* + * WT_CACHE_DECR -- + * Macro to decrement a field by a size. + * + * Be defensive and don't underflow: a band-aid on a gaping wound, but underflow + * won't make things better no matter the problem (specifically, underflow makes + * eviction crazy trying to evict non-existent memory). 
+ */ +#ifdef HAVE_DIAGNOSTIC +#define WT_CACHE_DECR(session, f, sz) do { \ + static int __first = 1; \ + if (WT_ATOMIC_SUB8(f, sz) > WT_EXABYTE) { \ + (void)WT_ATOMIC_ADD8(f, sz); \ + if (__first) { \ + __wt_errx(session, \ + "%s underflow: decrementing %" WT_SIZET_FMT,\ + #f, sz); \ + __first = 0; \ + } \ + } \ +} while (0) +#else +#define WT_CACHE_DECR(s, f, sz) do { \ + if (WT_ATOMIC_SUB8(f, sz) > WT_EXABYTE) \ + (void)WT_ATOMIC_ADD8(f, sz); \ +} while (0) +#endif + /* * __wt_cache_page_inmem_decr -- * Decrement a page's memory footprint in the cache. @@ -88,9 +81,8 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size) { WT_CACHE *cache; - size += WT_ALLOC_OVERHEAD; - cache = S2C(session)->cache; + WT_CACHE_DECR(session, cache->bytes_inmem, size); WT_CACHE_DECR(session, page->memory_footprint, size); if (__wt_page_is_modified(page)) { diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i index ee969255241..0295451ef11 100644 --- a/src/third_party/wiredtiger/src/include/cache.i +++ b/src/third_party/wiredtiger/src/include/cache.i @@ -90,6 +90,10 @@ __wt_eviction_check(WT_SESSION_IMPL *session, int *fullp, int wake) dirty_inuse = cache->bytes_dirty; bytes_max = conn->cache_size + 1; + /* Adjust the cache size to take allocation overhead into account. */ + if (conn->cache_overhead != 0) + bytes_max -= (bytes_max * (uint64_t)conn->cache_overhead) / 100; + /* Calculate the cache full percentage. 
*/ *fullp = (int)((100 * bytes_inuse) / bytes_max); @@ -98,6 +102,7 @@ __wt_eviction_check(WT_SESSION_IMPL *session, int *fullp, int wake) (bytes_inuse > (cache->eviction_trigger * bytes_max) / 100 || dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100)) WT_RET(__wt_evict_server_wake(session)); + return (0); } @@ -136,10 +141,7 @@ static inline int __wt_cache_full_check(WT_SESSION_IMPL *session) { WT_BTREE *btree; - WT_DECL_RET; - WT_TXN_GLOBAL *txn_global; - WT_TXN_STATE *txn_state; - int busy, count, full; + int full; /* * LSM sets the no-cache-check flag when holding the LSM tree lock, in @@ -162,73 +164,15 @@ __wt_cache_full_check(WT_SESSION_IMPL *session) * Only wake the eviction server the first time through here (if the * cache is too full). * - * If the cache is less than 95% full, no work to be done. + * If the cache is less than 95% full, no work to be done. If we are + * at the API boundary and the cache is more than 95% full, try to + * evict at least one page before we start an operation. This helps + * with some eviction-dominated workloads. */ WT_RET(__wt_eviction_check(session, &full, 1)); if (full < 95) return (0); - /* - * If we are at the API boundary and the cache is more than 95% full, - * try to evict at least one page before we start an operation. This - * helps with some eviction-dominated workloads. - * - * If the current transaction is keeping the oldest ID pinned, it is in - * the middle of an operation. This may prevent the oldest ID from - * moving forward, leading to deadlock, so only evict what we can. - * Otherwise, we are at a transaction boundary and we can work harder - * to make sure there is free space in the cache. 
- */ - txn_global = &S2C(session)->txn_global; - txn_state = &txn_global->states[session->id]; - busy = txn_state->id != WT_TXN_NONE || - session->nhazard > 0 || - (txn_state->snap_min != WT_TXN_NONE && - txn_global->current != txn_global->oldest_id); - if (busy && full < 100) - return (0); - count = busy ? 1 : 10; - - for (;;) { - switch (ret = __wt_evict_lru_page(session, 1)) { - case 0: - if (--count == 0) - return (0); - break; - case EBUSY: - continue; - case WT_NOTFOUND: - break; - default: - return (ret); - } - - WT_RET(__wt_eviction_check(session, &full, 0)); - if (full < 100) - return (0); - else if (ret == 0) - continue; - - /* - * The cache is still full and no pages were found in the queue - * to evict. If this transaction is the one holding back the - * oldest ID, we can't wait forever. We'll block next time we - * are not busy. - */ - if (busy) { - __wt_txn_update_oldest(session); - if (txn_state->id == txn_global->oldest_id || - txn_state->snap_min == txn_global->oldest_id) - return (0); - } - - /* Wait for the queue to re-populate before trying again. */ - WT_RET(__wt_cond_wait(session, - S2C(session)->cache->evict_waiter_cond, 100000)); - - /* Check if things have changed so that we are busy. 
*/ - if (!busy && txn_state->snap_min != WT_TXN_NONE && - txn_global->current != txn_global->oldest_id) - busy = count = 1; - } + return (__wt_cache_wait(session, full)); } + diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index c5723882489..ff34b014ecf 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -196,7 +196,8 @@ struct __wt_connection_impl { uint32_t hazard_max; /* Hazard array size */ WT_CACHE *cache; /* Page cache */ - uint64_t cache_size; + int cache_overhead; /* Cache percent adjustment */ + uint64_t cache_size; /* Configured cache size */ WT_TXN_GLOBAL txn_global; /* Global transaction state */ diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 35b8dfc113e..ee9c27581c8 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -280,7 +280,7 @@ extern int __wt_cursor_get_valuev(WT_CURSOR *cursor, va_list ap); extern void __wt_cursor_set_value(WT_CURSOR *cursor, ...); extern void __wt_cursor_set_valuev(WT_CURSOR *cursor, va_list ap); extern int __wt_cursor_close(WT_CURSOR *cursor); -extern int __wt_cursor_equal(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp); +extern int __wt_cursor_equals(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp); extern int __wt_cursor_reconfigure(WT_CURSOR *cursor, const char *config); extern int __wt_cursor_dup_position(WT_CURSOR *to_dup, WT_CURSOR *cursor); extern int __wt_cursor_init(WT_CURSOR *cursor, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp); @@ -298,7 +298,8 @@ extern int __wt_evict_destroy(WT_SESSION_IMPL *session); extern int __wt_evict_page(WT_SESSION_IMPL *session, WT_REF *ref); extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session); extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session); 
-extern int __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_app); +extern int __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_server); +extern int __wt_cache_wait(WT_SESSION_IMPL *session, int full); extern void __wt_cache_dump(WT_SESSION_IMPL *session); extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive); extern void __wt_rec_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref); diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h index 17a3fd5a23a..f88a5381227 100644 --- a/src/third_party/wiredtiger/src/include/log.h +++ b/src/third_party/wiredtiger/src/include/log.h @@ -135,6 +135,8 @@ typedef struct { /* Notify any waiting threads when sync_lsn is updated. */ WT_CONDVAR *log_sync_cond; + /* Notify any waiting threads when write_lsn is updated. */ + WT_CONDVAR *log_write_cond; /* * Consolidation array information diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h index 97fc2a47f20..71d6e802fa8 100644 --- a/src/third_party/wiredtiger/src/include/misc.h +++ b/src/third_party/wiredtiger/src/include/misc.h @@ -21,6 +21,7 @@ #define WT_GIGABYTE (1073741824) #define WT_TERABYTE ((uint64_t)1099511627776) #define WT_PETABYTE ((uint64_t)1125899906842624) +#define WT_EXABYTE ((uint64_t)1152921504606846976) /* * Number of directory entries can grow dynamically. 
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h index 6efb9970065..62917f303fd 100644 --- a/src/third_party/wiredtiger/src/include/stat.h +++ b/src/third_party/wiredtiger/src/include/stat.h @@ -176,7 +176,9 @@ struct __wt_connection_stats { WT_STATS cache_eviction_slow; WT_STATS cache_eviction_split; WT_STATS cache_eviction_walk; + WT_STATS cache_eviction_worker_evicting; WT_STATS cache_inmem_split; + WT_STATS cache_overhead; WT_STATS cache_pages_dirty; WT_STATS cache_pages_inuse; WT_STATS cache_read; diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index 656181790ed..f5f9b662e6f 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -382,11 +382,8 @@ __wt_txn_am_oldest(WT_SESSION_IMPL *session) return (0); WT_ORDERED_READ(session_cnt, conn->session_cnt); - for (i = 0, s = txn_global->states; - i < session_cnt; - i++, s++) - if ((id = s->id) != WT_TXN_NONE && - TXNID_LT(id, txn->id)) + for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) + if ((id = s->id) != WT_TXN_NONE && TXNID_LT(id, txn->id)) return (0); return (1); diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in index 982e850241b..21a4a657506 100644 --- a/src/third_party/wiredtiger/src/include/wiredtiger.in +++ b/src/third_party/wiredtiger/src/include/wiredtiger.in @@ -1537,9 +1537,18 @@ struct __wt_connection { * number of worker threads to service asynchronous requests., an * integer between 1 and 20; default \c 2.} * @config{ ),,} + * @config{cache_overhead, assume the heap allocator overhead is the + * specified percentage\, and adjust the cache size by that amount (for + * example\, if the cache size is 100GB\, a percentage of 10 means + * WiredTiger limits itself to allocating 90GB of memory). 
This value is + * configurable because different heap allocators have different + * overhead and different workloads will have different heap allocation + * sizes and patterns\, therefore applications may need to adjust this + * value based on allocator choice and behavior in measured workloads., + * an integer between 0 and 30; default \c 8.} * @config{cache_size, maximum heap memory to allocate for the cache. A - * database should configure either a cache_size or a shared_cache not - * both., an integer between 1MB and 10TB; default \c 100MB.} + * database should configure either \c cache_size or \c shared_cache but + * not both., an integer between 1MB and 10TB; default \c 100MB.} * @config{checkpoint = (, periodically checkpoint the database., a set * of related configuration options defined below.} * @config{ log_size, wait for this amount of log @@ -1864,9 +1873,17 @@ struct __wt_connection { * I/O. The default value of -1 indicates a platform-specific alignment value * should be used (4KB on Linux systems\, zero elsewhere)., an integer between * -1 and 1MB; default \c -1.} + * @config{cache_overhead, assume the heap allocator overhead is the specified + * percentage\, and adjust the cache size by that amount (for example\, if the + * cache size is 100GB\, a percentage of 10 means WiredTiger limits itself to + * allocating 90GB of memory). This value is configurable because different heap + * allocators have different overhead and different workloads will have + * different heap allocation sizes and patterns\, therefore applications may + * need to adjust this value based on allocator choice and behavior in measured + * workloads., an integer between 0 and 30; default \c 8.} * @config{cache_size, maximum heap memory to allocate for the cache. 
A - * database should configure either a cache_size or a shared_cache not both., an - * integer between 1MB and 10TB; default \c 100MB.} + * database should configure either \c cache_size or \c shared_cache but not + * both., an integer between 1MB and 10TB; default \c 100MB.} * @config{checkpoint = (, periodically checkpoint the database., a set of * related configuration options defined below.} * @config{ log_size, wait for this amount of log record @@ -3225,184 +3242,188 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_CACHE_EVICTION_SPLIT 1042 /*! cache: pages walked for eviction */ #define WT_STAT_CONN_CACHE_EVICTION_WALK 1043 +/*! cache: eviction worker thread evicting pages */ +#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1044 /*! cache: in-memory page splits */ -#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1044 +#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1045 +/*! cache: percentage overhead */ +#define WT_STAT_CONN_CACHE_OVERHEAD 1046 /*! cache: tracked dirty pages in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1045 +#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1047 /*! cache: pages currently held in the cache */ -#define WT_STAT_CONN_CACHE_PAGES_INUSE 1046 +#define WT_STAT_CONN_CACHE_PAGES_INUSE 1048 /*! cache: pages read into cache */ -#define WT_STAT_CONN_CACHE_READ 1047 +#define WT_STAT_CONN_CACHE_READ 1049 /*! cache: pages written from cache */ -#define WT_STAT_CONN_CACHE_WRITE 1048 +#define WT_STAT_CONN_CACHE_WRITE 1050 /*! connection: pthread mutex condition wait calls */ -#define WT_STAT_CONN_COND_WAIT 1049 +#define WT_STAT_CONN_COND_WAIT 1051 /*! cursor: cursor create calls */ -#define WT_STAT_CONN_CURSOR_CREATE 1050 +#define WT_STAT_CONN_CURSOR_CREATE 1052 /*! cursor: cursor insert calls */ -#define WT_STAT_CONN_CURSOR_INSERT 1051 +#define WT_STAT_CONN_CURSOR_INSERT 1053 /*! cursor: cursor next calls */ -#define WT_STAT_CONN_CURSOR_NEXT 1052 +#define WT_STAT_CONN_CURSOR_NEXT 1054 /*! 
cursor: cursor prev calls */ -#define WT_STAT_CONN_CURSOR_PREV 1053 +#define WT_STAT_CONN_CURSOR_PREV 1055 /*! cursor: cursor remove calls */ -#define WT_STAT_CONN_CURSOR_REMOVE 1054 +#define WT_STAT_CONN_CURSOR_REMOVE 1056 /*! cursor: cursor reset calls */ -#define WT_STAT_CONN_CURSOR_RESET 1055 +#define WT_STAT_CONN_CURSOR_RESET 1057 /*! cursor: cursor search calls */ -#define WT_STAT_CONN_CURSOR_SEARCH 1056 +#define WT_STAT_CONN_CURSOR_SEARCH 1058 /*! cursor: cursor search near calls */ -#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1057 +#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1059 /*! cursor: cursor update calls */ -#define WT_STAT_CONN_CURSOR_UPDATE 1058 +#define WT_STAT_CONN_CURSOR_UPDATE 1060 /*! data-handle: connection dhandles swept */ -#define WT_STAT_CONN_DH_CONN_HANDLES 1059 +#define WT_STAT_CONN_DH_CONN_HANDLES 1061 /*! data-handle: connection candidate referenced */ -#define WT_STAT_CONN_DH_CONN_REF 1060 +#define WT_STAT_CONN_DH_CONN_REF 1062 /*! data-handle: connection sweeps */ -#define WT_STAT_CONN_DH_CONN_SWEEPS 1061 +#define WT_STAT_CONN_DH_CONN_SWEEPS 1063 /*! data-handle: connection time-of-death sets */ -#define WT_STAT_CONN_DH_CONN_TOD 1062 +#define WT_STAT_CONN_DH_CONN_TOD 1064 /*! data-handle: session dhandles swept */ -#define WT_STAT_CONN_DH_SESSION_HANDLES 1063 +#define WT_STAT_CONN_DH_SESSION_HANDLES 1065 /*! data-handle: session sweep attempts */ -#define WT_STAT_CONN_DH_SESSION_SWEEPS 1064 +#define WT_STAT_CONN_DH_SESSION_SWEEPS 1066 /*! connection: files currently open */ -#define WT_STAT_CONN_FILE_OPEN 1065 +#define WT_STAT_CONN_FILE_OPEN 1067 /*! log: log buffer size increases */ -#define WT_STAT_CONN_LOG_BUFFER_GROW 1066 +#define WT_STAT_CONN_LOG_BUFFER_GROW 1068 /*! log: total log buffer size */ -#define WT_STAT_CONN_LOG_BUFFER_SIZE 1067 +#define WT_STAT_CONN_LOG_BUFFER_SIZE 1069 /*! log: log bytes of payload data */ -#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1068 +#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1070 /*! 
log: log bytes written */ -#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1069 +#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1071 /*! log: yields waiting for previous log file close */ -#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1070 +#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1072 /*! log: total size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_LEN 1071 +#define WT_STAT_CONN_LOG_COMPRESS_LEN 1073 /*! log: total in-memory size of compressed records */ -#define WT_STAT_CONN_LOG_COMPRESS_MEM 1072 +#define WT_STAT_CONN_LOG_COMPRESS_MEM 1074 /*! log: log records too small to compress */ -#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1073 +#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1075 /*! log: log records not compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1074 +#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1076 /*! log: log records compressed */ -#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1075 +#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1077 /*! log: maximum log file size */ -#define WT_STAT_CONN_LOG_MAX_FILESIZE 1076 +#define WT_STAT_CONN_LOG_MAX_FILESIZE 1078 /*! log: pre-allocated log files prepared */ -#define WT_STAT_CONN_LOG_PREALLOC_FILES 1077 +#define WT_STAT_CONN_LOG_PREALLOC_FILES 1079 /*! log: number of pre-allocated log files to create */ -#define WT_STAT_CONN_LOG_PREALLOC_MAX 1078 +#define WT_STAT_CONN_LOG_PREALLOC_MAX 1080 /*! log: pre-allocated log files used */ -#define WT_STAT_CONN_LOG_PREALLOC_USED 1079 +#define WT_STAT_CONN_LOG_PREALLOC_USED 1081 /*! log: log read operations */ -#define WT_STAT_CONN_LOG_READS 1080 +#define WT_STAT_CONN_LOG_READS 1082 /*! log: records processed by log scan */ -#define WT_STAT_CONN_LOG_SCAN_RECORDS 1081 +#define WT_STAT_CONN_LOG_SCAN_RECORDS 1083 /*! log: log scan records requiring two reads */ -#define WT_STAT_CONN_LOG_SCAN_REREADS 1082 +#define WT_STAT_CONN_LOG_SCAN_REREADS 1084 /*! log: log scan operations */ -#define WT_STAT_CONN_LOG_SCANS 1083 +#define WT_STAT_CONN_LOG_SCANS 1085 /*! 
log: consolidated slot closures */ -#define WT_STAT_CONN_LOG_SLOT_CLOSES 1084 +#define WT_STAT_CONN_LOG_SLOT_CLOSES 1086 /*! log: logging bytes consolidated */ -#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1085 +#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1087 /*! log: consolidated slot joins */ -#define WT_STAT_CONN_LOG_SLOT_JOINS 1086 +#define WT_STAT_CONN_LOG_SLOT_JOINS 1088 /*! log: consolidated slot join races */ -#define WT_STAT_CONN_LOG_SLOT_RACES 1087 +#define WT_STAT_CONN_LOG_SLOT_RACES 1089 /*! log: slots selected for switching that were unavailable */ -#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1088 +#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1090 /*! log: record size exceeded maximum */ -#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1089 +#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1091 /*! log: failed to find a slot large enough for record */ -#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1090 +#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1092 /*! log: consolidated slot join transitions */ -#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1091 +#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1093 /*! log: log sync operations */ -#define WT_STAT_CONN_LOG_SYNC 1092 +#define WT_STAT_CONN_LOG_SYNC 1094 /*! log: log write operations */ -#define WT_STAT_CONN_LOG_WRITES 1093 +#define WT_STAT_CONN_LOG_WRITES 1095 /*! LSM: sleep for LSM checkpoint throttle */ -#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1094 +#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1096 /*! LSM: sleep for LSM merge throttle */ -#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1095 +#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1097 /*! LSM: rows merged in an LSM tree */ -#define WT_STAT_CONN_LSM_ROWS_MERGED 1096 +#define WT_STAT_CONN_LSM_ROWS_MERGED 1098 /*! LSM: application work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1097 +#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1099 /*! LSM: merge work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1098 +#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1100 /*! 
LSM: tree queue hit maximum */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1099 +#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1101 /*! LSM: switch work units currently queued */ -#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1100 +#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1102 /*! LSM: tree maintenance operations scheduled */ -#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1101 +#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1103 /*! LSM: tree maintenance operations discarded */ -#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1102 +#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1104 /*! LSM: tree maintenance operations executed */ -#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1103 +#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1105 /*! connection: memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1104 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1106 /*! connection: memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1105 +#define WT_STAT_CONN_MEMORY_FREE 1107 /*! connection: memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1106 +#define WT_STAT_CONN_MEMORY_GROW 1108 /*! thread-yield: page acquire busy blocked */ -#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1107 +#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1109 /*! thread-yield: page acquire eviction blocked */ -#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1108 +#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1110 /*! thread-yield: page acquire locked blocked */ -#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1109 +#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1111 /*! thread-yield: page acquire read blocked */ -#define WT_STAT_CONN_PAGE_READ_BLOCKED 1110 +#define WT_STAT_CONN_PAGE_READ_BLOCKED 1112 /*! thread-yield: page acquire time sleeping (usecs) */ -#define WT_STAT_CONN_PAGE_SLEEP 1111 +#define WT_STAT_CONN_PAGE_SLEEP 1113 /*! connection: total read I/Os */ -#define WT_STAT_CONN_READ_IO 1112 +#define WT_STAT_CONN_READ_IO 1114 /*! 
reconciliation: page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1113 +#define WT_STAT_CONN_REC_PAGES 1115 /*! reconciliation: page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1114 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1116 /*! reconciliation: split bytes currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1115 +#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1117 /*! reconciliation: split objects currently awaiting free */ -#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1116 +#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1118 /*! connection: pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1117 +#define WT_STAT_CONN_RWLOCK_READ 1119 /*! connection: pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1118 +#define WT_STAT_CONN_RWLOCK_WRITE 1120 /*! session: open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1119 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1121 /*! session: open session count */ -#define WT_STAT_CONN_SESSION_OPEN 1120 +#define WT_STAT_CONN_SESSION_OPEN 1122 /*! transaction: transaction begins */ -#define WT_STAT_CONN_TXN_BEGIN 1121 +#define WT_STAT_CONN_TXN_BEGIN 1123 /*! transaction: transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1122 +#define WT_STAT_CONN_TXN_CHECKPOINT 1124 /*! transaction: transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1123 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1125 /*! transaction: transaction checkpoint max time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1124 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1126 /*! transaction: transaction checkpoint min time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1125 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1127 /*! 
transaction: transaction checkpoint most recent time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1126 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1128 /*! transaction: transaction checkpoint total time (msecs) */ -#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1127 +#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1129 /*! transaction: transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1128 +#define WT_STAT_CONN_TXN_COMMIT 1130 /*! transaction: transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1129 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1131 /*! transaction: transaction range of IDs currently pinned */ -#define WT_STAT_CONN_TXN_PINNED_RANGE 1130 +#define WT_STAT_CONN_TXN_PINNED_RANGE 1132 /*! transaction: transactions rolled back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1131 +#define WT_STAT_CONN_TXN_ROLLBACK 1133 /*! connection: total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1132 +#define WT_STAT_CONN_WRITE_IO 1134 /*! * @} diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c index e75946e9885..a173a829436 100644 --- a/src/third_party/wiredtiger/src/log/log.c +++ b/src/third_party/wiredtiger/src/log/log.c @@ -883,6 +883,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) while (LOG_CMP(&log->write_lsn, &slot->slot_release_lsn) != 0) __wt_yield(); log->write_lsn = slot->slot_end_lsn; + WT_ERR(__wt_cond_signal(session, log->log_write_cond)); if (F_ISSET(slot, SLOT_CLOSEFH)) WT_ERR(__wt_cond_signal(session, conn->log_close_cond)); @@ -1651,6 +1652,12 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, myslot.slot->slot_error == 0) (void)__wt_cond_wait( session, log->log_sync_cond, 10000); + } else if (LF_ISSET(WT_LOG_FLUSH)) { + /* Wait for our writes to reach the OS */ + while (LOG_CMP(&log->write_lsn, &lsn) <= 0 && + myslot.slot->slot_error == 0) + (void)__wt_cond_wait( + session, log->log_write_cond, 10000); } err: if 
(locked) diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c index 39b4b4508b7..bf5ae465664 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c @@ -1429,7 +1429,7 @@ __wt_clsm_open(WT_SESSION_IMPL *session, __wt_cursor_set_key, /* set-key */ __wt_cursor_set_value, /* set-value */ __clsm_compare, /* compare */ - __wt_cursor_equal, /* equals */ + __wt_cursor_equals, /* equals */ __clsm_next, /* next */ __clsm_prev, /* prev */ __clsm_reset, /* reset */ diff --git a/src/third_party/wiredtiger/src/os_win/os_map.c b/src/third_party/wiredtiger/src/os_win/os_map.c index 5c78f371889..3c4edb59ea8 100644 --- a/src/third_party/wiredtiger/src/os_win/os_map.c +++ b/src/third_party/wiredtiger/src/os_win/os_map.c @@ -99,7 +99,10 @@ __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len, fh->name, len); } - CloseHandle(*mappingcookie); + if (CloseHandle(*mappingcookie) == 0) { + WT_RET_MSG(session, __wt_errno(), + "CloseHandle: MapViewOfFile: %s", fh->name); + } *mappingcookie = 0; diff --git a/src/third_party/wiredtiger/src/os_win/os_open.c b/src/third_party/wiredtiger/src/os_win/os_open.c index 387c7d2597b..bfcfb13fc3b 100644 --- a/src/third_party/wiredtiger/src/os_win/os_open.c +++ b/src/third_party/wiredtiger/src/os_win/os_open.c @@ -214,13 +214,13 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH *fh) * windows since it is not possible to sync a directory */ if (fh->filehandle != INVALID_HANDLE_VALUE && - !CloseHandle(fh->filehandle) != 0) { + CloseHandle(fh->filehandle) == 0) { ret = __wt_errno(); __wt_err(session, ret, "CloseHandle: %s", fh->name); } if (fh->filehandle_secondary != INVALID_HANDLE_VALUE && - !CloseHandle(fh->filehandle_secondary) != 0) { + CloseHandle(fh->filehandle_secondary) == 0) { ret = __wt_errno(); __wt_err(session, ret, "CloseHandle: secondary: %s", fh->name); } diff --git 
a/src/third_party/wiredtiger/src/os_win/os_thread.c b/src/third_party/wiredtiger/src/os_win/os_thread.c index 3ecfe2796d5..05f7dc15914 100644 --- a/src/third_party/wiredtiger/src/os_win/os_thread.c +++ b/src/third_party/wiredtiger/src/os_win/os_thread.c @@ -33,10 +33,19 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) { WT_DECL_RET; - if ((ret = WaitForSingleObject(tid, INFINITE)) == WAIT_OBJECT_0) - return (0); + if ((ret = WaitForSingleObject(tid, INFINITE)) != WAIT_OBJECT_0) + /* + * If we fail to wait, we will leak handles so do not continue + */ + WT_PANIC_RET(session, ret == WAIT_FAILED ? __wt_errno() : ret, + "Wait for thread join failed"); + + if (CloseHandle(tid) == 0) { + WT_RET_MSG(session, __wt_errno(), + "CloseHandle: thread join"); + } - WT_RET_MSG(session, ret, "WaitForSingleObject"); + return (0); } /* diff --git a/src/third_party/wiredtiger/src/schema/schema_open.c b/src/third_party/wiredtiger/src/schema/schema_open.c index d613ced00aa..fa655c7108b 100644 --- a/src/third_party/wiredtiger/src/schema/schema_open.c +++ b/src/third_party/wiredtiger/src/schema/schema_open.c @@ -269,6 +269,7 @@ __wt_schema_open_index(WT_SESSION_IMPL *session, cursor = NULL; idx = NULL; + match = 0; /* Build a search key. */ tablename = table->name; @@ -343,6 +344,8 @@ __wt_schema_open_index(WT_SESSION_IMPL *session, break; } WT_ERR_NOTFOUND_OK(ret); + if (idxname != NULL && !match) + ret = WT_NOTFOUND; /* If we did a full pass, we won't need to do it again. */ if (idxname == NULL) { @@ -557,6 +560,8 @@ __wt_schema_get_index(WT_SESSION_IMPL *session, /* Otherwise, open it. 
*/ WT_ERR(__wt_schema_open_index( session, table, tend + 1, strlen(tend + 1), indexp)); + if (tablep != NULL) + *tablep = table; err: __wt_schema_release_table(session, table); WT_RET(ret); diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c index 223d62d0559..8ddac870e0e 100644 --- a/src/third_party/wiredtiger/src/support/stat.c +++ b/src/third_party/wiredtiger/src/support/stat.c @@ -360,6 +360,8 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats) "cache: eviction server populating queue, but not evicting pages"; stats->cache_eviction_slow.desc = "cache: eviction server unable to reach eviction goal"; + stats->cache_eviction_worker_evicting.desc = + "cache: eviction worker thread evicting pages"; stats->cache_eviction_force_fail.desc = "cache: failed eviction of pages that exceeded the in-memory maximum"; stats->cache_eviction_hazard.desc = @@ -387,6 +389,7 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats) "cache: pages split during eviction"; stats->cache_eviction_walk.desc = "cache: pages walked for eviction"; stats->cache_write.desc = "cache: pages written from cache"; + stats->cache_overhead.desc = "cache: percentage overhead"; stats->cache_bytes_dirty.desc = "cache: tracked dirty bytes in the cache"; stats->cache_pages_dirty.desc = @@ -548,6 +551,7 @@ __wt_stat_refresh_connection_stats(void *stats_arg) stats->cache_eviction_server_evicting.v = 0; stats->cache_eviction_server_not_evicting.v = 0; stats->cache_eviction_slow.v = 0; + stats->cache_eviction_worker_evicting.v = 0; stats->cache_eviction_force_fail.v = 0; stats->cache_eviction_hazard.v = 0; stats->cache_inmem_split.v = 0; diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c index 0cd341185f2..eae21d0b9f5 100644 --- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c +++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c @@ -341,9 +341,10 @@ __wt_txn_checkpoint(WT_SESSION_IMPL 
*session, const char *cfg[]) * into the session cache, but we're going to do that eventually anyway. */ WT_WITH_SCHEMA_LOCK(session, - WT_WITH_DHANDLE_LOCK(session, - ret = __checkpoint_apply_all( - session, cfg, __wt_checkpoint_list, NULL))); + WT_WITH_TABLE_LOCK(session, + WT_WITH_DHANDLE_LOCK(session, + ret = __checkpoint_apply_all( + session, cfg, __wt_checkpoint_list, NULL)))); WT_ERR(ret); /* diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c index b2515e3c8cd..62890db7895 100644 --- a/src/third_party/wiredtiger/src/txn/txn_recover.c +++ b/src/third_party/wiredtiger/src/txn/txn_recover.c @@ -452,12 +452,17 @@ __wt_txn_recover(WT_SESSION_IMPL *session) if (WT_IS_INIT_LSN(&metafile->ckpt_lsn)) WT_ERR(__wt_log_scan(session, NULL, WT_LOGSCAN_FIRST, __txn_log_recover, &r)); - else + else { + /* + * Start at the last checkpoint LSN referenced in the + * metadata. If we see the end of a checkpoint while + * scanning, we will change the full scan to start from + * there. + */ + r.ckpt_lsn = metafile->ckpt_lsn; WT_ERR(__wt_log_scan(session, &metafile->ckpt_lsn, 0, __txn_log_recover, &r)); - - WT_ASSERT(session, - LOG_CMP(&r.ckpt_lsn, &conn->log->first_lsn) >= 0); + } } /* Scan the metadata to find the live files and their IDs. 
*/ diff --git a/src/third_party/wiredtiger/tools/stat_data.py b/src/third_party/wiredtiger/tools/stat_data.py index 89e06dbbf90..da6fb3a7f7f 100644 --- a/src/third_party/wiredtiger/tools/stat_data.py +++ b/src/third_party/wiredtiger/tools/stat_data.py @@ -6,6 +6,7 @@ no_scale_per_second_list = [ 'cache: maximum bytes configured', 'cache: maximum page size at eviction', 'cache: pages currently held in the cache', + 'cache: percentage overhead', 'cache: tracked dirty bytes in the cache', 'cache: tracked dirty pages in the cache', 'connection: files currently open', @@ -57,6 +58,7 @@ no_clear_list = [ 'cache: bytes currently in the cache', 'cache: maximum bytes configured', 'cache: pages currently held in the cache', + 'cache: percentage overhead', 'connection: files currently open', 'log: maximum log file size', 'log: total log buffer size', |