summary refs log tree commit diff
path: root/src/third_party
diff options
context:
space:
mode:
author Dan Pasette <dan@10gen.com> 2015-01-27 05:48:08 -0500
committer Dan Pasette <dan@mongodb.com> 2015-01-27 05:48:08 -0500
commit e926b20df8bcb14985817d2b37c61e2f8889fb27 (patch)
tree 9b717bb3965470297112fcdbf26dc0ece70721e2 /src/third_party
parent ae258579edb7a9c6185a515bff5b57f8e80d0088 (diff)
download mongo-e926b20df8bcb14985817d2b37c61e2f8889fb27.tar.gz
Import wiredtiger-wiredtiger-mongodb-2.8-rc6-47-g5b3283e.tar.gz from wiredtiger branch mongodb-2.8
Diffstat (limited to 'src/third_party')
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py12
-rw-r--r--src/third_party/wiredtiger/dist/s_all1
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_lang25
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok2
-rw-r--r--src/third_party/wiredtiger/dist/stat_data.py3
-rw-r--r--src/third_party/wiredtiger/lang/python/wiredtiger.i42
-rw-r--r--src/third_party/wiredtiger/src/block/block_compact.c77
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_handle.c3
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_split.c25
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c29
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_cache.c4
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_log.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_ds.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_log.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_metadata.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_std.c4
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_table.c2
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_lru.c138
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_page.c9
-rw-r--r--src/third_party/wiredtiger/src/include/block.h5
-rw-r--r--src/third_party/wiredtiger/src/include/btree.i66
-rw-r--r--src/third_party/wiredtiger/src/include/cache.i80
-rw-r--r--src/third_party/wiredtiger/src/include/connection.h3
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h5
-rw-r--r--src/third_party/wiredtiger/src/include/log.h2
-rw-r--r--src/third_party/wiredtiger/src/include/misc.h1
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h2
-rw-r--r--src/third_party/wiredtiger/src/include/txn.i7
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in207
-rw-r--r--src/third_party/wiredtiger/src/log/log.c7
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_cursor.c2
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_map.c5
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_open.c4
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_thread.c15
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_open.c5
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c4
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ckpt.c7
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_recover.c13
-rw-r--r--src/third_party/wiredtiger/tools/stat_data.py2
39 files changed, 509 insertions, 317 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 0141526285c..7754a3a1d13 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -309,8 +309,18 @@ connection_runtime_config = [
]),
Config('cache_size', '100MB', r'''
maximum heap memory to allocate for the cache. A database should
- configure either a cache_size or a shared_cache not both''',
+ configure either \c cache_size or \c shared_cache but not both''',
min='1MB', max='10TB'),
+ Config('cache_overhead', '8', r'''
+ assume the heap allocator overhead is the specified percentage, and
+ adjust the cache size by that amount (for example, if the cache size is
+ 100GB, a percentage of 10 means WiredTiger limits itself to allocating
+ 90GB of memory). This value is configurable because different heap
+ allocators have different overhead and different workloads will have
+ different heap allocation sizes and patterns, therefore applications
+ may need to adjust this value based on allocator choice and behavior
+ in measured workloads''',
+ min='0', max='30'),
Config('checkpoint', '', r'''
periodically checkpoint the database''',
type='category', subconfig=[
diff --git a/src/third_party/wiredtiger/dist/s_all b/src/third_party/wiredtiger/dist/s_all
index 1b171bdeafd..0e584b938b9 100644
--- a/src/third_party/wiredtiger/dist/s_all
+++ b/src/third_party/wiredtiger/dist/s_all
@@ -69,6 +69,7 @@ run "sh ./s_copyright" "checking copyright notices"
run "sh ./s_define" "checking for unused #defines"
run "sh ./s_funcs" "checking for unused functions"
run "sh ./s_getopt" "checking for incorrect getopt usage"
+run "sh ./s_lang" "checking for SWIG generated name conflicts"
run "sh ./s_longlines" "checking for long lines"
run "sh ./s_stat" "checking for unused statistics fields"
run "sh ./s_string" "checking string spelling"
diff --git a/src/third_party/wiredtiger/dist/s_lang b/src/third_party/wiredtiger/dist/s_lang
new file mode 100755
index 00000000000..0f0519f87e4
--- /dev/null
+++ b/src/third_party/wiredtiger/dist/s_lang
@@ -0,0 +1,25 @@
+#! /bin/sh
+
+# Check lang directories for potential SWIG-generated name conflicts.
+t=__wt.$$
+trap 'rm -f $t; exit 0' 0 1 2 3 13 15
+
+cd ../lang
+
+for d in *; do
+ f=`find $d -name 'wiredtiger_wrap.c'`
+ test -z "$f" && continue # no wiredtiger_wrap.c under $d
+
+ sed -e '/SWIGINTERN.*__wt_[a-z][a-z]*_[a-z]/!d' \
+ -e '/__wt_[^(]*__.*(/d' \
+ -e '/_wrap/d' \
+ -e "/_${d}_/d" \
+ $f > $t
+
+ test -s $t && { # any surviving line is a suspect name
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+ echo "$d: potential SWIG naming conflict"
+ echo "=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-="
+ cat $t
+ }
+done
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index 6c658df8bf0..cea96db2848 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -73,6 +73,7 @@ Crummey
CustomersPhone
DATAITEMs
DECL
+DECR
DESC
DHANDLE
DLFCN
@@ -397,6 +398,7 @@ agc
alfred
alloc
allocator
+allocators
allocfile
allocsize
amd
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index 69e8d2ed21e..bad65528521 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -182,7 +182,10 @@ connection_stats = [
'eviction server unable to reach eviction goal'),
CacheStat('cache_eviction_split', 'pages split during eviction'),
CacheStat('cache_eviction_walk', 'pages walked for eviction'),
+ CacheStat('cache_eviction_worker_evicting',
+ 'eviction worker thread evicting pages'),
CacheStat('cache_inmem_split', 'in-memory page splits'),
+ CacheStat('cache_overhead', 'percentage overhead', 'no_clear,no_scale'),
CacheStat('cache_pages_dirty',
'tracked dirty pages in the cache', 'no_scale'),
CacheStat('cache_pages_inuse',
diff --git a/src/third_party/wiredtiger/lang/python/wiredtiger.i b/src/third_party/wiredtiger/lang/python/wiredtiger.i
index bf726ceac0a..d35d7d5c456 100644
--- a/src/third_party/wiredtiger/lang/python/wiredtiger.i
+++ b/src/third_party/wiredtiger/lang/python/wiredtiger.i
@@ -157,7 +157,7 @@ from packing import pack, unpack
%define DESTRUCTOR(class, method)
%feature("shadow") class::method %{
def method(self, *args):
- '''close(self, config) -> int
+ '''method(self, config) -> int
@copydoc class::method'''
try:
@@ -171,6 +171,26 @@ DESTRUCTOR(__wt_connection, close)
DESTRUCTOR(__wt_cursor, close)
DESTRUCTOR(__wt_session, close)
+/*
+ * OVERRIDE_METHOD must be used when overriding or extending an existing
+ * method in the C interface. It creates a Python method() that calls
+ * _method(), which is the extended version of the method. This works
+ * around potential naming conflicts. Without this technique, for example,
+ * defining __wt_cursor::equals() creates the wrapper function
+ * __wt_cursor_equals(), which may be defined in the WT library.
+ */
+%define OVERRIDE_METHOD(cclass, pyclass, method, pyargs)
+%extend cclass {
+%pythoncode %{
+ def method(self, *args):
+ '''method pyargs -> int
+
+ @copydoc cclass::method'''
+ return self._##method(*args)
+%}
+};
+%enddef
+
/* Don't require empty config strings. */
%typemap(default) const char *config { $1 = NULL; }
%typemap(default) WT_CURSOR *to_dup { $1 = NULL; }
@@ -389,9 +409,9 @@ NOTFOUND_OK(__wt_cursor::remove)
NOTFOUND_OK(__wt_cursor::search)
NOTFOUND_OK(__wt_cursor::update)
-COMPARE_OK(__wt_cursor::compare)
-COMPARE_OK(__wt_cursor::equals)
-COMPARE_NOTFOUND_OK(__wt_cursor::search_near)
+COMPARE_OK(__wt_cursor::_compare)
+COMPARE_OK(__wt_cursor::_equals)
+COMPARE_NOTFOUND_OK(__wt_cursor::_search_near)
/* Lastly, some methods need no (additional) error checking. */
%exception __wt_connection::get_home;
@@ -428,6 +448,10 @@ COMPARE_NOTFOUND_OK(__wt_cursor::search_near)
%ignore __wt_cursor::equals(WT_CURSOR *, WT_CURSOR *, int *);
%ignore __wt_cursor::search_near(WT_CURSOR *, int *);
+OVERRIDE_METHOD(__wt_cursor, WT_CURSOR, compare, (self, other))
+OVERRIDE_METHOD(__wt_cursor, WT_CURSOR, equals, (self, other))
+OVERRIDE_METHOD(__wt_cursor, WT_CURSOR, search_near, (self))
+
/* SWIG magic to turn Python byte strings into data / size. */
%apply (char *STRING, int LENGTH) { (char *data, int size) };
@@ -685,7 +709,7 @@ typedef int int_void;
}
/* compare: special handling. */
- int compare(WT_CURSOR *other) {
+ int _compare(WT_CURSOR *other) {
int cmp = 0;
int ret = 0;
if (other == NULL) {
@@ -709,7 +733,7 @@ typedef int int_void;
}
/* equals: special handling. */
- int equals(WT_CURSOR *other) {
+ int _equals(WT_CURSOR *other) {
int cmp = 0;
int ret = 0;
if (other == NULL) {
@@ -728,7 +752,7 @@ typedef int int_void;
}
/* search_near: special handling. */
- int search_near() {
+ int _search_near() {
int cmp = 0;
int ret = $self->search_near($self, &cmp);
/*
@@ -828,7 +852,7 @@ typedef int int_void;
};
%extend __wt_session {
- int log_printf(const char *msg) {
+ int _log_printf(const char *msg) {
return self->log_printf(self, "%s", msg);
}
@@ -892,6 +916,8 @@ int verbose_build();
%ignore __wt_connection::get_extension_api;
%ignore __wt_session::log_printf;
+OVERRIDE_METHOD(__wt_session, WT_SESSION, log_printf, (self, msg))
+
%ignore wiredtiger_struct_pack;
%ignore wiredtiger_struct_size;
%ignore wiredtiger_struct_unpack;
diff --git a/src/third_party/wiredtiger/src/block/block_compact.c b/src/third_party/wiredtiger/src/block/block_compact.c
index 79494a274a9..a28b6d7a71e 100644
--- a/src/third_party/wiredtiger/src/block/block_compact.c
+++ b/src/third_party/wiredtiger/src/block/block_compact.c
@@ -50,6 +50,8 @@ __wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block)
block->allocfirst = block->allocfirst_save;
__wt_spin_unlock(session, &block->live_lock);
+ block->compact_pct_tenths = 0;
+
return (0);
}
@@ -64,7 +66,7 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, int *skipp)
WT_EXT *ext;
WT_EXTLIST *el;
WT_FH *fh;
- wt_off_t avail, ninety;
+ wt_off_t avail_eighty, avail_ninety, eighty, ninety;
*skipp = 1; /* Return a default skip. */
@@ -84,31 +86,53 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, int *skipp)
if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT))
WT_ERR(__block_dump_avail(session, block));
- /* Sum the number of available bytes in the first 90% of the file. */
- avail = 0;
+ /* Sum the available bytes in the first 80% and 90% of the file. */
+ avail_eighty = avail_ninety = 0;
ninety = fh->size - fh->size / 10;
+ eighty = fh->size - ((fh->size / 10) * 2);
el = &block->live.avail;
WT_EXT_FOREACH(ext, el->off)
- if (ext->off < ninety)
- avail += ext->size;
-
- /*
- * If at least 10% of the total file is available and in the first 90%
- * of the file, we'll try compaction.
- */
- if (avail >= fh->size / 10)
- *skipp = 0;
+ if (ext->off < ninety) {
+ avail_ninety += ext->size;
+ if (ext->off < eighty)
+ avail_eighty += ext->size;
+ }
WT_ERR(__wt_verbose(session, WT_VERB_COMPACT,
"%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first "
- "90%% of the file, require 10%% or %" PRIuMAX "MB (%" PRIuMAX
- ") to perform compaction, compaction %s",
+ "80%% of the file",
+ block->name,
+ (uintmax_t)avail_eighty / WT_MEGABYTE, (uintmax_t)avail_eighty));
+ WT_ERR(__wt_verbose(session, WT_VERB_COMPACT,
+ "%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first "
+ "90%% of the file",
+ block->name,
+ (uintmax_t)avail_ninety / WT_MEGABYTE, (uintmax_t)avail_ninety));
+ WT_ERR(__wt_verbose(session, WT_VERB_COMPACT,
+ "%s: require 10%% or %" PRIuMAX "MB (%" PRIuMAX ") in the first "
+ "90%% of the file to perform compaction, compaction %s",
block->name,
- (uintmax_t)avail / WT_MEGABYTE, (uintmax_t)avail,
(uintmax_t)(fh->size / 10) / WT_MEGABYTE, (uintmax_t)fh->size / 10,
*skipp ? "skipped" : "proceeding"));
+ /*
+ * If at least 20% of the total file is available and in the first 80%
+ * of the file, we'll try compaction on the last 20% of the file; else,
+ * if at least 10% of the total file is available and in the first 90%
+ * of the file, we'll try compaction on the last 10% of the file.
+ *
+ * We could push this further, but there are diminishing returns: a mostly
+ * empty file can be processed quickly, so more aggressive compaction is
+ * less useful.
+ */
+ if (avail_ninety >= fh->size / 10) {
+ *skipp = 0;
+ block->compact_pct_tenths = 1;
+ if (avail_eighty >= ((fh->size / 10) * 2))
+ block->compact_pct_tenths = 2;
+ }
+
err: __wt_spin_unlock(session, &block->live_lock);
return (ret);
@@ -126,7 +150,7 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session,
WT_EXT *ext;
WT_EXTLIST *el;
WT_FH *fh;
- wt_off_t ninety, offset;
+ wt_off_t limit, offset;
uint32_t size, cksum;
WT_UNUSED(addr_size);
@@ -138,21 +162,24 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session,
WT_RET(__wt_block_buffer_to_addr(block, addr, &offset, &size, &cksum));
/*
- * If this block is in the last 10% of the file and there's a block on
- * the available list that's in the first 90% of the file, rewrite the
- * block. Checking the available list is necessary (otherwise writing
- * the block would extend the file), but there's an obvious race if the
- * file is sufficiently busy.
+ * If this block is in the chosen percentage of the file and there's a
+ * block on the available list that appears before that percentage of
+ * the file, rewrite the block. Checking the available list is
+ * necessary (otherwise writing the block would extend the file), but
+ * there's an obvious race if the file is sufficiently busy.
*/
__wt_spin_lock(session, &block->live_lock);
- ninety = fh->size - fh->size / 10;
- if (offset > ninety) {
+ limit = fh->size - ((fh->size / 10) * block->compact_pct_tenths);
+ if (offset > limit) {
el = &block->live.avail;
- WT_EXT_FOREACH(ext, el->off)
- if (ext->off < ninety && ext->size >= size) {
+ WT_EXT_FOREACH(ext, el->off) {
+ if (ext->off >= limit)
+ break;
+ if (ext->size >= size) {
*skipp = 0;
break;
}
+ }
}
__wt_spin_unlock(session, &block->live_lock);
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index 943a8573f29..f0414c4e855 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -659,7 +659,8 @@ __btree_page_sizes(WT_SESSION_IMPL *session)
* cache, it may not have been set.
*/
WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval));
- btree->maxmempage = WT_MAX((uint64_t)cval.val, 50 * btree->maxleafpage);
+ btree->maxmempage =
+ WT_MAX((uint64_t)cval.val, 50 * (uint64_t)btree->maxleafpage);
cache_size = S2C(session)->cache_size;
if (cache_size > 0)
btree->maxmempage = WT_MIN(btree->maxmempage, cache_size / 2);
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 94c73106d70..10112d46289 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -13,7 +13,7 @@
* estimate of allocation overhead to every object.
*/
#define WT_MEMSIZE_ADD(total, len) do { \
- total += (len) + WT_ALLOC_OVERHEAD; \
+ total += (len); \
} while (0)
#define WT_MEMSIZE_TRANSFER(from_decr, to_incr, len) do { \
WT_MEMSIZE_ADD(from_decr, len); \
@@ -728,14 +728,6 @@ __split_multi_inmem(
*/
page->modify->first_dirty_txn = WT_TXN_FIRST;
- /*
- * XXX Don't allow this page to be evicted immediately.
- *
- * In some cases involving forced eviction during truncates, a reader
- * ends up looking at an evicted page. This is a temporary workaround.
- */
- page->modify->inmem_split_txn = __wt_txn_new_id(session);
-
err: /* Free any resources that may have been cached in the cursor. */
WT_TRET(__wt_btcur_close(&cbt));
@@ -889,8 +881,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
for (i = 0, deleted_entries = 0; i < parent_entries; ++i) {
next_ref = pindex->index[i];
WT_ASSERT(session, next_ref->state != WT_REF_SPLIT);
- if (next_ref->state == WT_REF_DELETED &&
- next_ref->page_del == NULL &&
+ if (__wt_delete_page_skip(session, next_ref) &&
WT_ATOMIC_CAS4(next_ref->state,
WT_REF_DELETED, WT_REF_SPLIT))
deleted_entries++;
@@ -986,6 +977,18 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
session, 0, ikey, size));
WT_MEMSIZE_ADD(parent_decr, size);
}
+ /*
+ * The page_del structure can be freed
+ * immediately: it is only read when the ref
+ * state is WT_REF_DELETED. The size of the
+ * structures wasn't added to the parent: don't
+ * decrement.
+ */
+ if (next_ref->page_del != NULL) {
+ __wt_free(session,
+ next_ref->page_del->update_list);
+ __wt_free(session, next_ref->page_del);
+ }
}
WT_TRET(__split_safe_free(
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index ec44e8839b0..646551cdd38 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -82,6 +82,7 @@ static const WT_CONFIG_CHECK confchk_statistics_log_subconfigs[] = {
static const WT_CONFIG_CHECK confchk_connection_reconfigure[] = {
{ "async", "category", NULL, confchk_async_subconfigs },
+ { "cache_overhead", "int", "min=0,max=30", NULL },
{ "cache_size", "int", "min=1MB,max=10TB", NULL },
{ "checkpoint", "category", NULL,
confchk_checkpoint_subconfigs },
@@ -326,6 +327,7 @@ static const WT_CONFIG_CHECK confchk_transaction_sync_subconfigs[] = {
static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{ "async", "category", NULL, confchk_async_subconfigs },
{ "buffer_alignment", "int", "min=-1,max=1MB", NULL },
+ { "cache_overhead", "int", "min=0,max=30", NULL },
{ "cache_size", "int", "min=1MB,max=10TB", NULL },
{ "checkpoint", "category", NULL,
confchk_checkpoint_subconfigs },
@@ -375,6 +377,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
{ "async", "category", NULL, confchk_async_subconfigs },
{ "buffer_alignment", "int", "min=-1,max=1MB", NULL },
+ { "cache_overhead", "int", "min=0,max=30", NULL },
{ "cache_size", "int", "min=1MB,max=10TB", NULL },
{ "checkpoint", "category", NULL,
confchk_checkpoint_subconfigs },
@@ -425,6 +428,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
{ "async", "category", NULL, confchk_async_subconfigs },
{ "buffer_alignment", "int", "min=-1,max=1MB", NULL },
+ { "cache_overhead", "int", "min=0,max=30", NULL },
{ "cache_size", "int", "min=1MB,max=10TB", NULL },
{ "checkpoint", "category", NULL,
confchk_checkpoint_subconfigs },
@@ -471,6 +475,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
{ "async", "category", NULL, confchk_async_subconfigs },
{ "buffer_alignment", "int", "min=-1,max=1MB", NULL },
+ { "cache_overhead", "int", "min=0,max=30", NULL },
{ "cache_size", "int", "min=1MB,max=10TB", NULL },
{ "checkpoint", "category", NULL,
confchk_checkpoint_subconfigs },
@@ -552,14 +557,14 @@ static const WT_CONFIG_ENTRY config_entries[] = {
confchk_connection_open_session
},
{ "connection.reconfigure",
- "async=(enabled=0,ops_max=1024,threads=2),cache_size=100MB,"
- "checkpoint=(log_size=0,name=\"WiredTigerCheckpoint\",wait=0),"
- "error_prefix=,eviction=(threads_max=1,threads_min=1),"
- "eviction_dirty_target=80,eviction_target=80,eviction_trigger=95,"
- "lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,"
- "shared_cache=(chunk=10MB,name=,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(on_close=0,"
- "path=\"WiredTigerStat.%d.%H\",sources=,"
+ "async=(enabled=0,ops_max=1024,threads=2),cache_overhead=8,"
+ "cache_size=100MB,checkpoint=(log_size=0,"
+ "name=\"WiredTigerCheckpoint\",wait=0),error_prefix=,"
+ "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80,"
+ "eviction_target=80,eviction_trigger=95,lsm_manager=(merge=,"
+ "worker_thread_max=4),lsm_merge=,shared_cache=(chunk=10MB,name=,"
+ "reserve=0,size=500MB),statistics=none,statistics_log=(on_close=0"
+ ",path=\"WiredTigerStat.%d.%H\",sources=,"
"timestamp=\"%b %d %H:%M:%S\",wait=0),verbose=",
confchk_connection_reconfigure
},
@@ -672,7 +677,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "wiredtiger_open",
"async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1,"
- "cache_size=100MB,checkpoint=(log_size=0,"
+ "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,"
"name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=,"
"config_base=,create=0,direct_io=,error_prefix=,"
"eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80,"
@@ -690,7 +695,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "wiredtiger_open_all",
"async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1,"
- "cache_size=100MB,checkpoint=(log_size=0,"
+ "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,"
"name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=,"
"config_base=,create=0,direct_io=,error_prefix=,"
"eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80,"
@@ -708,7 +713,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "wiredtiger_open_basecfg",
"async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1,"
- "cache_size=100MB,checkpoint=(log_size=0,"
+ "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,"
"name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=,"
"direct_io=,error_prefix=,eviction=(threads_max=1,threads_min=1),"
"eviction_dirty_target=80,eviction_target=80,eviction_trigger=95,"
@@ -725,7 +730,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "wiredtiger_open_usercfg",
"async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1,"
- "cache_size=100MB,checkpoint=(log_size=0,"
+ "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,"
"name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=,"
"direct_io=,error_prefix=,eviction=(threads_max=1,threads_min=1),"
"eviction_dirty_target=80,eviction_target=80,eviction_trigger=95,"
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c
index 91f82a5105b..b278d7a6b8a 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache.c
@@ -29,6 +29,8 @@ __wt_cache_config(WT_SESSION_IMPL *session, const char *cfg[])
if (!F_ISSET(conn, WT_CONN_CACHE_POOL)) {
WT_RET(__wt_config_gets(session, cfg, "cache_size", &cval));
conn->cache_size = (uint64_t)cval.val;
+ WT_RET(__wt_config_gets(session, cfg, "cache_overhead", &cval));
+ conn->cache_overhead = (int)cval.val;
} else {
WT_RET(__wt_config_gets(
session, cfg, "shared_cache.reserve", &cval));
@@ -142,6 +144,8 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session)
WT_STAT_SET(stats, cache_bytes_max, conn->cache_size);
WT_STAT_SET(stats, cache_bytes_inuse, __wt_cache_bytes_inuse(cache));
+
+ WT_STAT_SET(stats, cache_overhead, conn->cache_overhead);
WT_STAT_SET(stats, cache_pages_inuse, __wt_cache_pages_inuse(cache));
WT_STAT_SET(stats, cache_bytes_dirty, cache->bytes_dirty);
WT_STAT_SET(stats,
diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c
index f70a9a4a60c..abf21408d2d 100644
--- a/src/third_party/wiredtiger/src/conn/conn_log.c
+++ b/src/third_party/wiredtiger/src/conn/conn_log.c
@@ -444,6 +444,7 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[])
WT_INIT_LSN(&log->write_lsn);
log->fileid = 0;
WT_RET(__wt_cond_alloc(session, "log sync", 0, &log->log_sync_cond));
+ WT_RET(__wt_cond_alloc(session, "log write", 0, &log->log_write_cond));
WT_RET(__wt_log_open(session));
WT_RET(__wt_log_slot_init(session));
@@ -565,6 +566,7 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session)
WT_TRET(__wt_log_slot_destroy(session));
WT_TRET(__wt_cond_destroy(session, &conn->log->log_sync_cond));
+ WT_TRET(__wt_cond_destroy(session, &conn->log->log_write_cond));
WT_TRET(__wt_rwlock_destroy(session, &conn->log->log_archive_lock));
__wt_spin_destroy(session, &conn->log->log_lock);
__wt_spin_destroy(session, &conn->log->log_slot_lock);
diff --git a/src/third_party/wiredtiger/src/cursor/cur_ds.c b/src/third_party/wiredtiger/src/cursor/cur_ds.c
index cc3e23570d5..c58d6899150 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_ds.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_ds.c
@@ -454,7 +454,7 @@ __wt_curds_open(
__wt_cursor_set_key, /* set-key */
__wt_cursor_set_value, /* set-value */
__curds_compare, /* compare */
- __wt_cursor_equal, /* equals */
+ __wt_cursor_equals, /* equals */
__curds_next, /* next */
__curds_prev, /* prev */
__curds_reset, /* reset */
diff --git a/src/third_party/wiredtiger/src/cursor/cur_log.c b/src/third_party/wiredtiger/src/cursor/cur_log.c
index 2e2a2530df6..5de3762217f 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_log.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_log.c
@@ -344,7 +344,7 @@ __wt_curlog_open(WT_SESSION_IMPL *session,
__wt_cursor_set_key, /* set-key */
__wt_cursor_set_value, /* set-value */
__curlog_compare, /* compare */
- __wt_cursor_equal, /* equals */
+ __wt_cursor_equals, /* equals */
__curlog_next, /* next */
__wt_cursor_notsup, /* prev */
__curlog_reset, /* reset */
diff --git a/src/third_party/wiredtiger/src/cursor/cur_metadata.c b/src/third_party/wiredtiger/src/cursor/cur_metadata.c
index 31c96e3087a..9860eb65a55 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_metadata.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_metadata.c
@@ -409,7 +409,7 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session,
__wt_cursor_set_key, /* set-key */
__wt_cursor_set_value, /* set-value */
__curmetadata_compare, /* compare */
- __wt_cursor_equal, /* equals */
+ __wt_cursor_equals, /* equals */
__curmetadata_next, /* next */
__curmetadata_prev, /* prev */
__curmetadata_reset, /* reset */
diff --git a/src/third_party/wiredtiger/src/cursor/cur_std.c b/src/third_party/wiredtiger/src/cursor/cur_std.c
index df38eb9e57d..858c6af6853 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_std.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_std.c
@@ -480,11 +480,11 @@ __wt_cursor_close(WT_CURSOR *cursor)
}
/*
- * __wt_cursor_equal --
+ * __wt_cursor_equals --
* WT_CURSOR->equals default implementation.
*/
int
-__wt_cursor_equal(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp)
+__wt_cursor_equals(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp)
{
WT_DECL_RET;
WT_SESSION_IMPL *session;
diff --git a/src/third_party/wiredtiger/src/cursor/cur_table.c b/src/third_party/wiredtiger/src/cursor/cur_table.c
index f8e8625b0bd..a046b2b1ed9 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_table.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_table.c
@@ -839,7 +839,7 @@ __wt_curtable_open(WT_SESSION_IMPL *session,
__wt_curtable_set_key, /* set-key */
__wt_curtable_set_value, /* set-value */
__curtable_compare, /* compare */
- __wt_cursor_equal, /* equals */
+ __wt_cursor_equals, /* equals */
__curtable_next, /* next */
__curtable_prev, /* prev */
__curtable_reset, /* reset */
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index a4ae0aaf55b..384ec9be5b3 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -362,7 +362,7 @@ __evict_worker(void *arg)
WT_ERR(__wt_cond_wait(
session, cache->evict_waiter_cond, 10000));
else
- WT_ERR(__evict_lru_pages(session, 1));
+ WT_ERR(__evict_lru_pages(session, 0));
}
WT_ERR(__wt_verbose(
session, WT_VERB_EVICTSERVER, "cache eviction worker exiting"));
@@ -704,7 +704,7 @@ __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session)
* Get pages from the LRU queue to evict.
*/
static int
-__evict_lru_pages(WT_SESSION_IMPL *session, int is_app)
+__evict_lru_pages(WT_SESSION_IMPL *session, int is_server)
{
WT_DECL_RET;
@@ -712,7 +712,7 @@ __evict_lru_pages(WT_SESSION_IMPL *session, int is_app)
* Reconcile and discard some pages: EBUSY is returned if a page fails
* eviction because it's unavailable, continue in that case.
*/
- while ((ret = __wt_evict_lru_page(session, is_app)) == 0 ||
+ while ((ret = __wt_evict_lru_page(session, is_server)) == 0 ||
ret == EBUSY)
;
return (ret == WT_NOTFOUND ? 0 : ret);
@@ -822,10 +822,8 @@ __evict_server_work(WT_SESSION_IMPL *session)
if (cache->evict_candidates > 10 &&
cache->evict_current != NULL)
__wt_yield();
- } else {
- WT_STAT_FAST_CONN_INCR(session, cache_eviction_server_evicting);
- WT_RET(__evict_lru_pages(session, 0));
- }
+ } else
+ WT_RET(__evict_lru_pages(session, 1));
return (0);
}
@@ -1185,7 +1183,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
*/
static int
__evict_get_ref(
- WT_SESSION_IMPL *session, int is_app, WT_BTREE **btreep, WT_REF **refp)
+ WT_SESSION_IMPL *session, int is_server, WT_BTREE **btreep, WT_REF **refp)
{
WT_CACHE *cache;
WT_EVICT_ENTRY *evict;
@@ -1197,18 +1195,6 @@ __evict_get_ref(
*refp = NULL;
/*
- * A pathological case: if we're the oldest transaction in the system
- * and the eviction server is stuck trying to find space, abort the
- * transaction to give up all hazard pointers before trying again.
- */
- if (is_app && F_ISSET(cache, WT_EVICT_STUCK) &&
- __wt_txn_am_oldest(session)) {
- F_CLR(cache, WT_EVICT_STUCK);
- WT_STAT_FAST_CONN_INCR(session, txn_fail_cache);
- return (WT_ROLLBACK);
- }
-
- /*
* Avoid the LRU lock if no pages are available. If there are pages
* available, spin until we get the lock. If this function returns
* without getting a page to evict, application threads assume there
@@ -1228,7 +1214,7 @@ __evict_get_ref(
* looking for more.
*/
candidates = cache->evict_candidates;
- if (!is_app && candidates > 1)
+ if (is_server && candidates > 1)
candidates /= 2;
/* Get the next page queued for eviction. */
@@ -1280,7 +1266,7 @@ __evict_get_ref(
* Called by both eviction and application threads to evict a page.
*/
int
-__wt_evict_lru_page(WT_SESSION_IMPL *session, int is_app)
+__wt_evict_lru_page(WT_SESSION_IMPL *session, int is_server)
{
WT_BTREE *btree;
WT_CACHE *cache;
@@ -1288,13 +1274,24 @@ __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_app)
WT_PAGE *page;
WT_REF *ref;
- if (is_app)
- WT_STAT_FAST_CONN_INCR(session, cache_eviction_app);
-
- WT_RET(__evict_get_ref(session, is_app, &btree, &ref));
+ WT_RET(__evict_get_ref(session, is_server, &btree, &ref));
WT_ASSERT(session, ref->state == WT_REF_LOCKED);
/*
+ * An internal session flags either the server itself or an eviction
+ * worker thread.
+ */
+ if (F_ISSET(session, WT_SESSION_INTERNAL)) {
+ if (is_server)
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_server_evicting);
+ else
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_worker_evicting);
+ } else
+ WT_STAT_FAST_CONN_INCR(session, cache_eviction_app);
+
+ /*
* In case something goes wrong, don't pick the same set of pages every
* time.
*
@@ -1308,7 +1305,6 @@ __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_app)
page->read_gen = __wt_cache_read_gen_set(session);
WT_WITH_BTREE(session, btree, ret = __wt_evict_page(session, ref));
- WT_ASSERT(session, is_app || session->split_gen == 0);
(void)WT_ATOMIC_SUB4(btree->evict_busy, 1);
@@ -1321,6 +1317,94 @@ __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_app)
return (ret);
}
+/*
+ * __wt_cache_wait --
+ * Wait for space in the cache.
+ *
+ * The "full" argument is the caller's cache-full percentage; it is
+ * recomputed by __wt_eviction_check inside the loop.
+ *
+ * Returns 0 when the session is busy and the cache is under 100% full,
+ * when the target number of pages has been evicted, when the cache
+ * drops below 100% full, or when a busy session's transaction ID or
+ * snap_min matches the oldest ID. Returns WT_ROLLBACK when eviction is
+ * flagged as stuck and this session is the oldest transaction. Any
+ * other unexpected return from __wt_evict_lru_page is passed back to
+ * the caller.
+ */
+int
+__wt_cache_wait(WT_SESSION_IMPL *session, int full)
+{
+ WT_CACHE *cache;
+ WT_DECL_RET;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_STATE *txn_state;
+ int busy, count;
+
+ cache = S2C(session)->cache;
+
+ /*
+ * If the current transaction is keeping the oldest ID pinned, it is in
+ * the middle of an operation. This may prevent the oldest ID from
+ * moving forward, leading to deadlock, so only evict what we can.
+ * Otherwise, we are at a transaction boundary and we can work harder
+ * to make sure there is free space in the cache.
+ *
+ * A session counts as busy if it has a transaction ID, holds hazard
+ * pointers, or has a snapshot while the global current ID differs
+ * from the oldest ID. A busy session stops after one successful
+ * eviction, any other session after ten.
+ */
+ txn_global = &S2C(session)->txn_global;
+ txn_state = &txn_global->states[session->id];
+ busy = txn_state->id != WT_TXN_NONE ||
+ session->nhazard > 0 ||
+ (txn_state->snap_min != WT_TXN_NONE &&
+ txn_global->current != txn_global->oldest_id);
+ if (busy && full < 100)
+ return (0);
+ count = busy ? 1 : 10;
+
+ for (;;) {
+ /*
+ * A pathological case: if we're the oldest transaction in the
+ * system and the eviction server is stuck trying to find space,
+ * abort the transaction to give up all hazard pointers before
+ * trying again.
+ */
+ if (F_ISSET(cache, WT_EVICT_STUCK) &&
+ __wt_txn_am_oldest(session)) {
+ F_CLR(cache, WT_EVICT_STUCK);
+ WT_STAT_FAST_CONN_INCR(session, txn_fail_cache);
+ return (WT_ROLLBACK);
+ }
+
+ switch (ret = __wt_evict_lru_page(session, 0)) {
+ case 0:
+ if (--count == 0)
+ return (0);
+ break;
+ case EBUSY:
+ continue;
+ case WT_NOTFOUND:
+ break;
+ default:
+ return (ret);
+ }
+
+ WT_RET(__wt_eviction_check(session, &full, 0));
+ if (full < 100)
+ return (0);
+ else if (ret == 0)
+ continue;
+
+ /*
+ * The cache is still full and no pages were found in the queue
+ * to evict. If this transaction is the one holding back the
+ * oldest ID, we can't wait forever. We'll block next time we
+ * are not busy.
+ */
+ if (busy) {
+ __wt_txn_update_oldest(session);
+ if (txn_state->id == txn_global->oldest_id ||
+ txn_state->snap_min == txn_global->oldest_id)
+ return (0);
+ }
+
+ /*
+ * Wait for the queue to re-populate before trying again. The
+ * wait on evict_waiter_cond is bounded by the 100000 argument
+ * (presumably a timeout in microseconds -- TODO confirm).
+ */
+ WT_RET(__wt_cond_wait(session,
+ S2C(session)->cache->evict_waiter_cond, 100000));
+
+ /* Check if things have changed so that we are busy. */
+ if (!busy && txn_state->snap_min != WT_TXN_NONE &&
+ txn_global->current != txn_global->oldest_id)
+ busy = count = 1;
+ }
+}
#ifdef HAVE_DIAGNOSTIC
/*
* __wt_cache_dump --
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 99e2a6751be..5bbf3b891f7 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -413,12 +413,11 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref,
* Check for an append-only workload needing an in-memory split.
*
* We can't do this earlier because in-memory splits require exclusive
- * access. If an in-memory split completes, the page stays in memory
- * and the tree is left in the desired state: avoid the usual cleanup.
+ * access, and we can't split if a checkpoint is in progress because
+ * the checkpoint could be walking the parent page.
*
- * Attempt the split before checking whether a checkpoint is running -
- * that's not a problem here because we aren't evicting any dirty
- * pages.
+ * If an in-memory split completes, the page stays in memory and the
+ * tree is left in the desired state: avoid the usual cleanup.
*/
if (top && !exclusive) {
WT_RET(__wt_split_insert(session, ref, inmem_splitp));
diff --git a/src/third_party/wiredtiger/src/include/block.h b/src/third_party/wiredtiger/src/include/block.h
index f4677236d8b..9e737dc81d2 100644
--- a/src/third_party/wiredtiger/src/include/block.h
+++ b/src/third_party/wiredtiger/src/include/block.h
@@ -217,7 +217,6 @@ struct __wt_block {
/* Configuration information, set when the file is opened. */
int allocfirst; /* Allocation is first-fit */
- int allocfirst_save; /* Allocation is first-fit, saved */
uint32_t allocsize; /* Allocation size */
size_t os_cache; /* System buffer cache flush max */
size_t os_cache_max;
@@ -237,6 +236,10 @@ struct __wt_block {
WT_BLOCK_CKPT live; /* Live checkpoint */
int ckpt_inprogress;/* Live checkpoint in progress */
+ /* Compaction support */
+ int allocfirst_save; /* Saved: allocation is first-fit */
+ int compact_pct_tenths; /* Percent to compact */
+
/* Salvage support */
wt_off_t slvg_off; /* Salvage file offset */
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 7b3f6a10403..f64e80e7d12 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -27,39 +27,6 @@ __wt_page_is_modified(WT_PAGE *page)
}
/*
- * Estimate the per-allocation overhead. All implementations of malloc / free
- * have some kind of header and pad for alignment. We can't know for sure what
- * that adds up to, but this is an estimate based on some measurements of heap
- * size versus bytes in use.
- */
-#define WT_ALLOC_OVERHEAD 32U
-
-/*
- * Track a field in the cache. Use atomic CAS so that we can reliably avoid
- * decrementing the cache below zero - since we use an unsigned number.
- * Track if we would go below zero in a diagnostic build - something has gone
- * wrong.
- */
-#ifdef HAVE_DIAGNOSTIC
-#define WT_CACHE_DECR(session, f, sz) do { \
- uint64_t __val = f; \
- uint64_t __sz = WT_MIN(__val, sz); \
- if (__sz < sz) \
- __wt_errx(session, "%s underflow: decrementing %" \
- WT_SIZET_FMT, #f, sz); \
- while (!WT_ATOMIC_CAS8(f, __val, __val - __sz)) \
- __val = f, __sz = WT_MIN(__val, __sz); \
-} while (0)
-#else
-#define WT_CACHE_DECR(session, f, sz) do { \
- uint64_t __val = f; \
- uint64_t __sz = WT_MIN(__val, sz); \
- while (!WT_ATOMIC_CAS8(f, __val, __val - __sz)) \
- __val = f, __sz = WT_MIN(__val, __sz); \
-} while (0)
-#endif
-
-/*
* __wt_cache_page_inmem_incr --
* Increment a page's memory footprint in the cache.
*/
@@ -68,8 +35,6 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
{
WT_CACHE *cache;
- size += WT_ALLOC_OVERHEAD;
-
cache = S2C(session)->cache;
(void)WT_ATOMIC_ADD8(cache->bytes_inmem, size);
(void)WT_ATOMIC_ADD8(page->memory_footprint, size);
@@ -79,6 +44,34 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
}
}
+/*
+ * WT_CACHE_DECR --
+ * Macro to decrement a field by a size.
+ *
+ * Be defensive and don't underflow: a band-aid on a gaping wound, but underflow
+ * won't make things better no matter the problem (specifically, underflow makes
+ * eviction crazy trying to evict non-existent memory).
+ *
+ * The field is decremented with WT_ATOMIC_SUB8; if the value that call returns
+ * is larger than WT_EXABYTE, the decrement is treated as an underflow and is
+ * undone by adding the size back with WT_ATOMIC_ADD8. When HAVE_DIAGNOSTIC is
+ * defined the underflow is also reported through __wt_errx, but only the first
+ * time a given expansion of the macro detects one (the static __first flag).
+ * The non-diagnostic variant does not use its session argument.
+ */
+#ifdef HAVE_DIAGNOSTIC
+#define WT_CACHE_DECR(session, f, sz) do { \
+ static int __first = 1; \
+ if (WT_ATOMIC_SUB8(f, sz) > WT_EXABYTE) { \
+ (void)WT_ATOMIC_ADD8(f, sz); \
+ if (__first) { \
+ __wt_errx(session, \
+ "%s underflow: decrementing %" WT_SIZET_FMT,\
+ #f, sz); \
+ __first = 0; \
+ } \
+ } \
+} while (0)
+#else
+#define WT_CACHE_DECR(s, f, sz) do { \
+ if (WT_ATOMIC_SUB8(f, sz) > WT_EXABYTE) \
+ (void)WT_ATOMIC_ADD8(f, sz); \
+} while (0)
+#endif
+
/*
* __wt_cache_page_inmem_decr --
* Decrement a page's memory footprint in the cache.
@@ -88,9 +81,8 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
{
WT_CACHE *cache;
- size += WT_ALLOC_OVERHEAD;
-
cache = S2C(session)->cache;
+
WT_CACHE_DECR(session, cache->bytes_inmem, size);
WT_CACHE_DECR(session, page->memory_footprint, size);
if (__wt_page_is_modified(page)) {
diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i
index ee969255241..0295451ef11 100644
--- a/src/third_party/wiredtiger/src/include/cache.i
+++ b/src/third_party/wiredtiger/src/include/cache.i
@@ -90,6 +90,10 @@ __wt_eviction_check(WT_SESSION_IMPL *session, int *fullp, int wake)
dirty_inuse = cache->bytes_dirty;
bytes_max = conn->cache_size + 1;
+ /* Adjust the cache size to take allocation overhead into account. */
+ if (conn->cache_overhead != 0)
+ bytes_max -= (bytes_max * (uint64_t)conn->cache_overhead) / 100;
+
/* Calculate the cache full percentage. */
*fullp = (int)((100 * bytes_inuse) / bytes_max);
@@ -98,6 +102,7 @@ __wt_eviction_check(WT_SESSION_IMPL *session, int *fullp, int wake)
(bytes_inuse > (cache->eviction_trigger * bytes_max) / 100 ||
dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100))
WT_RET(__wt_evict_server_wake(session));
+
return (0);
}
@@ -136,10 +141,7 @@ static inline int
__wt_cache_full_check(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
- WT_DECL_RET;
- WT_TXN_GLOBAL *txn_global;
- WT_TXN_STATE *txn_state;
- int busy, count, full;
+ int full;
/*
* LSM sets the no-cache-check flag when holding the LSM tree lock, in
@@ -162,73 +164,15 @@ __wt_cache_full_check(WT_SESSION_IMPL *session)
* Only wake the eviction server the first time through here (if the
* cache is too full).
*
- * If the cache is less than 95% full, no work to be done.
+ * If the cache is less than 95% full, no work to be done. If we are
+ * at the API boundary and the cache is more than 95% full, try to
+ * evict at least one page before we start an operation. This helps
+ * with some eviction-dominated workloads.
*/
WT_RET(__wt_eviction_check(session, &full, 1));
if (full < 95)
return (0);
- /*
- * If we are at the API boundary and the cache is more than 95% full,
- * try to evict at least one page before we start an operation. This
- * helps with some eviction-dominated workloads.
- *
- * If the current transaction is keeping the oldest ID pinned, it is in
- * the middle of an operation. This may prevent the oldest ID from
- * moving forward, leading to deadlock, so only evict what we can.
- * Otherwise, we are at a transaction boundary and we can work harder
- * to make sure there is free space in the cache.
- */
- txn_global = &S2C(session)->txn_global;
- txn_state = &txn_global->states[session->id];
- busy = txn_state->id != WT_TXN_NONE ||
- session->nhazard > 0 ||
- (txn_state->snap_min != WT_TXN_NONE &&
- txn_global->current != txn_global->oldest_id);
- if (busy && full < 100)
- return (0);
- count = busy ? 1 : 10;
-
- for (;;) {
- switch (ret = __wt_evict_lru_page(session, 1)) {
- case 0:
- if (--count == 0)
- return (0);
- break;
- case EBUSY:
- continue;
- case WT_NOTFOUND:
- break;
- default:
- return (ret);
- }
-
- WT_RET(__wt_eviction_check(session, &full, 0));
- if (full < 100)
- return (0);
- else if (ret == 0)
- continue;
-
- /*
- * The cache is still full and no pages were found in the queue
- * to evict. If this transaction is the one holding back the
- * oldest ID, we can't wait forever. We'll block next time we
- * are not busy.
- */
- if (busy) {
- __wt_txn_update_oldest(session);
- if (txn_state->id == txn_global->oldest_id ||
- txn_state->snap_min == txn_global->oldest_id)
- return (0);
- }
-
- /* Wait for the queue to re-populate before trying again. */
- WT_RET(__wt_cond_wait(session,
- S2C(session)->cache->evict_waiter_cond, 100000));
-
- /* Check if things have changed so that we are busy. */
- if (!busy && txn_state->snap_min != WT_TXN_NONE &&
- txn_global->current != txn_global->oldest_id)
- busy = count = 1;
- }
+ return (__wt_cache_wait(session, full));
}
+
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index c5723882489..ff34b014ecf 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -196,7 +196,8 @@ struct __wt_connection_impl {
uint32_t hazard_max; /* Hazard array size */
WT_CACHE *cache; /* Page cache */
- uint64_t cache_size;
+ int cache_overhead; /* Cache percent adjustment */
+ uint64_t cache_size; /* Configured cache size */
WT_TXN_GLOBAL txn_global; /* Global transaction state */
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 35b8dfc113e..ee9c27581c8 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -280,7 +280,7 @@ extern int __wt_cursor_get_valuev(WT_CURSOR *cursor, va_list ap);
extern void __wt_cursor_set_value(WT_CURSOR *cursor, ...);
extern void __wt_cursor_set_valuev(WT_CURSOR *cursor, va_list ap);
extern int __wt_cursor_close(WT_CURSOR *cursor);
-extern int __wt_cursor_equal(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp);
+extern int __wt_cursor_equals(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp);
extern int __wt_cursor_reconfigure(WT_CURSOR *cursor, const char *config);
extern int __wt_cursor_dup_position(WT_CURSOR *to_dup, WT_CURSOR *cursor);
extern int __wt_cursor_init(WT_CURSOR *cursor, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp);
@@ -298,7 +298,8 @@ extern int __wt_evict_destroy(WT_SESSION_IMPL *session);
extern int __wt_evict_page(WT_SESSION_IMPL *session, WT_REF *ref);
extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session);
extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session);
-extern int __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_app);
+extern int __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_server);
+extern int __wt_cache_wait(WT_SESSION_IMPL *session, int full);
extern void __wt_cache_dump(WT_SESSION_IMPL *session);
extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive);
extern void __wt_rec_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref);
diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h
index 17a3fd5a23a..f88a5381227 100644
--- a/src/third_party/wiredtiger/src/include/log.h
+++ b/src/third_party/wiredtiger/src/include/log.h
@@ -135,6 +135,8 @@ typedef struct {
/* Notify any waiting threads when sync_lsn is updated. */
WT_CONDVAR *log_sync_cond;
+ /* Notify any waiting threads when write_lsn is updated. */
+ WT_CONDVAR *log_write_cond;
/*
* Consolidation array information
diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h
index 97fc2a47f20..71d6e802fa8 100644
--- a/src/third_party/wiredtiger/src/include/misc.h
+++ b/src/third_party/wiredtiger/src/include/misc.h
@@ -21,6 +21,7 @@
#define WT_GIGABYTE (1073741824)
#define WT_TERABYTE ((uint64_t)1099511627776)
#define WT_PETABYTE ((uint64_t)1125899906842624)
+#define WT_EXABYTE ((uint64_t)1152921504606846976)
/*
* Number of directory entries can grow dynamically.
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 6efb9970065..62917f303fd 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -176,7 +176,9 @@ struct __wt_connection_stats {
WT_STATS cache_eviction_slow;
WT_STATS cache_eviction_split;
WT_STATS cache_eviction_walk;
+ WT_STATS cache_eviction_worker_evicting;
WT_STATS cache_inmem_split;
+ WT_STATS cache_overhead;
WT_STATS cache_pages_dirty;
WT_STATS cache_pages_inuse;
WT_STATS cache_read;
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 656181790ed..f5f9b662e6f 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -382,11 +382,8 @@ __wt_txn_am_oldest(WT_SESSION_IMPL *session)
return (0);
WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (i = 0, s = txn_global->states;
- i < session_cnt;
- i++, s++)
- if ((id = s->id) != WT_TXN_NONE &&
- TXNID_LT(id, txn->id))
+ for (i = 0, s = txn_global->states; i < session_cnt; i++, s++)
+ if ((id = s->id) != WT_TXN_NONE && TXNID_LT(id, txn->id))
return (0);
return (1);
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 982e850241b..21a4a657506 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -1537,9 +1537,18 @@ struct __wt_connection {
* number of worker threads to service asynchronous requests., an
* integer between 1 and 20; default \c 2.}
* @config{ ),,}
+ * @config{cache_overhead, assume the heap allocator overhead is the
+ * specified percentage\, and adjust the cache size by that amount (for
+ * example\, if the cache size is 100GB\, a percentage of 10 means
+ * WiredTiger limits itself to allocating 90GB of memory). This value is
+ * configurable because different heap allocators have different
+ * overhead and different workloads will have different heap allocation
+ * sizes and patterns\, therefore applications may need to adjust this
+ * value based on allocator choice and behavior in measured workloads.,
+ * an integer between 0 and 30; default \c 8.}
* @config{cache_size, maximum heap memory to allocate for the cache. A
- * database should configure either a cache_size or a shared_cache not
- * both., an integer between 1MB and 10TB; default \c 100MB.}
+ * database should configure either \c cache_size or \c shared_cache but
+ * not both., an integer between 1MB and 10TB; default \c 100MB.}
* @config{checkpoint = (, periodically checkpoint the database., a set
* of related configuration options defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;log_size, wait for this amount of log
@@ -1864,9 +1873,17 @@ struct __wt_connection {
* I/O. The default value of -1 indicates a platform-specific alignment value
* should be used (4KB on Linux systems\, zero elsewhere)., an integer between
* -1 and 1MB; default \c -1.}
+ * @config{cache_overhead, assume the heap allocator overhead is the specified
+ * percentage\, and adjust the cache size by that amount (for example\, if the
+ * cache size is 100GB\, a percentage of 10 means WiredTiger limits itself to
+ * allocating 90GB of memory). This value is configurable because different heap
+ * allocators have different overhead and different workloads will have
+ * different heap allocation sizes and patterns\, therefore applications may
+ * need to adjust this value based on allocator choice and behavior in measured
+ * workloads., an integer between 0 and 30; default \c 8.}
* @config{cache_size, maximum heap memory to allocate for the cache. A
- * database should configure either a cache_size or a shared_cache not both., an
- * integer between 1MB and 10TB; default \c 100MB.}
+ * database should configure either \c cache_size or \c shared_cache but not
+ * both., an integer between 1MB and 10TB; default \c 100MB.}
* @config{checkpoint = (, periodically checkpoint the database., a set of
* related configuration options defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;log_size, wait for this amount of log record
@@ -3225,184 +3242,188 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CACHE_EVICTION_SPLIT 1042
/*! cache: pages walked for eviction */
#define WT_STAT_CONN_CACHE_EVICTION_WALK 1043
+/*! cache: eviction worker thread evicting pages */
+#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1044
/*! cache: in-memory page splits */
-#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1044
+#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1045
+/*! cache: percentage overhead */
+#define WT_STAT_CONN_CACHE_OVERHEAD 1046
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1045
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1047
/*! cache: pages currently held in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_INUSE 1046
+#define WT_STAT_CONN_CACHE_PAGES_INUSE 1048
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 1047
+#define WT_STAT_CONN_CACHE_READ 1049
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1048
+#define WT_STAT_CONN_CACHE_WRITE 1050
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1049
+#define WT_STAT_CONN_COND_WAIT 1051
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1050
+#define WT_STAT_CONN_CURSOR_CREATE 1052
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1051
+#define WT_STAT_CONN_CURSOR_INSERT 1053
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1052
+#define WT_STAT_CONN_CURSOR_NEXT 1054
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1053
+#define WT_STAT_CONN_CURSOR_PREV 1055
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1054
+#define WT_STAT_CONN_CURSOR_REMOVE 1056
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1055
+#define WT_STAT_CONN_CURSOR_RESET 1057
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1056
+#define WT_STAT_CONN_CURSOR_SEARCH 1058
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1057
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1059
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1058
+#define WT_STAT_CONN_CURSOR_UPDATE 1060
/*! data-handle: connection dhandles swept */
-#define WT_STAT_CONN_DH_CONN_HANDLES 1059
+#define WT_STAT_CONN_DH_CONN_HANDLES 1061
/*! data-handle: connection candidate referenced */
-#define WT_STAT_CONN_DH_CONN_REF 1060
+#define WT_STAT_CONN_DH_CONN_REF 1062
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_CONN_SWEEPS 1061
+#define WT_STAT_CONN_DH_CONN_SWEEPS 1063
/*! data-handle: connection time-of-death sets */
-#define WT_STAT_CONN_DH_CONN_TOD 1062
+#define WT_STAT_CONN_DH_CONN_TOD 1064
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1063
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1065
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1064
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1066
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1065
+#define WT_STAT_CONN_FILE_OPEN 1067
/*! log: log buffer size increases */
-#define WT_STAT_CONN_LOG_BUFFER_GROW 1066
+#define WT_STAT_CONN_LOG_BUFFER_GROW 1068
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1067
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1069
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1068
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1070
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1069
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1071
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1070
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1072
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1071
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1073
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1072
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1074
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1073
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1075
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1074
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1076
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1075
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1077
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1076
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1078
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1077
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1079
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1078
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1080
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1079
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1081
/*! log: log read operations */
-#define WT_STAT_CONN_LOG_READS 1080
+#define WT_STAT_CONN_LOG_READS 1082
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1081
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1083
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1082
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1084
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1083
+#define WT_STAT_CONN_LOG_SCANS 1085
/*! log: consolidated slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1084
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1086
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1085
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1087
/*! log: consolidated slot joins */
-#define WT_STAT_CONN_LOG_SLOT_JOINS 1086
+#define WT_STAT_CONN_LOG_SLOT_JOINS 1088
/*! log: consolidated slot join races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1087
+#define WT_STAT_CONN_LOG_SLOT_RACES 1089
/*! log: slots selected for switching that were unavailable */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1088
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1090
/*! log: record size exceeded maximum */
-#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1089
+#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1091
/*! log: failed to find a slot large enough for record */
-#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1090
+#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1092
/*! log: consolidated slot join transitions */
-#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1091
+#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1093
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1092
+#define WT_STAT_CONN_LOG_SYNC 1094
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1093
+#define WT_STAT_CONN_LOG_WRITES 1095
/*! LSM: sleep for LSM checkpoint throttle */
-#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1094
+#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1096
/*! LSM: sleep for LSM merge throttle */
-#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1095
+#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1097
/*! LSM: rows merged in an LSM tree */
-#define WT_STAT_CONN_LSM_ROWS_MERGED 1096
+#define WT_STAT_CONN_LSM_ROWS_MERGED 1098
/*! LSM: application work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1097
+#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1099
/*! LSM: merge work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1098
+#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1100
/*! LSM: tree queue hit maximum */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1099
+#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1101
/*! LSM: switch work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1100
+#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1102
/*! LSM: tree maintenance operations scheduled */
-#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1101
+#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1103
/*! LSM: tree maintenance operations discarded */
-#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1102
+#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1104
/*! LSM: tree maintenance operations executed */
-#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1103
+#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1105
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1104
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1106
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1105
+#define WT_STAT_CONN_MEMORY_FREE 1107
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1106
+#define WT_STAT_CONN_MEMORY_GROW 1108
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1107
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1109
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1108
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1110
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1109
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1111
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1110
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1112
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1111
+#define WT_STAT_CONN_PAGE_SLEEP 1113
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1112
+#define WT_STAT_CONN_READ_IO 1114
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1113
+#define WT_STAT_CONN_REC_PAGES 1115
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1114
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1116
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1115
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1117
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1116
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1118
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1117
+#define WT_STAT_CONN_RWLOCK_READ 1119
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1118
+#define WT_STAT_CONN_RWLOCK_WRITE 1120
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1119
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1121
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1120
+#define WT_STAT_CONN_SESSION_OPEN 1122
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1121
+#define WT_STAT_CONN_TXN_BEGIN 1123
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1122
+#define WT_STAT_CONN_TXN_CHECKPOINT 1124
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1123
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1125
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1124
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1126
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1125
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1127
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1126
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1128
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1127
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1129
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1128
+#define WT_STAT_CONN_TXN_COMMIT 1130
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1129
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1131
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1130
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1132
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1131
+#define WT_STAT_CONN_TXN_ROLLBACK 1133
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1132
+#define WT_STAT_CONN_WRITE_IO 1134
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index e75946e9885..a173a829436 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -883,6 +883,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
while (LOG_CMP(&log->write_lsn, &slot->slot_release_lsn) != 0)
__wt_yield();
log->write_lsn = slot->slot_end_lsn;
+ WT_ERR(__wt_cond_signal(session, log->log_write_cond));
if (F_ISSET(slot, SLOT_CLOSEFH))
WT_ERR(__wt_cond_signal(session, conn->log_close_cond));
@@ -1651,6 +1652,12 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp,
myslot.slot->slot_error == 0)
(void)__wt_cond_wait(
session, log->log_sync_cond, 10000);
+ } else if (LF_ISSET(WT_LOG_FLUSH)) {
+ /* Wait for our writes to reach the OS */
+ while (LOG_CMP(&log->write_lsn, &lsn) <= 0 &&
+ myslot.slot->slot_error == 0)
+ (void)__wt_cond_wait(
+ session, log->log_write_cond, 10000);
}
err:
if (locked)
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
index 39b4b4508b7..bf5ae465664 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
@@ -1429,7 +1429,7 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
__wt_cursor_set_key, /* set-key */
__wt_cursor_set_value, /* set-value */
__clsm_compare, /* compare */
- __wt_cursor_equal, /* equals */
+ __wt_cursor_equals, /* equals */
__clsm_next, /* next */
__clsm_prev, /* prev */
__clsm_reset, /* reset */
diff --git a/src/third_party/wiredtiger/src/os_win/os_map.c b/src/third_party/wiredtiger/src/os_win/os_map.c
index 5c78f371889..3c4edb59ea8 100644
--- a/src/third_party/wiredtiger/src/os_win/os_map.c
+++ b/src/third_party/wiredtiger/src/os_win/os_map.c
@@ -99,7 +99,10 @@ __wt_munmap(WT_SESSION_IMPL *session, WT_FH *fh, void *map, size_t len,
fh->name, len);
}
- CloseHandle(*mappingcookie);
+ if (CloseHandle(*mappingcookie) == 0) {
+ WT_RET_MSG(session, __wt_errno(),
+ "CloseHandle: MapViewOfFile: %s", fh->name);
+ }
*mappingcookie = 0;
diff --git a/src/third_party/wiredtiger/src/os_win/os_open.c b/src/third_party/wiredtiger/src/os_win/os_open.c
index 387c7d2597b..bfcfb13fc3b 100644
--- a/src/third_party/wiredtiger/src/os_win/os_open.c
+++ b/src/third_party/wiredtiger/src/os_win/os_open.c
@@ -214,13 +214,13 @@ __wt_close(WT_SESSION_IMPL *session, WT_FH *fh)
* windows since it is not possible to sync a directory
*/
if (fh->filehandle != INVALID_HANDLE_VALUE &&
- !CloseHandle(fh->filehandle) != 0) {
+ CloseHandle(fh->filehandle) == 0) {
ret = __wt_errno();
__wt_err(session, ret, "CloseHandle: %s", fh->name);
}
if (fh->filehandle_secondary != INVALID_HANDLE_VALUE &&
- !CloseHandle(fh->filehandle_secondary) != 0) {
+ CloseHandle(fh->filehandle_secondary) == 0) {
ret = __wt_errno();
__wt_err(session, ret, "CloseHandle: secondary: %s", fh->name);
}
diff --git a/src/third_party/wiredtiger/src/os_win/os_thread.c b/src/third_party/wiredtiger/src/os_win/os_thread.c
index 3ecfe2796d5..05f7dc15914 100644
--- a/src/third_party/wiredtiger/src/os_win/os_thread.c
+++ b/src/third_party/wiredtiger/src/os_win/os_thread.c
@@ -33,10 +33,19 @@ __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid)
{
WT_DECL_RET;
- if ((ret = WaitForSingleObject(tid, INFINITE)) == WAIT_OBJECT_0)
- return (0);
+ if ((ret = WaitForSingleObject(tid, INFINITE)) != WAIT_OBJECT_0)
+ /*
+ * If we fail to wait, we will leak handles so do not continue
+ */
+ WT_PANIC_RET(session, ret == WAIT_FAILED ? __wt_errno() : ret,
+ "Wait for thread join failed");
+
+ if (CloseHandle(tid) == 0) {
+ WT_RET_MSG(session, __wt_errno(),
+ "CloseHandle: thread join");
+ }
- WT_RET_MSG(session, ret, "WaitForSingleObject");
+ return (0);
}
/*
diff --git a/src/third_party/wiredtiger/src/schema/schema_open.c b/src/third_party/wiredtiger/src/schema/schema_open.c
index d613ced00aa..fa655c7108b 100644
--- a/src/third_party/wiredtiger/src/schema/schema_open.c
+++ b/src/third_party/wiredtiger/src/schema/schema_open.c
@@ -269,6 +269,7 @@ __wt_schema_open_index(WT_SESSION_IMPL *session,
cursor = NULL;
idx = NULL;
+ match = 0;
/* Build a search key. */
tablename = table->name;
@@ -343,6 +344,8 @@ __wt_schema_open_index(WT_SESSION_IMPL *session,
break;
}
WT_ERR_NOTFOUND_OK(ret);
+ if (idxname != NULL && !match)
+ ret = WT_NOTFOUND;
/* If we did a full pass, we won't need to do it again. */
if (idxname == NULL) {
@@ -557,6 +560,8 @@ __wt_schema_get_index(WT_SESSION_IMPL *session,
/* Otherwise, open it. */
WT_ERR(__wt_schema_open_index(
session, table, tend + 1, strlen(tend + 1), indexp));
+ if (tablep != NULL)
+ *tablep = table;
err: __wt_schema_release_table(session, table);
WT_RET(ret);
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 223d62d0559..8ddac870e0e 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -360,6 +360,8 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats)
"cache: eviction server populating queue, but not evicting pages";
stats->cache_eviction_slow.desc =
"cache: eviction server unable to reach eviction goal";
+ stats->cache_eviction_worker_evicting.desc =
+ "cache: eviction worker thread evicting pages";
stats->cache_eviction_force_fail.desc =
"cache: failed eviction of pages that exceeded the in-memory maximum";
stats->cache_eviction_hazard.desc =
@@ -387,6 +389,7 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats)
"cache: pages split during eviction";
stats->cache_eviction_walk.desc = "cache: pages walked for eviction";
stats->cache_write.desc = "cache: pages written from cache";
+ stats->cache_overhead.desc = "cache: percentage overhead";
stats->cache_bytes_dirty.desc =
"cache: tracked dirty bytes in the cache";
stats->cache_pages_dirty.desc =
@@ -548,6 +551,7 @@ __wt_stat_refresh_connection_stats(void *stats_arg)
stats->cache_eviction_server_evicting.v = 0;
stats->cache_eviction_server_not_evicting.v = 0;
stats->cache_eviction_slow.v = 0;
+ stats->cache_eviction_worker_evicting.v = 0;
stats->cache_eviction_force_fail.v = 0;
stats->cache_eviction_hazard.v = 0;
stats->cache_inmem_split.v = 0;
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 0cd341185f2..eae21d0b9f5 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -341,9 +341,10 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* into the session cache, but we're going to do that eventually anyway.
*/
WT_WITH_SCHEMA_LOCK(session,
- WT_WITH_DHANDLE_LOCK(session,
- ret = __checkpoint_apply_all(
- session, cfg, __wt_checkpoint_list, NULL)));
+ WT_WITH_TABLE_LOCK(session,
+ WT_WITH_DHANDLE_LOCK(session,
+ ret = __checkpoint_apply_all(
+ session, cfg, __wt_checkpoint_list, NULL))));
WT_ERR(ret);
/*
diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c
index b2515e3c8cd..62890db7895 100644
--- a/src/third_party/wiredtiger/src/txn/txn_recover.c
+++ b/src/third_party/wiredtiger/src/txn/txn_recover.c
@@ -452,12 +452,17 @@ __wt_txn_recover(WT_SESSION_IMPL *session)
if (WT_IS_INIT_LSN(&metafile->ckpt_lsn))
WT_ERR(__wt_log_scan(session,
NULL, WT_LOGSCAN_FIRST, __txn_log_recover, &r));
- else
+ else {
+ /*
+ * Start at the last checkpoint LSN referenced in the
+ * metadata. If we see the end of a checkpoint while
+ * scanning, we will change the full scan to start from
+ * there.
+ */
+ r.ckpt_lsn = metafile->ckpt_lsn;
WT_ERR(__wt_log_scan(session,
&metafile->ckpt_lsn, 0, __txn_log_recover, &r));
-
- WT_ASSERT(session,
- LOG_CMP(&r.ckpt_lsn, &conn->log->first_lsn) >= 0);
+ }
}
/* Scan the metadata to find the live files and their IDs. */
diff --git a/src/third_party/wiredtiger/tools/stat_data.py b/src/third_party/wiredtiger/tools/stat_data.py
index 89e06dbbf90..da6fb3a7f7f 100644
--- a/src/third_party/wiredtiger/tools/stat_data.py
+++ b/src/third_party/wiredtiger/tools/stat_data.py
@@ -6,6 +6,7 @@ no_scale_per_second_list = [
'cache: maximum bytes configured',
'cache: maximum page size at eviction',
'cache: pages currently held in the cache',
+ 'cache: percentage overhead',
'cache: tracked dirty bytes in the cache',
'cache: tracked dirty pages in the cache',
'connection: files currently open',
@@ -57,6 +58,7 @@ no_clear_list = [
'cache: bytes currently in the cache',
'cache: maximum bytes configured',
'cache: pages currently held in the cache',
+ 'cache: percentage overhead',
'connection: files currently open',
'log: maximum log file size',
'log: total log buffer size',