summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Mark Benvenuto <mark.benvenuto@mongodb.com> 2015-01-13 17:15:28 -0500
committer Mark Benvenuto <mark.benvenuto@mongodb.com> 2015-01-13 17:15:28 -0500
commit 2fe68fa71b2c4316c6d409cbb6d9f5af13a2342a (patch)
tree ef880d8922a9717a2d0bdf8a0ccb253fa916f32c /src
parent fb20325176b25317525b0cd6118586a386048e91 (diff)
download mongo-2fe68fa71b2c4316c6d409cbb6d9f5af13a2342a.tar.gz
Import wiredtiger-wiredtiger-2.5.0-92-gd56476d.tar.gz from wiredtiger branch mongodb-2.8
Diffstat (limited to 'src')
-rw-r--r--src/third_party/wiredtiger/dist/api_err.py84
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok2
-rw-r--r--src/third_party/wiredtiger/dist/s_symbols.list1
-rw-r--r--src/third_party/wiredtiger/dist/s_tags13
-rw-r--r--src/third_party/wiredtiger/dist/stat_data.py4
-rw-r--r--src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java1
-rw-r--r--src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackInputStream.java8
-rw-r--r--src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackOutputStream.java7
-rw-r--r--src/third_party/wiredtiger/lang/java/wiredtiger.i93
-rw-r--r--src/third_party/wiredtiger/lang/python/wiredtiger.i15
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_debug.c21
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_delete.c10
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_page.c15
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_split.c175
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_walk.c9
-rw-r--r--src/third_party/wiredtiger/src/conn/api_strerror.c77
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_cache.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_ds.c1
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_log.c37
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_lru.c13
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_page.c17
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h5
-rw-r--r--src/third_party/wiredtiger/src/include/btree.i97
-rw-r--r--src/third_party/wiredtiger/src/include/cache.h2
-rw-r--r--src/third_party/wiredtiger/src/include/cache.i59
-rw-r--r--src/third_party/wiredtiger/src/include/cursor.h2
-rw-r--r--src/third_party/wiredtiger/src/include/cursor.i12
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h2
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h2
-rw-r--r--src/third_party/wiredtiger/src/include/txn.h1
-rw-r--r--src/third_party/wiredtiger/src/include/txn.i10
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in242
-rw-r--r--src/third_party/wiredtiger/src/include/wt_internal.h2
-rw-r--r--src/third_party/wiredtiger/src/log/log.c8
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_cursor.c7
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_errno.c46
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_errno.c98
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_ftruncate.c7
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_mtx_cond.c4
-rw-r--r--src/third_party/wiredtiger/src/os_win/os_rename.c4
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_open.c7
-rw-r--r--src/third_party/wiredtiger/src/session/session_api.c7
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c6
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c14
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_log.c10
-rw-r--r--src/third_party/wiredtiger/tools/stat_data.py1
46 files changed, 902 insertions, 358 deletions
diff --git a/src/third_party/wiredtiger/dist/api_err.py b/src/third_party/wiredtiger/dist/api_err.py
index cb2c8cc588e..6c893c9af82 100644
--- a/src/third_party/wiredtiger/dist/api_err.py
+++ b/src/third_party/wiredtiger/dist/api_err.py
@@ -78,7 +78,7 @@ for line in open('../src/include/wiredtiger.in', 'r'):
tfile.close()
compare_srcfile(tmp_file, '../src/include/wiredtiger.in')
-# Output the wiredtiger_strerror code.
+# Output the wiredtiger_strerror and wiredtiger_strerror_r code.
tmp_file = '__tmp'
tfile = open(tmp_file, 'w')
tfile.write('''/* DO NOT EDIT: automatically built by dist/api_err.py. */
@@ -86,18 +86,22 @@ tfile.write('''/* DO NOT EDIT: automatically built by dist/api_err.py. */
#include "wt_internal.h"
/*
- * wiredtiger_strerror --
- *\tReturn a string for any error value.
+ * Historically, there was only the wiredtiger_strerror call because the POSIX
+ * port didn't need anything more complex; Windows requires memory allocation
+ * of error strings, so we added the wiredtiger_strerror_r call. Because we
+ * want wiredtiger_strerror to continue to be as thread-safe as possible, errors
+ * are split into three categories: WiredTiger constant strings, system constant
+ * strings and Everything Else, and we check constant strings before Everything
+ * Else.
*/
-const char *
-wiredtiger_strerror(int error)
-{
-\tstatic char errbuf[64];
-\tchar *p;
-
-\tif (error == 0)
-\t\treturn ("Successful return: 0");
+/*
+ * __wiredtiger_error --
+ *\tReturn a constant string for the WiredTiger errors.
+ */
+static const char *
+__wiredtiger_error(int error)
+{
\tswitch (error) {
''')
@@ -105,19 +109,51 @@ for err in errors:
tfile.write('\tcase ' + err.name + ':\n')
tfile.write('\t\treturn ("' + err.name + ': ' + err.desc + '");\n')
-tfile.write('''\
-\tdefault:
-\t\tif (error > 0 && (p = strerror(error)) != NULL)
-\t\t\treturn (p);
-\t\tbreak;
-\t}
-
-\t/*
-\t * !!!
-\t * Not thread-safe, but this is never supposed to happen.
-\t */
-\t(void)snprintf(errbuf, sizeof(errbuf), "Unknown error: %d", error);
-\treturn (errbuf);
+tfile.write('''\t}
+\treturn (NULL);
+}
+
+/*
+ * wiredtiger_strerror --
+ *\tReturn a string for any error value, non-thread-safe version.
+ */
+const char *
+wiredtiger_strerror(int error)
+{
+\tstatic char buf[128];
+\tconst char *p;
+
+\t/* Check for a constant string. */
+\tif ((p = __wiredtiger_error(error)) != NULL ||
+\t (p = __wt_strerror(error)) != NULL)
+\t\treturn (p);
+
+\t/* Else, fill in the non-thread-safe static buffer. */
+\tif (wiredtiger_strerror_r(error, buf, sizeof(buf)) != 0)
+\t\t(void)snprintf(buf, sizeof(buf), "error return: %d", error);
+
+\treturn (buf);
+}
+
+/*
+ * wiredtiger_strerror_r --
+ *\tReturn a string for any error value, thread-safe version.
+ */
+int
+wiredtiger_strerror_r(int error, char *buf, size_t buflen)
+{
+\tconst char *p;
+
+\t/* Require at least 2 bytes, printable character and trailing nul. */
+\tif (buflen < 2)
+\t\treturn (ENOMEM);
+
+\t/* Check for a constant string. */
+\tif ((p = __wiredtiger_error(error)) != NULL ||
+\t (p = __wt_strerror(error)) != NULL)
+\t\treturn (snprintf(buf, buflen, "%s", p) > 0 ? 0 : ENOMEM);
+
+\treturn (__wt_strerror_r(error, buf, buflen));
}
''')
tfile.close()
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index db1114b77de..d3717d27331 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -181,6 +181,7 @@ KV
KVS
Kanowski's
Kounavis
+LANGID
LEX
LF
LIBBZ
@@ -437,6 +438,7 @@ btmem
btree
btrees
buf
+buflen
bufs
bufsz
builtin
diff --git a/src/third_party/wiredtiger/dist/s_symbols.list b/src/third_party/wiredtiger/dist/s_symbols.list
index d3803bc3afa..8f469e94433 100644
--- a/src/third_party/wiredtiger/dist/s_symbols.list
+++ b/src/third_party/wiredtiger/dist/s_symbols.list
@@ -8,6 +8,7 @@ wiredtiger_pack_start
wiredtiger_pack_str
wiredtiger_pack_uint
wiredtiger_strerror
+wiredtiger_strerror_r
wiredtiger_struct_pack
wiredtiger_struct_size
wiredtiger_struct_unpack
diff --git a/src/third_party/wiredtiger/dist/s_tags b/src/third_party/wiredtiger/dist/s_tags
index 908b5eb7e0d..faed132d05b 100644
--- a/src/third_party/wiredtiger/dist/s_tags
+++ b/src/third_party/wiredtiger/dist/s_tags
@@ -35,10 +35,19 @@ ctags $flags ../src/include/*.in ../src/*/*.[chi] 2>/dev/null)
rm -f tags
ctags $flags ../include/*.in ../*/*.[chi] 2>/dev/null)
+# Link the tags file into place if we're at the right level.
+link_tag()
+{
+ if test -e ../include/tags; then
+ rm -f tags && ln -s ../include/tags .
+ fi
+}
+
# Link to the tags file from standard build and source directories.
dirs="`python -c 'import dist; dist.print_source_dirs()'` ../src/os_win"
for i in $dirs; do
- if ! expr "$i" : ".*/include" > /dev/null; then
- (cd $i && rm -f tags && ln -s ../include/tags .)
+ if expr "$i" : ".*/include" > /dev/null; then
+ continue
fi
+ (cd $i && link_tag)
done
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index a6a047fd10e..69e8d2ed21e 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -162,10 +162,14 @@ connection_stats = [
'pages selected for eviction unable to be evicted'),
CacheStat('cache_eviction_force',
'pages evicted because they exceeded the in-memory maximum'),
+ CacheStat('cache_eviction_force_delete',
+ 'pages evicted because they had chains of deleted items'),
CacheStat('cache_eviction_force_fail',
'failed eviction of pages that exceeded the in-memory maximum'),
CacheStat('cache_eviction_hazard', 'hazard pointer blocked page eviction'),
CacheStat('cache_eviction_internal', 'internal pages evicted'),
+ CacheStat('cache_eviction_maximum_page_size',
+ 'maximum page size at eviction', 'max_aggregate,no_scale'),
CacheStat('cache_eviction_queue_empty',
'eviction server candidate queue empty when topping up'),
CacheStat('cache_eviction_queue_not_empty',
diff --git a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java
index c9d1c43d32d..c53938d0a58 100644
--- a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java
+++ b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackFormatInputStream.java
@@ -85,7 +85,6 @@ public class PackFormatInputStream {
protected char getType()
throws WiredTigerPackingException {
if (formatOff >= format.length()) {
- System.err.println("Raw format is: " + format);
throw new WiredTigerPackingException(
"No more fields in format.");
}
diff --git a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackInputStream.java b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackInputStream.java
index 75bdb3119a9..a49b2e01f17 100644
--- a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackInputStream.java
+++ b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackInputStream.java
@@ -225,6 +225,7 @@ public class PackInputStream {
public String getString()
throws WiredTigerPackingException {
int stringLength = 0;
+ int skipnull = 0;
format.checkType('S', false);
// Get the length for a fixed length string
if (format.getType() != 'S') {
@@ -235,10 +236,11 @@ public class PackInputStream {
// string length.
for (; valueOff + stringLength < value.length &&
value[valueOff + stringLength] != 0; stringLength++) {}
+ skipnull = 1;
}
format.consume();
String result = new String(value, valueOff, stringLength);
- valueOff += stringLength + 1;
+ valueOff += stringLength + skipnull;
return result;
}
@@ -250,7 +252,7 @@ public class PackInputStream {
private short unpackShort(boolean signed)
throws WiredTigerPackingException {
long ret = unpackLong(true);
- if ((signed && (ret > Short.MAX_VALUE || ret > Short.MIN_VALUE)) ||
+ if ((signed && (ret > Short.MAX_VALUE || ret < Short.MIN_VALUE)) ||
(!signed && (short)ret < 0)) {
throw new WiredTigerPackingException("Overflow unpacking short.");
}
@@ -265,7 +267,7 @@ public class PackInputStream {
private int unpackInt(boolean signed)
throws WiredTigerPackingException {
long ret = unpackLong(true);
- if ((signed && (ret > Integer.MAX_VALUE || ret > Integer.MIN_VALUE)) ||
+ if ((signed && (ret > Integer.MAX_VALUE || ret < Integer.MIN_VALUE)) ||
(!signed && (int)ret < 0)) {
throw new WiredTigerPackingException("Overflow unpacking integer.");
}
diff --git a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackOutputStream.java b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackOutputStream.java
index 60f40564afd..e79b4c63498 100644
--- a/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackOutputStream.java
+++ b/src/third_party/wiredtiger/lang/java/src/com/wiredtiger/db/PackOutputStream.java
@@ -174,13 +174,16 @@ public class PackOutputStream {
char fieldFormat = format.getType();
int stringLen = 0;
int padBytes = 0;
+ int valLen = 0;
// Strings have two possible encodings. A lower case 's' is not null
// terminated, and has a length defined in the format (default 1). An
// upper case 'S' is variable length and has a null terminator.
if (fieldFormat == 's') {
stringLen = format.getLengthFromFormat(true);
- if (stringLen > value.length()) {
- padBytes = stringLen - value.length();
+ valLen = value.length();
+ if (stringLen > valLen) {
+ padBytes = stringLen - valLen;
+ stringLen = valLen;
}
} else {
stringLen = value.length();
diff --git a/src/third_party/wiredtiger/lang/java/wiredtiger.i b/src/third_party/wiredtiger/lang/java/wiredtiger.i
index a922a7a6b2e..09290a70c67 100644
--- a/src/third_party/wiredtiger/lang/java/wiredtiger.i
+++ b/src/third_party/wiredtiger/lang/java/wiredtiger.i
@@ -652,6 +652,19 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
}
/**
+ * Append a record number to the async_op's key.
+ *
+ * \param value The value to append
+ * \return This async_op object, so put calls can be chained.
+ */
+ public AsyncOp putKeyRecord(long value)
+ throws WiredTigerPackingException {
+ keyUnpacker = null;
+ keyPacker.addRecord(value);
+ return this;
+ }
+
+ /**
* Append a short integer to the async_op's key.
*
* \param value The value to append
@@ -744,6 +757,19 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
}
/**
+ * Append a record number to the async_op's value.
+ *
+ * \param value The value to append
+ * \return This async_op object, so put calls can be chained.
+ */
+ public AsyncOp putValueRecord(long value)
+ throws WiredTigerPackingException {
+ valueUnpacker = null;
+ valuePacker.addRecord(value);
+ return this;
+ }
+
+ /**
* Append a short integer to the async_op's value.
*
* \param value The value to append
@@ -835,6 +861,16 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
}
/**
+ * Retrieve a record number from the async_op's key.
+ *
+ * \return The requested value.
+ */
+ public long getKeyRecord()
+ throws WiredTigerPackingException {
+ return getKeyUnpacker().getRecord();
+ }
+
+ /**
* Retrieve a short integer from the async_op's key.
*
* \return The requested value.
@@ -920,6 +956,16 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
}
/**
+ * Retrieve a record number from the async_op's value.
+ *
+ * \return The requested value.
+ */
+ public long getValueRecord()
+ throws WiredTigerPackingException {
+ return getValueUnpacker().getRecord();
+ }
+
+ /**
* Retrieve a short integer from the async_op's value.
*
* \return The requested value.
@@ -1202,6 +1248,18 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
}
/**
+ * Append a record number to the cursor's key.
+ *
+ * \param value The value to append
+ * \return This cursor object, so put calls can be chained.
+ */
+ public Cursor putKeyRecord(long value)
+ throws WiredTigerPackingException {
+ keyPacker.addRecord(value);
+ return this;
+ }
+
+ /**
* Append a short integer to the cursor's key.
*
* \param value The value to append
@@ -1288,6 +1346,18 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
}
/**
+ * Append a record number to the cursor's value.
+ *
+ * \param value The value to append
+ * \return This cursor object, so put calls can be chained.
+ */
+ public Cursor putValueRecord(long value)
+ throws WiredTigerPackingException {
+ valuePacker.addRecord(value);
+ return this;
+ }
+
+ /**
* Append a short integer to the cursor's value.
*
* \param value The value to append
@@ -1377,6 +1447,16 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
}
/**
+ * Retrieve a record number from the cursor's key.
+ *
+ * \return The requested value.
+ */
+ public long getKeyRecord()
+ throws WiredTigerPackingException {
+ return keyUnpacker.getRecord();
+ }
+
+ /**
* Retrieve a short integer from the cursor's key.
*
* \return The requested value.
@@ -1462,6 +1542,16 @@ WT_ASYNC_CALLBACK javaApiAsyncHandler = {javaAsyncHandler};
}
/**
+ * Retrieve a record number from the cursor's value.
+ *
+ * \return The requested value.
+ */
+ public long getValueRecord()
+ throws WiredTigerPackingException {
+ return valueUnpacker.getRecord();
+ }
+
+ /**
* Retrieve a short integer from the cursor's value.
*
* \return The requested value.
@@ -1801,7 +1891,8 @@ err: if (ret != 0)
if ((ret = $self->open_cursor($self, uri, to_dup, config, &cursor)) != 0)
goto err;
- cursor->flags |= WT_CURSTD_RAW;
+ if ((cursor->flags & WT_CURSTD_DUMP_JSON) == 0)
+ cursor->flags |= WT_CURSTD_RAW;
if ((ret = __wt_calloc_def((WT_SESSION_IMPL *)cursor->session,
1, &jcb)) != 0)
diff --git a/src/third_party/wiredtiger/lang/python/wiredtiger.i b/src/third_party/wiredtiger/lang/python/wiredtiger.i
index 974118d0f61..de5afb0a0fa 100644
--- a/src/third_party/wiredtiger/lang/python/wiredtiger.i
+++ b/src/third_party/wiredtiger/lang/python/wiredtiger.i
@@ -339,7 +339,9 @@ retry:
if (result != 0 && result != EBUSY)
SWIG_ERROR_IF_NOT_SET(result);
else if (result == EBUSY) {
+ SWIG_PYTHON_THREAD_BEGIN_ALLOW;
__wt_sleep(0, 10000);
+ SWIG_PYTHON_THREAD_END_ALLOW;
goto retry;
}
}
@@ -361,10 +363,19 @@ retry:
}
%enddef
-/* Cursor compare can return any of -1, 0, 1 or WT_NOTFOUND. */
+/* Cursor compare can return any of -1, 0, 1. */
%define COMPARE_OK(m)
%exception m {
$action
+ if (result < -1 || result > 1)
+ SWIG_ERROR_IF_NOT_SET(result);
+}
+%enddef
+
+/* Cursor compare can return any of -1, 0, 1 or WT_NOTFOUND. */
+%define COMPARE_NOTFOUND_OK(m)
+%exception m {
+ $action
if ((result < -1 || result > 1) && result != WT_NOTFOUND)
SWIG_ERROR_IF_NOT_SET(result);
}
@@ -379,7 +390,7 @@ NOTFOUND_OK(__wt_cursor::search)
NOTFOUND_OK(__wt_cursor::update)
COMPARE_OK(__wt_cursor::compare)
-COMPARE_OK(__wt_cursor::search_near)
+COMPARE_NOTFOUND_OK(__wt_cursor::search_near)
/* Lastly, some methods need no (additional) error checking. */
%exception __wt_connection::get_home;
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index 4de94277364..af9f6a669f2 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -408,11 +408,13 @@ __debug_tree_shape_info(WT_PAGE *page)
v = page->memory_footprint;
if (v >= WT_GIGABYTE)
- snprintf(buf, sizeof(buf), "(%" PRIu64 "G)", v / WT_GIGABYTE);
+ snprintf(buf, sizeof(buf),
+ "(%p %" PRIu64 "G)", page, v / WT_GIGABYTE);
else if (v >= WT_MEGABYTE)
- snprintf(buf, sizeof(buf), "(%" PRIu64 "M)", v / WT_MEGABYTE);
+ snprintf(buf, sizeof(buf),
+ "(%p %" PRIu64 "M)", page, v / WT_MEGABYTE);
else
- snprintf(buf, sizeof(buf), "(%" PRIu64 ")", v);
+ snprintf(buf, sizeof(buf), "(%p %" PRIu64 ")", page, v);
return (buf);
}
@@ -429,16 +431,16 @@ __debug_tree_shape_worker(WT_DBG *ds, WT_PAGE *page, int level)
session = ds->session;
if (page->type == WT_PAGE_ROW_INT || page->type == WT_PAGE_COL_INT) {
- __dmsg(ds, "%*s" "I" "%s\n",
- level, " ", __debug_tree_shape_info(page));
+ __dmsg(ds, "%*s" "I" "%d %s\n",
+ level * 3, " ", level, __debug_tree_shape_info(page));
WT_INTL_FOREACH_BEGIN(session, page, ref) {
if (ref->state == WT_REF_MEM)
__debug_tree_shape_worker(
- ds, ref->page, level + 3);
+ ds, ref->page, level + 1);
} WT_INTL_FOREACH_END;
} else
- __dmsg(ds, "%*s" "L" "%s\n",
- level, " ", __debug_tree_shape_info(page));
+ __dmsg(ds, "%*s" "L" " %s\n",
+ level * 3, " ", __debug_tree_shape_info(page));
}
/*
@@ -458,8 +460,7 @@ __wt_debug_tree_shape(
if (page == NULL)
page = S2BT(session)->root.page;
- WT_WITH_PAGE_INDEX(session,
- __debug_tree_shape_worker(ds, page, 0));
+ WT_WITH_PAGE_INDEX(session, __debug_tree_shape_worker(ds, page, 1));
__dmsg_wrapup(ds);
return (0);
diff --git a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c
index c97ea176c97..622dfb1b294 100644
--- a/src/third_party/wiredtiger/src/btree/bt_delete.c
+++ b/src/third_party/wiredtiger/src/btree/bt_delete.c
@@ -207,6 +207,9 @@ __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref)
{
int skip;
+ if (ref->state != WT_REF_DELETED)
+ return (0);
+
/*
* Deleted pages come from two sources: either it's a fast-delete as
* described above, or the page has been emptied by other operations
@@ -225,11 +228,14 @@ __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref)
* the page could switch to an in-memory state at any time. Lock down
* the structure, just to be safe.
*/
+ if (ref->page_del == NULL)
+ return (1);
+
if (!WT_ATOMIC_CAS4(ref->state, WT_REF_DELETED, WT_REF_LOCKED))
return (0);
- skip = ref->page_del == NULL ||
- __wt_txn_visible(session, ref->page_del->txnid) ? 1 : 0;
+ skip = (ref->page_del == NULL ||
+ __wt_txn_visible(session, ref->page_del->txnid));
WT_PUBLISH(ref->state, WT_REF_DELETED);
return (skip);
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index 181ffdb3736..561e1c19218 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -37,8 +37,11 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_PAGE *page)
page->type != WT_PAGE_ROW_LEAF)
return (0);
- /* Eviction may be turned off, although that's rare. */
- if (F_ISSET(btree, WT_BTREE_NO_EVICTION))
+ /*
+ * Eviction may be turned off (although that's rare), or we may be in
+ * the middle of a checkpoint.
+ */
+ if (F_ISSET(btree, WT_BTREE_NO_EVICTION) || btree->checkpointing)
return (0);
/*
@@ -128,7 +131,13 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
force_attempts < 10 &&
__evict_force_check(session, page)) {
++force_attempts;
- WT_RET(__wt_page_release(session, ref, flags));
+ if ((ret = __wt_page_release_busy(
+ session, ref, flags)) == EBUSY) {
+ /* If forced eviction fails, stall. */
+ ret = 0;
+ wait_cnt += 1000;
+ } else
+ WT_RET(ret);
WT_STAT_FAST_CONN_INCR(
session, page_forcible_evict_blocked);
break;
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 1c62391f722..70d0758dede 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -9,15 +9,6 @@
#include "wt_internal.h"
/*
- * Tuning; global variables to allow the binary to be patched, we don't yet have
- * any real understanding of what might be useful to surface to applications.
- */
-static u_int __split_deepen_max_internal_image = 100;
-static u_int __split_deepen_min_child = 10;
-static u_int __split_deepen_per_child = 100;
-static u_int __split_deepen_split_child = 100;
-
-/*
* Track allocation increments, matching the cache calculations, which add an
* estimate of allocation overhead to every object.
*/
@@ -177,45 +168,57 @@ __split_safe_free(WT_SESSION_IMPL *session, int exclusive, void *p, size_t s)
}
/*
+ * Tuning; global variables to allow the binary to be patched, we don't yet have
+ * any real understanding of what might be useful to surface to applications.
+ */
+static u_int __split_deepen_min_child = 10000;
+static u_int __split_deepen_per_child = 100;
+
+/*
* __split_should_deepen --
* Return if we should deepen the tree.
*/
static int
-__split_should_deepen(WT_SESSION_IMPL *session, WT_PAGE *page)
+__split_should_deepen(
+ WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *childrenp)
{
WT_PAGE_INDEX *pindex;
+ WT_PAGE *page;
- /*
- * Splits are based on either the number of child pages that will be
- * created by the split (splitting an internal page that will be slow
- * to search), or by the memory footprint of the parent page (avoiding
- * an internal page that will eat up all of the cache and put eviction
- * pressure on the system).
- */
+ *childrenp = 0;
+
+ page = ref->page;
pindex = WT_INTL_INDEX_COPY(page);
/*
* Deepen the tree if the page's memory footprint is larger than the
- * maximum size for a page in memory. We need an absolute minimum
- * number of entries in order to split the page: if there is a single
- * huge key, splitting won't help.
+ * maximum size for a page in memory (presumably putting eviction
+ * pressure on the cache).
*/
- if (page->memory_footprint > S2BT(session)->maxmempage &&
- pindex->entries >= __split_deepen_min_child)
- return (1);
+ if (page->memory_footprint < S2BT(session)->maxmempage)
+ return (0);
/*
- * Deepen the tree if the page's memory footprint is at least N
- * times the maximum internal page size chunk in the backing file and
- * the split will result in at least N children in the newly created
- * intermediate layer.
+ * Ensure the page has enough entries to make it worth splitting and
+ * we get a significant payback (in the case of a set of large keys,
+ * splitting won't help).
*/
- if (page->memory_footprint >
- __split_deepen_max_internal_image * S2BT(session)->maxintlpage &&
- pindex->entries >=
- (__split_deepen_per_child * __split_deepen_split_child))
+ if (pindex->entries > __split_deepen_min_child) {
+ *childrenp = pindex->entries / __split_deepen_per_child;
return (1);
+ }
+ /*
+ * The root is a special-case: if it's putting cache pressure on the
+ * system, split it even if there are only a few entries, we can't
+ * push it out of memory. Sanity check: if the root page is too big
+ * with less than 100 keys, there are huge keys and/or a too-small
+ * cache, there's not much to do.
+ */
+ if (__wt_ref_is_root(ref) && pindex->entries > 100) {
+ *childrenp = pindex->entries / 10;
+ return (1);
+ }
return (0);
}
@@ -383,7 +386,7 @@ __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page)
* Split an internal page in-memory, deepening the tree.
*/
static int
-__split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent)
+__split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
{
WT_DECL_RET;
WT_PAGE *child;
@@ -391,7 +394,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent)
WT_REF **alloc_refp;
WT_REF *child_ref, **child_refp, *parent_ref, **parent_refp, *ref;
size_t child_incr, parent_decr, parent_incr, size;
- uint32_t children, chunk, i, j, remain, slots;
+ uint32_t chunk, i, j, remain, slots;
int panic;
void *p;
@@ -401,13 +404,6 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent)
pindex = WT_INTL_INDEX_COPY(parent);
- /*
- * Create N children, unless we are dealing with a large page without
- * many entries, in which case split into the minimum number of pages.
- */
- children = WT_MAX(pindex->entries / __split_deepen_per_child,
- __split_deepen_min_child);
-
WT_STAT_FAST_CONN_INCR(session, cache_eviction_deepen);
WT_ERR(__wt_verbose(session, WT_VERB_SPLIT,
"%p: %" PRIu32 " elements, splitting into %" PRIu32 " children",
@@ -717,10 +713,11 @@ __split_multi_inmem(
/*
* We modified the page above, which will have set the first dirty
* transaction to the last transaction currently running. However, the
- * updates we installed may be older than that. Inherit the first
- * dirty transaction from the original page.
+ * updates we installed may be older than that. Set the first dirty
+ * transaction to an impossibly old value so this page is never skipped
+ * in a checkpoint.
*/
- page->modify->first_dirty_txn = orig->modify->first_dirty_txn;
+ page->modify->first_dirty_txn = WT_TXN_FIRST;
err: /* Free any resources that may have been cached in the cursor. */
WT_TRET(__wt_btcur_close(&cbt));
@@ -813,17 +810,20 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
int exclusive, int ref_discard)
{
WT_DECL_RET;
+ WT_IKEY *ikey;
WT_PAGE *parent;
WT_PAGE_INDEX *alloc_index, *pindex;
- WT_REF **alloc_refp, *parent_ref;
+ WT_REF **alloc_refp, *next_ref, *parent_ref;
size_t size;
- uint32_t i, j, parent_entries, result_entries;
+ uint32_t children, i, j;
+ uint32_t deleted_entries, parent_entries, result_entries;
int complete, hazard, locked;
parent = NULL; /* -Wconditional-uninitialized */
- alloc_index = NULL;
+ alloc_index = pindex = NULL;
parent_ref = NULL;
complete = hazard = locked = 0;
+ parent_entries = 0;
/*
* Get a page-level lock on the parent to single-thread splits into the
@@ -864,7 +864,29 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
pindex = WT_INTL_INDEX_COPY(parent);
parent_entries = pindex->entries;
- result_entries = (parent_entries - 1) + new_entries;
+
+ /*
+ * Remove any refs to deleted pages while we are splitting, we have
+ * the internal page locked down, and are copying the refs into a new
+ * array anyway. Switch them to the special split state, so that any
+ * reading thread will restart.
+ */
+ for (i = 0, deleted_entries = 0; i < parent_entries; ++i) {
+ next_ref = pindex->index[i];
+ WT_ASSERT(session, next_ref->state != WT_REF_SPLIT);
+ if (next_ref->state == WT_REF_DELETED &&
+ next_ref->page_del == NULL &&
+ WT_ATOMIC_CAS4(next_ref->state,
+ WT_REF_DELETED, WT_REF_SPLIT))
+ deleted_entries++;
+ }
+
+ /*
+ * The final entry count consists of: The original count, plus any
+ * new pages, less any refs we are removing because they only
+ * contained deleted items, less 1 for the page being replaced.
+ */
+ result_entries = (parent_entries + new_entries) - (deleted_entries + 1);
/*
* Allocate and initialize a new page index array for the parent, then
@@ -876,8 +898,9 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
WT_MEMSIZE_ADD(parent_incr, size);
alloc_index->index = (WT_REF **)(alloc_index + 1);
alloc_index->entries = result_entries;
- for (alloc_refp = alloc_index->index, i = 0; i < parent_entries; ++i)
- if (pindex->index[i] == ref)
+ for (alloc_refp = alloc_index->index, i = 0; i < parent_entries; ++i) {
+ next_ref = pindex->index[i];
+ if (next_ref == ref)
for (j = 0; j < new_entries; ++j) {
ref_new[j]->home = parent;
*alloc_refp++ = ref_new[j];
@@ -889,8 +912,26 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
*/
ref_new[j] = NULL;
}
- else
- *alloc_refp++ = pindex->index[i];
+ else if (next_ref->state == WT_REF_SPLIT) {
+ /*
+ * We're discarding a deleted reference.
+ * Free any resources it holds.
+ */
+ if (parent->type == WT_PAGE_ROW_INT) {
+ WT_TRET(__split_ovfl_key_cleanup(
+ session, parent, next_ref));
+ ikey = __wt_ref_key_instantiated(next_ref);
+ if (ikey != NULL)
+ WT_TRET(__split_safe_free(session, 0,
+ ikey,
+ sizeof(WT_IKEY) + ikey->size));
+ }
+
+ WT_TRET(__split_safe_free(
+ session, 0, next_ref, sizeof(WT_REF)));
+ } else
+ *alloc_refp++ = next_ref;
+ }
/*
* Update the parent page's index: this update makes the split visible
@@ -977,11 +1018,30 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
* Do the check here because we've just grown the parent page and
* are holding it locked.
*/
- if (ret == 0 && !exclusive && __split_should_deepen(session, parent))
+ if (ret == 0 && !exclusive &&
+ !F_ISSET_ATOMIC(parent, WT_PAGE_REFUSE_DEEPEN) &&
+ __split_should_deepen(session, parent_ref, &children)) {
+ /*
+ * XXX
+ * Temporary hack to avoid a bug where the root page is split
+ * even when it's no longer doing any good.
+ */
+ uint64_t __a, __b;
+ __a = parent->memory_footprint;
WT_WITH_PAGE_INDEX(session,
- ret = __split_deepen(session, parent));
+ ret = __split_deepen(session, parent, children));
+ __b = parent->memory_footprint;
+ if (__b * 2 >= __a)
+ F_SET_ATOMIC(parent, WT_PAGE_REFUSE_DEEPEN);
+ }
-err: if (locked)
+err: if (!complete)
+ for (i = 0; i < parent_entries; ++i) {
+ next_ref = pindex->index[i];
+ if (next_ref->state == WT_REF_SPLIT)
+ next_ref->state = WT_REF_DELETED;
+ }
+ if (locked)
F_CLR_ATOMIC(parent, WT_PAGE_SPLITTING);
if (hazard)
@@ -1137,10 +1197,11 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
/*
* We modified the page above, which will have set the first dirty
* transaction to the last transaction current running. However, the
- * updates we installed may be older than that. Inherit the first
- * dirty transaction from the original page.
+ * updates we installed may be older than that. Set the first dirty
+ * transaction to an impossibly old value so this page is never skipped
+ * in a checkpoint.
*/
- right->modify->first_dirty_txn = page->modify->first_dirty_txn;
+ right->modify->first_dirty_txn = WT_TXN_FIRST;
/*
* Calculate how much memory we're moving: figure out how deep the skip
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index c74a7177401..a2b2a6bb7c8 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -208,6 +208,12 @@ restart: /*
break;
} else if (LF_ISSET(WT_READ_TRUNCATE)) {
/*
+ * Avoid pulling a deleted page back in to try
+ * to delete it again.
+ */
+ if (__wt_delete_page_skip(session, ref))
+ break;
+ /*
* If deleting a range, try to delete the page
* without instantiating it.
*/
@@ -242,8 +248,7 @@ restart: /*
* If iterating a cursor, try to skip deleted
* pages that are visible to us.
*/
- if (ref->state == WT_REF_DELETED &&
- __wt_delete_page_skip(session, ref))
+ if (__wt_delete_page_skip(session, ref))
break;
}
diff --git a/src/third_party/wiredtiger/src/conn/api_strerror.c b/src/third_party/wiredtiger/src/conn/api_strerror.c
index caf536b24f7..396ae7a3e0f 100644
--- a/src/third_party/wiredtiger/src/conn/api_strerror.c
+++ b/src/third_party/wiredtiger/src/conn/api_strerror.c
@@ -3,18 +3,22 @@
#include "wt_internal.h"
/*
- * wiredtiger_strerror --
- * Return a string for any error value.
+ * Historically, there was only the wiredtiger_strerror call because the POSIX
+ * port didn't need anything more complex; Windows requires memory allocation
+ * of error strings, so we added the wiredtiger_strerror_r call. Because we
+ * want wiredtiger_strerror to continue to be as thread-safe as possible, errors
+ * are split into three categories: WiredTiger constant strings, system constant
+ * strings and Everything Else, and we check constant strings before Everything
+ * Else.
*/
-const char *
-wiredtiger_strerror(int error)
-{
- static char errbuf[64];
- char *p;
-
- if (error == 0)
- return ("Successful return: 0");
+/*
+ * __wiredtiger_error --
+ * Return a constant string for the WiredTiger errors.
+ */
+static const char *
+__wiredtiger_error(int error)
+{
switch (error) {
case WT_ROLLBACK:
return ("WT_ROLLBACK: conflict between concurrent operations");
@@ -28,16 +32,49 @@ wiredtiger_strerror(int error)
return ("WT_PANIC: WiredTiger library panic");
case WT_RESTART:
return ("WT_RESTART: restart the operation (internal)");
- default:
- if (error > 0 && (p = strerror(error)) != NULL)
- return (p);
- break;
}
+ return (NULL);
+}
+
+/*
+ * wiredtiger_strerror --
+ * Return a string for any error value, non-thread-safe version.
+ */
+const char *
+wiredtiger_strerror(int error)
+{
+ static char buf[128];
+ const char *p;
+
+ /* Check for a constant string. */
+ if ((p = __wiredtiger_error(error)) != NULL ||
+ (p = __wt_strerror(error)) != NULL)
+ return (p);
+
+ /* Else, fill in the non-thread-safe static buffer. */
+ if (wiredtiger_strerror_r(error, buf, sizeof(buf)) != 0)
+ (void)snprintf(buf, sizeof(buf), "error return: %d", error);
+
+ return (buf);
+}
+
+/*
+ * wiredtiger_strerror_r --
+ * Return a string for any error value, thread-safe version.
+ */
+int
+wiredtiger_strerror_r(int error, char *buf, size_t buflen)
+{
+ const char *p;
+
+ /* Require at least 2 bytes, printable character and trailing nul. */
+ if (buflen < 2)
+ return (ENOMEM);
+
+ /* Check for a constant string. */
+ if ((p = __wiredtiger_error(error)) != NULL ||
+ (p = __wt_strerror(error)) != NULL)
+ return (snprintf(buf, buflen, "%s", p) > 0 ? 0 : ENOMEM);
- /*
- * !!!
- * Not thread-safe, but this is never supposed to happen.
- */
- (void)snprintf(errbuf, sizeof(errbuf), "Unknown error: %d", error);
- return (errbuf);
+ return (__wt_strerror_r(error, buf, buflen));
}
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c
index cf129531dd4..91f82a5105b 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache.c
@@ -144,6 +144,8 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session)
WT_STAT_SET(stats, cache_bytes_inuse, __wt_cache_bytes_inuse(cache));
WT_STAT_SET(stats, cache_pages_inuse, __wt_cache_pages_inuse(cache));
WT_STAT_SET(stats, cache_bytes_dirty, cache->bytes_dirty);
+ WT_STAT_SET(stats,
+ cache_eviction_maximum_page_size, cache->evict_max_page_size);
WT_STAT_SET(stats, cache_pages_dirty, cache->pages_dirty);
}
diff --git a/src/third_party/wiredtiger/src/cursor/cur_ds.c b/src/third_party/wiredtiger/src/cursor/cur_ds.c
index c8b8f6c4547..2cb791de85d 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_ds.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_ds.c
@@ -478,7 +478,6 @@ __wt_curds_open(
cursor = &data_source->iface;
*cursor = iface;
cursor->session = &session->iface;
- F_SET(cursor, WT_CURSTD_DATA_SOURCE);
/*
* XXX
diff --git a/src/third_party/wiredtiger/src/cursor/cur_log.c b/src/third_party/wiredtiger/src/cursor/cur_log.c
index 04ab1e2a14a..e3089e9fb83 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_log.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_log.c
@@ -150,6 +150,7 @@ static int
__curlog_kv(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
{
WT_CURSOR_LOG *cl;
+ WT_ITEM item;
uint32_t fileid, key_count, opsize, optype;
cl = (WT_CURSOR_LOG *)cursor;
@@ -180,11 +181,37 @@ __curlog_kv(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
* The log cursor sets the LSN and step count as the cursor key and
* and log record related data in the value. The data in the value
* contains any operation key/value that was in the log record.
+ * For the special case that the caller needs the result in raw form,
+ * we create packed versions of the key/value.
*/
- __wt_cursor_set_key(cursor, cl->cur_lsn->file, cl->cur_lsn->offset,
- key_count);
- __wt_cursor_set_value(cursor, cl->txnid, cl->rectype, optype,
- fileid, cl->opkey, cl->opvalue);
+ if (FLD_ISSET(cursor->flags, WT_CURSTD_RAW)) {
+ memset(&item, 0, sizeof(item));
+ WT_RET(wiredtiger_struct_size((WT_SESSION *)session,
+ &item.size, LOGC_KEY_FORMAT, cl->cur_lsn->file,
+ cl->cur_lsn->offset, key_count));
+ WT_RET(__wt_realloc(session, NULL, item.size, &cl->packed_key));
+ item.data = cl->packed_key;
+ WT_RET(wiredtiger_struct_pack((WT_SESSION *)session,
+ cl->packed_key, item.size, LOGC_KEY_FORMAT,
+ cl->cur_lsn->file, cl->cur_lsn->offset, key_count));
+ __wt_cursor_set_key(cursor, &item);
+
+ WT_RET(wiredtiger_struct_size((WT_SESSION *)session,
+ &item.size, LOGC_VALUE_FORMAT, cl->txnid, cl->rectype,
+ optype, fileid, cl->opkey, cl->opvalue));
+ WT_RET(__wt_realloc(session, NULL, item.size,
+ &cl->packed_value));
+ item.data = cl->packed_value;
+ WT_RET(wiredtiger_struct_pack((WT_SESSION *)session,
+ cl->packed_value, item.size, LOGC_VALUE_FORMAT, cl->txnid,
+ cl->rectype, optype, fileid, cl->opkey, cl->opvalue));
+ __wt_cursor_set_value(cursor, &item);
+ } else {
+ __wt_cursor_set_key(cursor, cl->cur_lsn->file,
+ cl->cur_lsn->offset, key_count);
+ __wt_cursor_set_value(cursor, cl->txnid, cl->rectype, optype,
+ fileid, cl->opkey, cl->opvalue);
+ }
return (0);
}
@@ -295,6 +322,8 @@ __curlog_close(WT_CURSOR *cursor)
__wt_scr_free(session, &cl->logrec);
__wt_scr_free(session, &cl->opkey);
__wt_scr_free(session, &cl->opvalue);
+ __wt_free(session, cl->packed_key);
+ __wt_free(session, cl->packed_value);
WT_TRET(__wt_cursor_close(cursor));
err: API_END_RET(session, ret);
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 60a5f82f233..a4ae0aaf55b 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -437,7 +437,7 @@ __evict_pass(WT_SESSION_IMPL *session)
WT_EVICT_WORKER *worker;
int loop;
uint32_t flags;
- uint64_t bytes_inuse, pages_evicted;
+ uint64_t bytes_inuse, dirty_target_size, pages_evicted, target_size;
conn = S2C(session);
cache = conn->cache;
@@ -465,9 +465,16 @@ __evict_pass(WT_SESSION_IMPL *session)
if (loop > 10)
LF_SET(WT_EVICT_PASS_AGGRESSIVE);
- /* Start a worker if we have capacity and the cache is full. */
+ /*
+ * Start a worker if we have capacity and we haven't reached
+ * the eviction targets.
+ */
bytes_inuse = __wt_cache_bytes_inuse(cache);
- if (bytes_inuse > conn->cache_size &&
+ target_size = (conn->cache_size * cache->eviction_target) / 100;
+ dirty_target_size =
+ (conn->cache_size * cache->eviction_dirty_target) / 100;
+ if ((bytes_inuse > target_size ||
+ cache->bytes_dirty > dirty_target_size) &&
conn->evict_workers < conn->evict_workers_max) {
WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER,
"Starting evict worker: %"PRIu32"\n",
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index aca3dc11ee9..b3a6f718ca2 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -21,12 +21,15 @@ static void __evict_excl_clear(WT_SESSION_IMPL *);
int
__wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
{
+ WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
WT_TXN_STATE *txn_state;
int forced_eviction, inmem_split, istree;
+ conn = S2C(session);
+
page = ref->page;
forced_eviction = (page->read_gen == WT_READGEN_OLDEST);
inmem_split = istree = 0;
@@ -40,7 +43,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
*/
txn_state = WT_SESSION_TXN_STATE(session);
if (txn_state->snap_min == WT_TXN_NONE)
- txn_state->snap_min = S2C(session)->txn_global.oldest_id;
+ txn_state->snap_min = conn->txn_global.oldest_id;
else
txn_state = NULL;
@@ -75,6 +78,14 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
WT_STAT_FAST_DATA_INCR(session, cache_eviction_internal);
}
+ /*
+ * Track the largest page size seen at eviction; it tells us something
+ * about our ability to force pages out before they're larger than the
+ * cache.
+ */
+ if (page->memory_footprint > conn->cache->evict_max_page_size)
+ conn->cache->evict_max_page_size = page->memory_footprint;
+
/* Discard any subtree rooted in this page. */
if (istree)
WT_WITH_PAGE_INDEX(session,
@@ -119,8 +130,8 @@ done: session->excl_next = 0;
txn_state->snap_min = WT_TXN_NONE;
if ((inmem_split || (forced_eviction && ret == EBUSY)) &&
- !F_ISSET(S2C(session)->cache, WT_EVICT_WOULD_BLOCK)) {
- F_SET(S2C(session)->cache, WT_EVICT_WOULD_BLOCK);
+ !F_ISSET(conn->cache, WT_EVICT_WOULD_BLOCK)) {
+ F_SET(conn->cache, WT_EVICT_WOULD_BLOCK);
WT_TRET(__wt_evict_server_wake(session));
}
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index e1fc72677c5..dd10e522412 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -550,9 +550,10 @@ struct __wt_page {
#define WT_PAGE_DISK_ALLOC 0x02 /* Disk image in allocated memory */
#define WT_PAGE_DISK_MAPPED 0x04 /* Disk image in mapped memory */
#define WT_PAGE_EVICT_LRU 0x08 /* Page is on the LRU queue */
-#define WT_PAGE_SCANNING 0x10 /* Obsolete updates are being scanned */
-#define WT_PAGE_SPLITTING 0x20 /* An internal page is growing */
+#define WT_PAGE_REFUSE_DEEPEN 0x10 /* Don't deepen the tree at this page */
+#define WT_PAGE_SCANNING 0x20 /* Obsolete updates are being scanned */
#define WT_PAGE_SPLIT_INSERT 0x40 /* A leaf page was split for append */
+#define WT_PAGE_SPLITTING 0x80 /* An internal page is growing */
uint8_t flags_atomic; /* Atomic flags, use F_*_ATOMIC */
};
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index a333e4af565..d30ee46486a 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -165,65 +165,6 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page)
}
/*
- * __wt_cache_read_gen --
- * Get the current read generation number.
- */
-static inline uint64_t
-__wt_cache_read_gen(WT_SESSION_IMPL *session)
-{
- return (S2C(session)->cache->read_gen);
-}
-
-/*
- * __wt_cache_read_gen_incr --
- * Increment the current read generation number.
- */
-static inline void
-__wt_cache_read_gen_incr(WT_SESSION_IMPL *session)
-{
- ++S2C(session)->cache->read_gen;
-}
-
-/*
- * __wt_cache_read_gen_set --
- * Get the read generation to store in a page.
- */
-static inline uint64_t
-__wt_cache_read_gen_set(WT_SESSION_IMPL *session)
-{
- /*
- * We return read-generations from the future (where "the future" is
- * measured by increments of the global read generation). The reason
- * is because when acquiring a new hazard pointer for a page, we can
- * check its read generation, and if the read generation isn't less
- * than the current global generation, we don't bother updating the
- * page. In other words, the goal is to avoid some number of updates
- * immediately after each update we have to make.
- */
- return (__wt_cache_read_gen(session) + WT_READGEN_STEP);
-}
-
-/*
- * __wt_cache_pages_inuse --
- * Return the number of pages in use.
- */
-static inline uint64_t
-__wt_cache_pages_inuse(WT_CACHE *cache)
-{
- return (cache->pages_inmem - cache->pages_evict);
-}
-
-/*
- * __wt_cache_bytes_inuse --
- * Return the number of bytes in use.
- */
-static inline uint64_t
-__wt_cache_bytes_inuse(WT_CACHE *cache)
-{
- return (cache->bytes_inmem - cache->bytes_evict);
-}
-
-/*
* __wt_page_evict_soon --
* Set a page to be evicted as soon as possible.
*/
@@ -917,16 +858,16 @@ __wt_ref_info(WT_SESSION_IMPL *session,
}
/*
- * __wt_page_release --
- * Release a reference to a page.
+ * __wt_page_release_busy --
+ * Release a reference to a page, fail if busy during forced eviction.
*/
static inline int
-__wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
+__wt_page_release_busy(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
{
WT_BTREE *btree;
WT_DECL_RET;
WT_PAGE *page;
- int locked;
+ int locked, too_big;
btree = S2BT(session);
@@ -938,6 +879,8 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
return (0);
page = ref->page;
+ too_big = (page->memory_footprint < btree->maxmempage) ? 0 : 1;
+
/*
* Attempt to evict pages with the special "oldest" read generation.
*
@@ -970,12 +913,19 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
return (ret);
(void)WT_ATOMIC_ADD4(btree->evict_busy, 1);
- if ((ret = __wt_evict_page(session, ref)) == 0)
- WT_STAT_FAST_CONN_INCR(session, cache_eviction_force);
- else {
+ if ((ret = __wt_evict_page(session, ref)) == 0) {
+ if (too_big)
+ WT_STAT_FAST_CONN_INCR(session, cache_eviction_force);
+ else
+ /*
+ * If the page isn't too big, we are evicting it because
+ * it had a chain of deleted entries that make traversal
+ * expensive.
+ */
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_force_delete);
+ } else {
WT_STAT_FAST_CONN_INCR(session, cache_eviction_force_fail);
- if (ret == EBUSY)
- ret = 0;
}
(void)WT_ATOMIC_SUB4(btree->evict_busy, 1);
@@ -983,6 +933,17 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
}
/*
+ * __wt_page_release --
+ * Release a reference to a page.
+ */
+static inline int
+__wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
+{
+ WT_RET_BUSY_OK(__wt_page_release_busy(session, ref, flags));
+ return (0);
+}
+
+/*
* __wt_page_swap_func --
* Swap one page's hazard pointer for another one when hazard pointer
* coupling up/down the tree.
diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h
index 75219e5b413..deccd676e26 100644
--- a/src/third_party/wiredtiger/src/include/cache.h
+++ b/src/third_party/wiredtiger/src/include/cache.h
@@ -61,6 +61,8 @@ struct __wt_cache {
uint64_t bytes_dirty; /* Bytes/pages currently dirty */
uint64_t pages_dirty;
+ uint64_t evict_max_page_size; /* Largest page seen at eviction */
+
/*
* Read information.
*/
diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i
index b997781272a..ee969255241 100644
--- a/src/third_party/wiredtiger/src/include/cache.i
+++ b/src/third_party/wiredtiger/src/include/cache.i
@@ -7,6 +7,65 @@
*/
/*
+ * __wt_cache_read_gen --
+ * Get the current read generation number.
+ */
+static inline uint64_t
+__wt_cache_read_gen(WT_SESSION_IMPL *session)
+{
+ return (S2C(session)->cache->read_gen);
+}
+
+/*
+ * __wt_cache_read_gen_incr --
+ * Increment the current read generation number.
+ */
+static inline void
+__wt_cache_read_gen_incr(WT_SESSION_IMPL *session)
+{
+ ++S2C(session)->cache->read_gen;
+}
+
+/*
+ * __wt_cache_read_gen_set --
+ * Get the read generation to store in a page.
+ */
+static inline uint64_t
+__wt_cache_read_gen_set(WT_SESSION_IMPL *session)
+{
+ /*
+ * We return read-generations from the future (where "the future" is
+ * measured by increments of the global read generation). The reason
+ * is because when acquiring a new hazard pointer for a page, we can
+ * check its read generation, and if the read generation isn't less
+ * than the current global generation, we don't bother updating the
+ * page. In other words, the goal is to avoid some number of updates
+ * immediately after each update we have to make.
+ */
+ return (__wt_cache_read_gen(session) + WT_READGEN_STEP);
+}
+
+/*
+ * __wt_cache_pages_inuse --
+ * Return the number of pages in use.
+ */
+static inline uint64_t
+__wt_cache_pages_inuse(WT_CACHE *cache)
+{
+ return (cache->pages_inmem - cache->pages_evict);
+}
+
+/*
+ * __wt_cache_bytes_inuse --
+ * Return the number of bytes in use.
+ */
+static inline uint64_t
+__wt_cache_bytes_inuse(WT_CACHE *cache)
+{
+ return (cache->bytes_inmem - cache->bytes_evict);
+}
+
+/*
* __wt_eviction_check --
* Wake the eviction server if necessary.
*/
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h
index 0fc4b883a16..e46c1f7de1b 100644
--- a/src/third_party/wiredtiger/src/include/cursor.h
+++ b/src/third_party/wiredtiger/src/include/cursor.h
@@ -264,6 +264,8 @@ struct __wt_cursor_log {
WT_ITEM *logrec; /* Copy of record for cursor */
WT_ITEM *opkey, *opvalue; /* Op key/value copy */
const uint8_t *stepp, *stepp_end; /* Pointer within record */
+ uint8_t *packed_key; /* Packed key for 'raw' interface */
+ uint8_t *packed_value; /* Packed value for 'raw' interface */
uint32_t step_count; /* Intra-record count */
uint32_t rectype; /* Record type */
uint64_t txnid; /* Record txnid */
diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i
index ae6aafdd638..8fa9790e096 100644
--- a/src/third_party/wiredtiger/src/include/cursor.i
+++ b/src/third_party/wiredtiger/src/include/cursor.i
@@ -176,11 +176,23 @@ static inline int
__cursor_func_init(WT_CURSOR_BTREE *cbt, int reenter)
{
WT_SESSION_IMPL *session;
+ WT_TXN *txn;
session = (WT_SESSION_IMPL *)cbt->iface.session;
+ txn = &session->txn;
if (reenter)
WT_RET(__curfile_leave(cbt));
+
+ /*
+ * If a transaction is running in this thread but has neither an ID
+ * nor a snapshot yet, check whether the cache is full now. If we have
+ * to block for eviction, this is the best time to do it.
+ */
+ if (F_ISSET(txn, TXN_RUNNING) &&
+ !F_ISSET(txn, TXN_HAS_ID) && !F_ISSET(txn, TXN_HAS_SNAPSHOT))
+ WT_RET(__wt_cache_full_check(session));
+
if (!F_ISSET(cbt, WT_CBT_ACTIVE))
WT_RET(__curfile_enter(cbt));
__wt_txn_cursor_op(session);
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index b80719de7c0..d8ed3f5cef1 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -432,6 +432,8 @@ extern int __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp
extern int __wt_dlsym(WT_SESSION_IMPL *session, WT_DLH *dlh, const char *name, int fail, void *sym_ret);
extern int __wt_dlclose(WT_SESSION_IMPL *session, WT_DLH *dlh);
extern int __wt_errno(void);
+extern const char *__wt_strerror(int error);
+extern int __wt_strerror_r(int error, char *buf, size_t buflen);
extern int __wt_exist(WT_SESSION_IMPL *session, const char *filename, int *existp);
extern void __wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh);
extern int __wt_fallocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len);
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 53a4ce3af4a..6efb9970065 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -164,9 +164,11 @@ struct __wt_connection_stats {
WT_STATS cache_eviction_dirty;
WT_STATS cache_eviction_fail;
WT_STATS cache_eviction_force;
+ WT_STATS cache_eviction_force_delete;
WT_STATS cache_eviction_force_fail;
WT_STATS cache_eviction_hazard;
WT_STATS cache_eviction_internal;
+ WT_STATS cache_eviction_maximum_page_size;
WT_STATS cache_eviction_queue_empty;
WT_STATS cache_eviction_queue_not_empty;
WT_STATS cache_eviction_server_evicting;
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 36cb10c30d0..8380e55effb 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -7,6 +7,7 @@
*/
#define WT_TXN_NONE 0 /* No txn running in a session. */
+#define WT_TXN_FIRST 1 /* First transaction to run. */
#define WT_TXN_ABORTED UINT64_MAX /* Update rolled back, ignore. */
/*
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 745a8f75a99..656181790ed 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -227,6 +227,16 @@ __wt_txn_id_check(WT_SESSION_IMPL *session)
txn = &session->txn;
WT_ASSERT(session, F_ISSET(txn, TXN_RUNNING));
+
+ /*
+ * If the running transaction in this thread has neither an ID nor a
+ * snapshot yet, check whether the cache is full now. If we have to
+ * block for eviction, this is the best time to do it.
+ */
+ if (F_ISSET(txn, TXN_RUNNING) &&
+ !F_ISSET(txn, TXN_HAS_ID) && !F_ISSET(txn, TXN_HAS_SNAPSHOT))
+ WT_RET(__wt_cache_full_check(session));
+
if (!F_ISSET(txn, TXN_HAS_ID)) {
conn = S2C(session);
txn_global = &conn->txn_global;
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index ee9c58e4278..91eb41af4f3 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -525,18 +525,17 @@ struct __wt_cursor {
#define WT_CURSTD_APPEND 0x0001
#define WT_CURSTD_BULK 0x0002
-#define WT_CURSTD_DATA_SOURCE 0x0004
-#define WT_CURSTD_DUMP_HEX 0x0008
-#define WT_CURSTD_DUMP_JSON 0x0010
-#define WT_CURSTD_DUMP_PRINT 0x0020
-#define WT_CURSTD_KEY_EXT 0x0040 /* Key points out of the tree. */
-#define WT_CURSTD_KEY_INT 0x0080 /* Key points into the tree. */
+#define WT_CURSTD_DUMP_HEX 0x0004
+#define WT_CURSTD_DUMP_JSON 0x0008
+#define WT_CURSTD_DUMP_PRINT 0x0010
+#define WT_CURSTD_KEY_EXT 0x0020 /* Key points out of the tree. */
+#define WT_CURSTD_KEY_INT 0x0040 /* Key points into the tree. */
#define WT_CURSTD_KEY_SET (WT_CURSTD_KEY_EXT | WT_CURSTD_KEY_INT)
-#define WT_CURSTD_OPEN 0x0100
-#define WT_CURSTD_OVERWRITE 0x0200
-#define WT_CURSTD_RAW 0x0400
-#define WT_CURSTD_VALUE_EXT 0x0800 /* Value points out of the tree. */
-#define WT_CURSTD_VALUE_INT 0x1000 /* Value points into the tree. */
+#define WT_CURSTD_OPEN 0x0080
+#define WT_CURSTD_OVERWRITE 0x0100
+#define WT_CURSTD_RAW 0x0200
+#define WT_CURSTD_VALUE_EXT 0x0400 /* Value points out of the tree. */
+#define WT_CURSTD_VALUE_INT 0x0800 /* Value points into the tree. */
#define WT_CURSTD_VALUE_SET (WT_CURSTD_VALUE_EXT | WT_CURSTD_VALUE_INT)
uint32_t flags;
#endif
@@ -2020,15 +2019,26 @@ int wiredtiger_open(const char *home,
WT_CONNECTION **connectionp);
/*!
- * Return information about an error as a string; wiredtiger_strerror is a
- * superset of the ISO C99/POSIX 1003.1-2001 function strerror.
+ * Return information about a WiredTiger error as a string, not thread-safe.
*
* @snippet ex_all.c Display an error
*
- * @param err a return value from a WiredTiger, C library or POSIX function
+ * @param error a return value from a WiredTiger call
* @returns a string representation of the error
*/
-const char *wiredtiger_strerror(int err);
+const char *wiredtiger_strerror(int error);
+
+/*!
+ * Return information about a WiredTiger error as a string, thread-safe version.
+ *
+ * @snippet ex_all.c Display an error thread safe
+ *
+ * @param error a return value from a WiredTiger call
+ * @param buf a buffer of at least \c buflen bytes
+ * @param buflen the length of the buffer
+ * @returns zero for success, non-zero to indicate an error.
+ */
+int wiredtiger_strerror_r(int error, char *buf, size_t buflen);
#if !defined(SWIG)
/*!
@@ -3155,204 +3165,208 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1030
/*! cache: pages evicted because they exceeded the in-memory maximum */
#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1031
+/*! cache: pages evicted because they had chains of deleted items */
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1032
/*! cache: failed eviction of pages that exceeded the in-memory maximum */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1032
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1033
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1033
+#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1034
/*! cache: internal pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1034
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1035
+/*! cache: maximum page size at eviction */
+#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1036
/*! cache: eviction server candidate queue empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1035
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1037
/*! cache: eviction server candidate queue not empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1036
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1038
/*! cache: eviction server evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1037
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1039
/*! cache: eviction server populating queue, but not evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1038
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1040
/*! cache: eviction server unable to reach eviction goal */
-#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1039
+#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1041
/*! cache: pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT 1040
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT 1042
/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 1041
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 1043
/*! cache: in-memory page splits */
-#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1042
+#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1044
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1043
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1045
/*! cache: pages currently held in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_INUSE 1044
+#define WT_STAT_CONN_CACHE_PAGES_INUSE 1046
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 1045
+#define WT_STAT_CONN_CACHE_READ 1047
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1046
+#define WT_STAT_CONN_CACHE_WRITE 1048
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1047
+#define WT_STAT_CONN_COND_WAIT 1049
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1048
+#define WT_STAT_CONN_CURSOR_CREATE 1050
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1049
+#define WT_STAT_CONN_CURSOR_INSERT 1051
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1050
+#define WT_STAT_CONN_CURSOR_NEXT 1052
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1051
+#define WT_STAT_CONN_CURSOR_PREV 1053
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1052
+#define WT_STAT_CONN_CURSOR_REMOVE 1054
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1053
+#define WT_STAT_CONN_CURSOR_RESET 1055
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1054
+#define WT_STAT_CONN_CURSOR_SEARCH 1056
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1055
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1057
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1056
+#define WT_STAT_CONN_CURSOR_UPDATE 1058
/*! data-handle: connection dhandles swept */
-#define WT_STAT_CONN_DH_CONN_HANDLES 1057
+#define WT_STAT_CONN_DH_CONN_HANDLES 1059
/*! data-handle: connection candidate referenced */
-#define WT_STAT_CONN_DH_CONN_REF 1058
+#define WT_STAT_CONN_DH_CONN_REF 1060
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_CONN_SWEEPS 1059
+#define WT_STAT_CONN_DH_CONN_SWEEPS 1061
/*! data-handle: connection time-of-death sets */
-#define WT_STAT_CONN_DH_CONN_TOD 1060
+#define WT_STAT_CONN_DH_CONN_TOD 1062
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1061
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1063
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1062
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1064
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1063
+#define WT_STAT_CONN_FILE_OPEN 1065
/*! log: log buffer size increases */
-#define WT_STAT_CONN_LOG_BUFFER_GROW 1064
+#define WT_STAT_CONN_LOG_BUFFER_GROW 1066
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1065
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1067
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1066
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1068
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1067
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1069
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1068
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1070
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1069
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1071
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1070
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1072
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1071
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1073
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1072
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1074
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1073
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1075
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1074
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1076
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1075
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1077
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1076
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1078
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1077
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1079
/*! log: log read operations */
-#define WT_STAT_CONN_LOG_READS 1078
+#define WT_STAT_CONN_LOG_READS 1080
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1079
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1081
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1080
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1082
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1081
+#define WT_STAT_CONN_LOG_SCANS 1083
/*! log: consolidated slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1082
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1084
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1083
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1085
/*! log: consolidated slot joins */
-#define WT_STAT_CONN_LOG_SLOT_JOINS 1084
+#define WT_STAT_CONN_LOG_SLOT_JOINS 1086
/*! log: consolidated slot join races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1085
+#define WT_STAT_CONN_LOG_SLOT_RACES 1087
/*! log: slots selected for switching that were unavailable */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1086
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1088
/*! log: record size exceeded maximum */
-#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1087
+#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1089
/*! log: failed to find a slot large enough for record */
-#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1088
+#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1090
/*! log: consolidated slot join transitions */
-#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1089
+#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1091
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1090
+#define WT_STAT_CONN_LOG_SYNC 1092
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1091
+#define WT_STAT_CONN_LOG_WRITES 1093
/*! LSM: sleep for LSM checkpoint throttle */
-#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1092
+#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1094
/*! LSM: sleep for LSM merge throttle */
-#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1093
+#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1095
/*! LSM: rows merged in an LSM tree */
-#define WT_STAT_CONN_LSM_ROWS_MERGED 1094
+#define WT_STAT_CONN_LSM_ROWS_MERGED 1096
/*! LSM: application work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1095
+#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1097
/*! LSM: merge work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1096
+#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1098
/*! LSM: tree queue hit maximum */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1097
+#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1099
/*! LSM: switch work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1098
+#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1100
/*! LSM: tree maintenance operations scheduled */
-#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1099
+#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1101
/*! LSM: tree maintenance operations discarded */
-#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1100
+#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1102
/*! LSM: tree maintenance operations executed */
-#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1101
+#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1103
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1102
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1104
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1103
+#define WT_STAT_CONN_MEMORY_FREE 1105
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1104
+#define WT_STAT_CONN_MEMORY_GROW 1106
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1105
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1107
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1106
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1108
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1107
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1109
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1108
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1110
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1109
+#define WT_STAT_CONN_PAGE_SLEEP 1111
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1110
+#define WT_STAT_CONN_READ_IO 1112
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1111
+#define WT_STAT_CONN_REC_PAGES 1113
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1112
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1114
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1113
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1115
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1114
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1116
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1115
+#define WT_STAT_CONN_RWLOCK_READ 1117
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1116
+#define WT_STAT_CONN_RWLOCK_WRITE 1118
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1117
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1119
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1118
+#define WT_STAT_CONN_SESSION_OPEN 1120
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1119
+#define WT_STAT_CONN_TXN_BEGIN 1121
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1120
+#define WT_STAT_CONN_TXN_CHECKPOINT 1122
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1121
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1123
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1122
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1124
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1123
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1125
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1124
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1126
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1125
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1127
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1126
+#define WT_STAT_CONN_TXN_COMMIT 1128
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1127
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1129
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1128
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1130
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1129
+#define WT_STAT_CONN_TXN_ROLLBACK 1131
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1130
+#define WT_STAT_CONN_WRITE_IO 1132
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index 138b64a6e27..1b3a9b62626 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -322,13 +322,13 @@ struct __wt_update;
#include "misc.i"
#include "intpack.i" /* required by cell.i, packing.i */
#include "packing.i"
+#include "cache.i" /* required by txn.i */
#include "cell.i" /* required by btree.i */
#include "mutex.i" /* required by btree.i */
#include "txn.i" /* required by btree.i */
#include "btree.i" /* required by cursor.i */
-#include "cache.i" /* required by cursor.i */
#include "cursor.i"
#include "bitstring.i"
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index a3abb336f3d..944e748a6a8 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -957,10 +957,14 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
&slot->slot_buf, slot->slot_buf.memsize * 2));
}
/*
- * If we have a file to close, close it now.
+ * If we have a file to close, close it now. First fsync so
+ * that a later sync will be assured all earlier transactions
+ * in earlier log files are also on disk.
*/
- if (close_fh)
+ if (close_fh) {
+ WT_ERR(__wt_fsync(session, close_fh));
WT_ERR(__wt_close(session, close_fh));
+ }
err: if (locked)
__wt_spin_unlock(session, &log->log_sync_lock);
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
index 2dfaea1ec3a..0d44b16d85c 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor.c
@@ -171,8 +171,6 @@ __clsm_enter(WT_CURSOR_LSM *clsm, int reset, int update)
lsm_tree->nchunks != 0)
goto open;
- WT_RET(__wt_cache_full_check(session));
-
if (clsm->dsk_gen != lsm_tree->dsk_gen &&
lsm_tree->nchunks != 0)
goto open;
@@ -1484,11 +1482,8 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
WT_ERR(__wt_cursor_init(cursor, cursor->uri, owner, cfg, cursorp));
if (0) {
-err: __wt_lsm_tree_release(session, lsm_tree);
- if (clsm != NULL) {
- clsm->lsm_tree = NULL;
+err: if (clsm != NULL)
WT_TRET(__clsm_close(cursor));
- }
}
return (ret);
diff --git a/src/third_party/wiredtiger/src/os_posix/os_errno.c b/src/third_party/wiredtiger/src/os_posix/os_errno.c
index ed3451a9c1c..a58f13583ce 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_errno.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_errno.c
@@ -21,3 +21,49 @@ __wt_errno(void)
*/
return (errno == 0 ? WT_ERROR : errno);
}
+
+/*
+ * __wt_strerror --
+ * POSIX implementation of wiredtiger_strerror, NULL if no string is found.
+ */
+const char *
+__wt_strerror(int error)
+{
+ const char *p;
+
+ /*
+ * POSIX errors are non-negative integers; check for 0 explicitly
+ * in case the underlying strerror doesn't handle 0, some don't.
+ */
+ if (error == 0)
+ return ("Successful return: 0");
+ if (error > 0 && (p = strerror(error)) != NULL)
+ return (p);
+ return (NULL);
+}
+
+/*
+ * __wt_strerror_r --
+ * POSIX implementation of wiredtiger_strerror_r.
+ */
+int
+__wt_strerror_r(int error, char *buf, size_t buflen)
+{
+ const char *p;
+
+ /* Require at least 2 bytes, printable character and trailing nul. */
+ if (buflen < 2)
+ return (ENOMEM);
+
+ /*
+ * Check for POSIX errors, then fall back to something generic. Copy the
+ * string into the user's buffer, return success if anything printed.
+ */
+ p = __wt_strerror(error);
+ if (p != NULL && snprintf(buf, buflen, "%s", p) > 0)
+ return (0);
+
+ /* Fall back to a generic message, then guess it's a memory problem. */
+ return (
+ snprintf(buf, buflen, "error return: %d", error) > 0 ? 0 : ENOMEM);
+}
diff --git a/src/third_party/wiredtiger/src/os_win/os_errno.c b/src/third_party/wiredtiger/src/os_win/os_errno.c
index e321912d829..00ee638fbe3 100644
--- a/src/third_party/wiredtiger/src/os_win/os_errno.c
+++ b/src/third_party/wiredtiger/src/os_win/os_errno.c
@@ -8,6 +8,34 @@
#include "wt_internal.h"
+static const int windows_error_offset = -29000;
+
+/*
+ * __wt_map_error_to_windows_error --
+ * Return a positive integer, a decoded Windows error
+ * Standard C errors are positive integers from 0 - ~200
+ * Windows errors are from 0 - 15999 according to the documentation
+ */
+static DWORD
+__wt_map_error_to_windows_error(int error) {
+ /* Encoded Windows errors are negative: the Windows error plus the
+ (negative) windows_error_offset. Zero and positive values are
+ success or standard C errors and must never be decoded here.
+ */
+ WT_ASSERT(NULL, error < 0);
+
+ return (error + -(windows_error_offset));
+}
+
+/*
+ * __wt_map_windows_error_to_error --
+ * Return a negative integer, an encoded Windows error
+ */
+static int
+__wt_map_windows_error_to_error(DWORD winerr) {
+ return (winerr + windows_error_offset);
+}
+
/*
* __wt_errno --
* Return errno, or WT_ERROR if errno not set.
@@ -24,5 +52,73 @@ __wt_errno(void)
/* GetLastError should only be called if we hit an actual error */
WT_ASSERT(NULL, err != ERROR_SUCCESS);
- return (err == ERROR_SUCCESS ? WT_ERROR : err);
+ return (err == ERROR_SUCCESS ?
+ WT_ERROR : __wt_map_windows_error_to_error(err));
+}
+
+/*
+ * __wt_strerror --
+ * Windows implementation of wiredtiger_strerror, NULL if no string is found.
+ */
+const char *
+__wt_strerror(int error)
+{
+ const char *p;
+
+ /*
+ * POSIX errors are non-negative integers; check for 0 explicitly
+ * in case the underlying strerror doesn't handle 0, some don't.
+ */
+ if (error == 0)
+ return ("Successful return: 0");
+ if (error > 0 && (p = strerror(error)) != NULL)
+ return (p);
+ return (NULL);
+}
+
+/*
+ * __wt_strerror_r --
+ * Windows implementation of wiredtiger_strerror_r.
+ */
+int
+__wt_strerror_r(int error, char *buf, size_t buflen)
+{
+ DWORD lasterror, windows_error;
+ const char *p;
+
+ /* Require at least 2 bytes, printable character and trailing nul. */
+ if (buflen < 2)
+ return (ENOMEM);
+
+ /*
+ * Check for POSIX errors, Windows errors, then fall back to something
+ * generic. Copy the string into the user's buffer, return success if
+ * anything printed.
+ */
+ p = __wt_strerror(error);
+ if (p != NULL && snprintf(buf, buflen, "%s", p) > 0)
+ return (0);
+
+ if (error < 0) {
+ /* Decode into a local so the fallback reports the caller's value. */
+ windows_error = __wt_map_error_to_windows_error(error);
+ lasterror = FormatMessageA(
+ FORMAT_MESSAGE_FROM_SYSTEM |
+ FORMAT_MESSAGE_IGNORE_INSERTS,
+ NULL,
+ windows_error,
+ 0, /* let system choose the correct LANGID */
+ buf,
+ buflen,
+ NULL);
+
+ if (lasterror != 0)
+ return (0);
+
+ /* Fall through to the fallback error code */
+ }
+
+ /* Fall back to a generic message, then guess it's a memory problem. */
+ return (
+ snprintf(buf, buflen, "error return: %d", error) > 0 ? 0 : ENOMEM);
}
diff --git a/src/third_party/wiredtiger/src/os_win/os_ftruncate.c b/src/third_party/wiredtiger/src/os_win/os_ftruncate.c
index e80308536f1..d9b43e4596f 100644
--- a/src/third_party/wiredtiger/src/os_win/os_ftruncate.c
+++ b/src/third_party/wiredtiger/src/os_win/os_ftruncate.c
@@ -17,7 +17,6 @@ __wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
{
WT_DECL_RET;
LARGE_INTEGER largeint;
- uint32_t lasterror;
largeint.QuadPart = len;
@@ -32,10 +31,8 @@ __wt_ftruncate(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t len)
return (0);
}
- lasterror = GetLastError();
-
- if (lasterror = ERROR_USER_MAPPED_FILE)
+ if (GetLastError() == ERROR_USER_MAPPED_FILE)
return (EBUSY);
- WT_RET_MSG(session, lasterror, "%s SetEndOfFile error", fh->name);
+ WT_RET_MSG(session, __wt_errno(), "%s SetEndOfFile error", fh->name);
}
diff --git a/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c
index 71ea8ed49a2..36de49d1aae 100644
--- a/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c
+++ b/src/third_party/wiredtiger/src/os_win/os_mtx_cond.c
@@ -45,7 +45,6 @@ __wt_cond_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, long usecs)
{
WT_DECL_RET;
int locked;
- int lasterror;
int milliseconds;
locked = 0;
WT_ASSERT(session, usecs >= 0);
@@ -82,8 +81,7 @@ __wt_cond_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, long usecs)
&cond->cond, &cond->mtx, INFINITE);
if (ret == 0) {
- lasterror = GetLastError();
- if (lasterror == ERROR_TIMEOUT) {
+ if (GetLastError() == ERROR_TIMEOUT) {
ret = 1;
}
}
diff --git a/src/third_party/wiredtiger/src/os_win/os_rename.c b/src/third_party/wiredtiger/src/os_win/os_rename.c
index 8c2784457c4..a0f33843218 100644
--- a/src/third_party/wiredtiger/src/os_win/os_rename.c
+++ b/src/third_party/wiredtiger/src/os_win/os_rename.c
@@ -33,13 +33,13 @@ __wt_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
*/
if ((ret = GetFileAttributesA(to_path)) != INVALID_FILE_ATTRIBUTES) {
if ((ret = DeleteFileA(to_path)) == FALSE) {
- lasterror = GetLastError();
+ lasterror = __wt_errno();
goto err;
}
}
if ((MoveFileA(from_path, to_path)) == FALSE)
- lasterror = GetLastError();
+ lasterror = __wt_errno();
err:
__wt_free(session, from_path);
diff --git a/src/third_party/wiredtiger/src/schema/schema_open.c b/src/third_party/wiredtiger/src/schema/schema_open.c
index 81447b173ae..d613ced00aa 100644
--- a/src/third_party/wiredtiger/src/schema/schema_open.c
+++ b/src/third_party/wiredtiger/src/schema/schema_open.c
@@ -327,6 +327,13 @@ __wt_schema_open_index(WT_SESSION_IMPL *session,
table->indices[i] = idx;
idx = NULL;
+
+ /*
+ * If the slot is bigger than anything else we've seen,
+ * bump the number of indices.
+ */
+ if (i >= table->nindices)
+ table->nindices = i + 1;
}
/* If we were looking for a single index, we're done. */
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index 3ab5e0acab1..8ee143133ae 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -736,13 +736,6 @@ __session_begin_transaction(WT_SESSION *wt_session, const char *config)
if (F_ISSET(&session->txn, TXN_RUNNING))
WT_ERR_MSG(session, EINVAL, "Transaction already running");
- /*
- * There is no transaction active in this thread; check if the cache is
- * full, if we have to block for eviction, this is the best time to do
- * it.
- */
- WT_ERR(__wt_cache_full_check(session));
-
ret = __wt_txn_begin(session, cfg);
err: API_END_RET(session, ret);
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index c93168cd9a1..223d62d0559 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -367,6 +367,8 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats)
stats->cache_inmem_split.desc = "cache: in-memory page splits";
stats->cache_eviction_internal.desc = "cache: internal pages evicted";
stats->cache_bytes_max.desc = "cache: maximum bytes configured";
+ stats->cache_eviction_maximum_page_size.desc =
+ "cache: maximum page size at eviction";
stats->cache_eviction_dirty.desc = "cache: modified pages evicted";
stats->cache_eviction_deepen.desc =
"cache: page split during eviction deepened the tree";
@@ -374,6 +376,8 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats)
"cache: pages currently held in the cache";
stats->cache_eviction_force.desc =
"cache: pages evicted because they exceeded the in-memory maximum";
+ stats->cache_eviction_force_delete.desc =
+ "cache: pages evicted because they had chains of deleted items";
stats->cache_eviction_app.desc =
"cache: pages evicted by application threads";
stats->cache_read.desc = "cache: pages read into cache";
@@ -548,9 +552,11 @@ __wt_stat_refresh_connection_stats(void *stats_arg)
stats->cache_eviction_hazard.v = 0;
stats->cache_inmem_split.v = 0;
stats->cache_eviction_internal.v = 0;
+ stats->cache_eviction_maximum_page_size.v = 0;
stats->cache_eviction_dirty.v = 0;
stats->cache_eviction_deepen.v = 0;
stats->cache_eviction_force.v = 0;
+ stats->cache_eviction_force_delete.v = 0;
stats->cache_eviction_app.v = 0;
stats->cache_read.v = 0;
stats->cache_eviction_fail.v = 0;
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index cd130002c81..5b8f11a88a5 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -361,8 +361,15 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
/* If we are logging, write a commit log record. */
if (ret == 0 && txn->mod_count > 0 &&
FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED) &&
- !F_ISSET(session, WT_SESSION_NO_LOGGING))
+ !F_ISSET(session, WT_SESSION_NO_LOGGING)) {
+ /*
+ * We are about to block on I/O writing the log.
+ * Release our snapshot in case it is keeping data pinned.
+ * This is particularly important for checkpoints.
+ */
+ __wt_txn_release_snapshot(session);
ret = __wt_txn_log_commit(session, cfg);
+ }
/*
* If anything went wrong, roll back.
@@ -531,9 +538,8 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
conn = S2C(session);
txn_global = &conn->txn_global;
- txn_global->current = 1;
- txn_global->oldest_id = 1;
- txn_global->last_running = 1;
+ txn_global->current = txn_global->last_running =
+ txn_global->oldest_id = WT_TXN_FIRST;
WT_RET(__wt_calloc_def(
session, conn->session_size, &txn_global->states));
diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c
index f66bd7e09c8..f706efa8a70 100644
--- a/src/third_party/wiredtiger/src/txn/txn_log.c
+++ b/src/third_party/wiredtiger/src/txn/txn_log.c
@@ -270,6 +270,7 @@ __wt_txn_checkpoint_log(
{
WT_DECL_ITEM(logrec);
WT_DECL_RET;
+ WT_ITEM *ckpt_snapshot, empty;
WT_LSN *ckpt_lsn;
WT_TXN *txn;
uint8_t *end, *p;
@@ -319,19 +320,22 @@ __wt_txn_checkpoint_log(
*/
if (!txn->full_ckpt) {
txn->ckpt_nsnapshot = 0;
+ WT_CLEAR(empty);
+ ckpt_snapshot = &empty;
*ckpt_lsn = S2C(session)->log->alloc_lsn;
- }
+ } else
+ ckpt_snapshot = txn->ckpt_snapshot;
/* Write the checkpoint log record. */
WT_ERR(__wt_struct_size(session, &recsize, fmt,
rectype, ckpt_lsn->file, ckpt_lsn->offset,
- txn->ckpt_nsnapshot, &txn->ckpt_snapshot));
+ txn->ckpt_nsnapshot, ckpt_snapshot));
WT_ERR(__wt_logrec_alloc(session, recsize, &logrec));
WT_ERR(__wt_struct_pack(session,
(uint8_t *)logrec->data + logrec->size, recsize, fmt,
rectype, ckpt_lsn->file, ckpt_lsn->offset,
- txn->ckpt_nsnapshot, &txn->ckpt_snapshot));
+ txn->ckpt_nsnapshot, ckpt_snapshot));
logrec->size += (uint32_t)recsize;
WT_ERR(__wt_log_write(session, logrec, lsnp, 0));
diff --git a/src/third_party/wiredtiger/tools/stat_data.py b/src/third_party/wiredtiger/tools/stat_data.py
index 7c00f6a70a8..89e06dbbf90 100644
--- a/src/third_party/wiredtiger/tools/stat_data.py
+++ b/src/third_party/wiredtiger/tools/stat_data.py
@@ -4,6 +4,7 @@ no_scale_per_second_list = [
'async: maximum work queue length',
'cache: bytes currently in the cache',
'cache: maximum bytes configured',
+ 'cache: maximum page size at eviction',
'cache: pages currently held in the cache',
'cache: tracked dirty bytes in the cache',
'cache: tracked dirty pages in the cache',