summaryrefslogtreecommitdiff
path: root/src/third_party/wiredtiger
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger')
-rw-r--r--src/third_party/wiredtiger/README18
-rw-r--r--src/third_party/wiredtiger/build_posix/Make.subdirs1
-rw-r--r--src/third_party/wiredtiger/build_posix/aclocal/ax_func_posix_memalign.m450
-rw-r--r--src/third_party/wiredtiger/build_posix/configure.ac.in6
-rw-r--r--src/third_party/wiredtiger/build_win/filelist.win1
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py36
-rw-r--r--src/third_party/wiredtiger/dist/extlist1
-rw-r--r--src/third_party/wiredtiger/dist/filelist1
-rw-r--r--src/third_party/wiredtiger/dist/flags.py3
-rw-r--r--src/third_party/wiredtiger/dist/log.py26
-rw-r--r--src/third_party/wiredtiger/dist/s_define.list4
-rw-r--r--src/third_party/wiredtiger/dist/s_readme16
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok4
-rw-r--r--src/third_party/wiredtiger/dist/stat_data.py1
-rw-r--r--src/third_party/wiredtiger/ext/extractors/csv/Makefile.am10
-rw-r--r--src/third_party/wiredtiger/ext/extractors/csv/csv_extractor.c171
-rw-r--r--src/third_party/wiredtiger/lang/java/java_doc.i2
-rw-r--r--src/third_party/wiredtiger/src/block/block_open.c36
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_debug.c11
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_split.c154
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_vrfy.c11
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c46
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_api.c149
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_log.c83
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_open.c2
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_log.c35
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_page.c5
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h19
-rw-r--r--src/third_party/wiredtiger/src/include/btree.h5
-rw-r--r--src/third_party/wiredtiger/src/include/btree.i18
-rw-r--r--src/third_party/wiredtiger/src/include/config.h26
-rw-r--r--src/third_party/wiredtiger/src/include/connection.h8
-rw-r--r--src/third_party/wiredtiger/src/include/cursor.i14
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h11
-rw-r--r--src/third_party/wiredtiger/src/include/flags.h9
-rw-r--r--src/third_party/wiredtiger/src/include/gcc.h20
-rw-r--r--src/third_party/wiredtiger/src/include/lint.h6
-rw-r--r--src/third_party/wiredtiger/src/include/log.h40
-rw-r--r--src/third_party/wiredtiger/src/include/msvc.h11
-rw-r--r--src/third_party/wiredtiger/src/include/session.h1
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h1
-rw-r--r--src/third_party/wiredtiger/src/include/txn.h20
-rw-r--r--src/third_party/wiredtiger/src/include/txn.i41
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in61
-rw-r--r--src/third_party/wiredtiger/src/include/wt_internal.h2
-rw-r--r--src/third_party/wiredtiger/src/log/log.c131
-rw-r--r--src/third_party/wiredtiger/src/log/log_auto.c99
-rw-r--r--src/third_party/wiredtiger/src/log/log_slot.c76
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c21
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_manager.c30
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_tree.c17
-rw-r--r--src/third_party/wiredtiger/src/meta/meta_turtle.c34
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c4
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_open.c7
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_thread.c13
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_track.c2
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_open.c3
-rw-r--r--src/third_party/wiredtiger/src/session/session_api.c125
-rw-r--r--src/third_party/wiredtiger/src/session/session_dhandle.c6
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c2
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c102
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ckpt.c34
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_log.c85
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_nsnap.c369
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_recover.c19
65 files changed, 1856 insertions, 519 deletions
diff --git a/src/third_party/wiredtiger/README b/src/third_party/wiredtiger/README
index 5f7849fdf38..7463219341f 100644
--- a/src/third_party/wiredtiger/README
+++ b/src/third_party/wiredtiger/README
@@ -1,4 +1,4 @@
-WiredTiger 2.6.1: (May 15, 2015)
+WiredTiger 2.6.1: (May 28, 2015)
This is version 2.6.1 of WiredTiger.
@@ -6,22 +6,24 @@ WiredTiger release packages and documentation can be found at:
http://source.wiredtiger.com/
+The documentation for this specific release can be found at:
+
+ http://source.wiredtiger.com/2.6.1/index.html
+
+The WiredTiger source code can be found at:
+
+ http://github.com/wiredtiger/wiredtiger
+
WiredTiger uses JIRA for issue management:
http://jira.mongodb.org/browse/WT
Please do not report issues through GitHub.
-Information on configuring, building and installing WiredTiger can be
-found at:
-
- http://source.wiredtiger.com/2.6.1/install.html
-
WiredTiger licensing information can be found at:
http://source.wiredtiger.com/license.html
-For general questions and discussion, please use the WiredTiger mailing
-list:
+For general questions and discussion, there's a WiredTiger group:
http://groups.google.com/group/wiredtiger-users
diff --git a/src/third_party/wiredtiger/build_posix/Make.subdirs b/src/third_party/wiredtiger/build_posix/Make.subdirs
index 33d3cbb8bc1..0fa9e030659 100644
--- a/src/third_party/wiredtiger/build_posix/Make.subdirs
+++ b/src/third_party/wiredtiger/build_posix/Make.subdirs
@@ -14,6 +14,7 @@ ext/compressors/zlib ZLIB
ext/datasources/helium HAVE_HELIUM
ext/encryptors/nop
ext/encryptors/rotn
+ext/extractors/csv
ext/test/kvs_bdb HAVE_BERKELEY_DB
.
api/leveldb LEVELDB
diff --git a/src/third_party/wiredtiger/build_posix/aclocal/ax_func_posix_memalign.m4 b/src/third_party/wiredtiger/build_posix/aclocal/ax_func_posix_memalign.m4
new file mode 100644
index 00000000000..bd60adcbc81
--- /dev/null
+++ b/src/third_party/wiredtiger/build_posix/aclocal/ax_func_posix_memalign.m4
@@ -0,0 +1,50 @@
+# ===========================================================================
+# http://www.gnu.org/software/autoconf-archive/ax_func_posix_memalign.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+# AX_FUNC_POSIX_MEMALIGN
+#
+# DESCRIPTION
+#
+# Some versions of posix_memalign (notably glibc 2.2.5) incorrectly apply
+# their power-of-two check to the size argument, not the alignment
+# argument. AX_FUNC_POSIX_MEMALIGN defines HAVE_POSIX_MEMALIGN if the
+# power-of-two check is correctly applied to the alignment argument.
+#
+# LICENSE
+#
+# Copyright (c) 2008 Scott Pakin <pakin@uiuc.edu>
+#
+# Copying and distribution of this file, with or without modification, are
+# permitted in any medium without royalty provided the copyright notice
+# and this notice are preserved. This file is offered as-is, without any
+# warranty.
+
+#serial 7
+
+AC_DEFUN([AX_FUNC_POSIX_MEMALIGN],
+[AC_CACHE_CHECK([for working posix_memalign],
+ [ax_cv_func_posix_memalign_works],
+ [AC_TRY_RUN([
+#include <stdlib.h>
+
+int
+main ()
+{
+ void *buffer;
+
+ /* Some versions of glibc incorrectly perform the alignment check on
+ * the size word. */
+ exit (posix_memalign (&buffer, sizeof(void *), 123) != 0);
+}
+ ],
+ [ax_cv_func_posix_memalign_works=yes],
+ [ax_cv_func_posix_memalign_works=no],
+ [ax_cv_func_posix_memalign_works=no])])
+if test "$ax_cv_func_posix_memalign_works" = "yes" ; then
+ AC_DEFINE([HAVE_POSIX_MEMALIGN], [1],
+ [Define to 1 if `posix_memalign' works.])
+fi
+])
diff --git a/src/third_party/wiredtiger/build_posix/configure.ac.in b/src/third_party/wiredtiger/build_posix/configure.ac.in
index d93793a997b..4bfb4df7fa2 100644
--- a/src/third_party/wiredtiger/build_posix/configure.ac.in
+++ b/src/third_party/wiredtiger/build_posix/configure.ac.in
@@ -80,12 +80,16 @@ AC_CHECK_LIB(rt, sched_yield)
AC_CHECK_FUNCS([\
clock_gettime fallocate fcntl fread_unlocked ftruncate gettimeofday\
- posix_fadvise posix_fallocate posix_madvise posix_memalign\
+ posix_fadvise posix_fallocate posix_madvise\
strtouq sync_file_range])
# OS X wrongly reports that it has fdatasync
AS_CASE([$host_os], [darwin*], [], [AC_CHECK_FUNCS([fdatasync])])
+# Check for posix_memalign explicitly: it is a builtin in some compilers and
+# the generic declaration in AC_CHECK_FUNCS is incompatible.
+AX_FUNC_POSIX_MEMALIGN
+
AC_SYS_LARGEFILE
AC_C_BIGENDIAN
diff --git a/src/third_party/wiredtiger/build_win/filelist.win b/src/third_party/wiredtiger/build_win/filelist.win
index 4ec5d51ce51..099451e418d 100644
--- a/src/third_party/wiredtiger/build_win/filelist.win
+++ b/src/third_party/wiredtiger/build_win/filelist.win
@@ -168,4 +168,5 @@ src/txn/txn.c
src/txn/txn_ckpt.c
src/txn/txn_ext.c
src/txn/txn_log.c
+src/txn/txn_nsnap.c
src/txn/txn_recover.c
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index fbf71581fe9..db1bc85add4 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -790,6 +790,14 @@ methods = {
type='boolean'),
]),
'WT_SESSION.strerror' : Method([]),
+'WT_SESSION.transaction_sync' : Method([
+ Config('timeout_ms', '', r'''
+ maximum amount of time to wait for background sync to complete in
+ milliseconds. A value of zero disables the timeout and returns
+ immediately. The default waits forever.''',
+ type='int'),
+]),
+
'WT_SESSION.truncate' : Method([]),
'WT_SESSION.upgrade' : Method([]),
'WT_SESSION.verify' : Method([
@@ -826,13 +834,21 @@ methods = {
priority of the transaction for resolving conflicts.
Transactions with higher values are less likely to abort''',
min='-100', max='100'),
+ Config('snapshot', '', r'''
+ use a named, in-memory snapshot, see
+ @ref transaction_named_snapshots'''),
Config('sync', '', r'''
whether to sync log records when the transaction commits,
inherited from ::wiredtiger_open \c transaction_sync''',
type='boolean'),
]),
-'WT_SESSION.commit_transaction' : Method([]),
+'WT_SESSION.commit_transaction' : Method([
+ Config('sync', '', r'''
+ override whether to sync log records when the transaction commits,
+ inherited from ::wiredtiger_open \c transaction_sync''',
+ choices=['background', 'off', 'on']),
+]),
'WT_SESSION.rollback_transaction' : Method([]),
'WT_SESSION.checkpoint' : Method([
@@ -857,6 +873,24 @@ methods = {
if non-empty, checkpoint the list of objects''', type='list'),
]),
+'WT_SESSION.snapshot' : Method([
+ Config('drop', '', r'''
+ if non-empty, specifies which snapshots to drop. Where a group
+ of snapshots are being dropped, the order is based on snapshot
+ creation order not alphanumeric name order''',
+ type='category', subconfig=[
+ Config('all', 'false', r'''
+ drop all named snapshots''', type='boolean'),
+ Config('before', '', r'''
+ drop all snapshots up to but not including the specified name'''),
+ Config('names', '', r'''
+ drop specific named snapshots''', type='list'),
+ Config('to', '', r'''
+ drop all snapshots up to and including the specified name.'''),
+ ]),
+ Config('name', '', r'''specify a name for the snapshot'''),
+]),
+
'WT_CONNECTION.add_collator' : Method([]),
'WT_CONNECTION.add_compressor' : Method([]),
'WT_CONNECTION.add_data_source' : Method([]),
diff --git a/src/third_party/wiredtiger/dist/extlist b/src/third_party/wiredtiger/dist/extlist
index fb4540653b2..874d21289d2 100644
--- a/src/third_party/wiredtiger/dist/extlist
+++ b/src/third_party/wiredtiger/dist/extlist
@@ -9,3 +9,4 @@ ext/compressors/zlib/zlib_compress.c
ext/datasources/helium/helium.c
ext/encryptors/nop/nop_encrypt.c
ext/encryptors/rotn/rotn_encrypt.c
+ext/extractors/csv/csv_extractor.c
diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist
index 8d797c36e47..c3321cf845d 100644
--- a/src/third_party/wiredtiger/dist/filelist
+++ b/src/third_party/wiredtiger/dist/filelist
@@ -166,4 +166,5 @@ src/txn/txn.c
src/txn/txn_ckpt.c
src/txn/txn_ext.c
src/txn/txn_log.c
+src/txn/txn_nsnap.c
src/txn/txn_recover.c
diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py
index 544e3b5d549..ae97740073d 100644
--- a/src/third_party/wiredtiger/dist/flags.py
+++ b/src/third_party/wiredtiger/dist/flags.py
@@ -29,6 +29,7 @@ flags = {
'LOGSCAN_RECOVER',
],
'log_write' : [
+ 'LOG_BACKGROUND',
'LOG_DSYNC',
'LOG_FLUSH',
'LOG_FSYNC',
@@ -55,7 +56,7 @@ flags = {
'SKIP_UPDATE_RESTORE',
],
'txn_log_checkpoint' : [
- 'TXN_LOG_CKPT_FAIL',
+ 'TXN_LOG_CKPT_CLEANUP',
'TXN_LOG_CKPT_PREPARE',
'TXN_LOG_CKPT_START',
'TXN_LOG_CKPT_STOP',
diff --git a/src/third_party/wiredtiger/dist/log.py b/src/third_party/wiredtiger/dist/log.py
index abe72cea5c4..da4a9c10727 100644
--- a/src/third_party/wiredtiger/dist/log.py
+++ b/src/third_party/wiredtiger/dist/log.py
@@ -12,7 +12,7 @@ tmp_file = '__tmp'
field_types = {
'string' : ('const char *', 'S', '%s', 'arg', ''),
'item' : ('WT_ITEM *', 'u', '%s', 'escaped',
- 'WT_RET(__logrec_jsonify_str(session, &escaped, &arg));'),
+ 'WT_ERR(__logrec_jsonify_str(session, &escaped, &arg));'),
'recno' : ('uint64_t', 'r', '%" PRIu64 "', 'arg', ''),
'uint32' : ('uint32_t', 'I', '%" PRIu32 "', 'arg', ''),
'uint64' : ('uint64_t', 'Q', '%" PRIu64 "', 'arg', ''),
@@ -256,31 +256,35 @@ int
__wt_logop_%(name)s_print(
WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out)
{
-\t%(arg_decls)s
+%(arg_ret)s\t%(arg_decls)s
\t%(arg_init)sWT_RET(__wt_logop_%(name)s_unpack(
\t session, pp, end%(arg_addrs)s));
-\tfprintf(out, " \\"optype\\": \\"%(name)s\\",\\n");
+\tWT_RET(__wt_fprintf(out, " \\"optype\\": \\"%(name)s\\",\\n"));
\t%(print_args)s
-\t%(arg_fini)sreturn (0);
+%(arg_fini)s
}
''' % {
'name' : optype.name,
+ 'arg_ret' : ('\tWT_DECL_RET;\n' if has_escape(optype.fields) else ''),
'arg_decls' : ('\n\t'.join('%s%s%s;' %
(clocaltype(f), '' if clocaltype(f)[-1] == '*' else ' ', f[1])
for f in optype.fields)) + escape_decl(optype.fields),
'arg_init' : ('escaped = NULL;\n\t' if has_escape(optype.fields) else ''),
- 'arg_fini' : ('__wt_free(session, escaped);\n\t'
- if has_escape(optype.fields) else ''),
+ 'arg_fini' : ('\nerr:\t__wt_free(session, escaped);\n\treturn (ret);'
+ if has_escape(optype.fields) else '\treturn (0);'),
'arg_addrs' : ''.join(', &%s' % f[1] for f in optype.fields),
'print_args' : '\n\t'.join(
- '%sfprintf(out, " \\"%s\\": \\"%s\\",\\n",%s);' %
- (printf_setup(f), f[1], printf_fmt(f), printf_arg(f))
+ '%s%s(__wt_fprintf(out,\n\t " \\"%s\\": \\"%s\\",\\n",%s));' %
+ (printf_setup(f),
+ 'WT_ERR' if has_escape(optype.fields) else 'WT_RET',
+ f[1], printf_fmt(f), printf_arg(f))
for f in optype.fields[:-1]) + str(
- '\n\t%sfprintf(out, " \\"%s\\": \\"%s\\"",%s);' %
- (printf_setup(last_field), last_field[1],
- printf_fmt(last_field), printf_arg(last_field))),
+ '\n\t%s%s(__wt_fprintf(out,\n\t " \\"%s\\": \\"%s\\"",%s));' %
+ (printf_setup(last_field),
+ 'WT_ERR' if has_escape(optype.fields) else 'WT_RET',
+ last_field[1], printf_fmt(last_field), printf_arg(last_field))),
})
# Emit the printlog entry point
diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list
index c9f2db406bf..623a34447a8 100644
--- a/src/third_party/wiredtiger/dist/s_define.list
+++ b/src/third_party/wiredtiger/dist/s_define.list
@@ -20,9 +20,6 @@ WT_ATOMIC_ADD1
WT_ATOMIC_ADD2
WT_ATOMIC_CAS1
WT_ATOMIC_CAS2
-WT_ATOMIC_CAS_VAL1
-WT_ATOMIC_CAS_VAL2
-WT_ATOMIC_CAS_VAL4
WT_ATOMIC_FETCH_ADD1
WT_ATOMIC_FETCH_ADD2
WT_ATOMIC_FETCH_ADD4
@@ -68,7 +65,6 @@ __WIREDTIGER_EXT_H_
__WIREDTIGER_H_
__WT_ATOMIC_ADD
__WT_ATOMIC_CAS
-__WT_ATOMIC_CAS_VAL
__WT_ATOMIC_FETCH_ADD
__WT_ATOMIC_STORE
__WT_ATOMIC_SUB
diff --git a/src/third_party/wiredtiger/dist/s_readme b/src/third_party/wiredtiger/dist/s_readme
index f497a04fa1d..be809a6455c 100644
--- a/src/third_party/wiredtiger/dist/s_readme
+++ b/src/third_party/wiredtiger/dist/s_readme
@@ -35,23 +35,25 @@ WiredTiger release packages and documentation can be found at:
http://source.wiredtiger.com/
+The documentation for this specific release can be found at:
+
+ http://source.wiredtiger.com/$WIREDTIGER_VERSION/index.html
+
+The WiredTiger source code can be found at:
+
+ http://github.com/wiredtiger/wiredtiger
+
WiredTiger uses JIRA for issue management:
http://jira.mongodb.org/browse/WT
Please do not report issues through GitHub.
-Information on configuring, building and installing WiredTiger can be
-found at:
-
- http://source.wiredtiger.com/$WIREDTIGER_VERSION/install.html
-
WiredTiger licensing information can be found at:
http://source.wiredtiger.com/license.html
-For general questions and discussion, please use the WiredTiger mailing
-list:
+For general questions and discussion, there's a WiredTiger group:
http://groups.google.com/group/wiredtiger-users
END_TEXT
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index d57b19aa029..fa87c0086b6 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -44,6 +44,7 @@ CAS
CCCCCCCCCCCCCCCCCC
CELL's
CELLs
+CET
CFLAGS
CHECKKEY
CKPT
@@ -53,6 +54,7 @@ CONFIG
COVERITY
CPUs
CRC
+CSV
CURSORs
CURSTD
CallsCustDate
@@ -440,6 +442,7 @@ crc
create's
crypto
cryptobad
+csv
ctime
ctype
curbackup
@@ -736,6 +739,7 @@ noraw
notfound
notsup
nset
+nsnap
nul
nuls
numSymbols
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index ae43ce91fb4..791cb30e99c 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -285,6 +285,7 @@ connection_stats = [
'no_clear,no_scale'),
TxnStat('txn_pinned_range',
'transaction range of IDs currently pinned', 'no_clear,no_scale'),
+ TxnStat('txn_sync', 'transaction sync calls'),
TxnStat('txn_commit', 'transactions committed'),
TxnStat('txn_fail_cache', 'transaction failures due to cache overflow'),
TxnStat('txn_rollback', 'transactions rolled back'),
diff --git a/src/third_party/wiredtiger/ext/extractors/csv/Makefile.am b/src/third_party/wiredtiger/ext/extractors/csv/Makefile.am
new file mode 100644
index 00000000000..bb2a35bbf8e
--- /dev/null
+++ b/src/third_party/wiredtiger/ext/extractors/csv/Makefile.am
@@ -0,0 +1,10 @@
+AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include
+
+noinst_LTLIBRARIES = libwiredtiger_csv_extractor.la
+libwiredtiger_csv_extractor_la_SOURCES = csv_extractor.c
+
+# libtool hack: noinst_LTLIBRARIES turns off building shared libraries as well
+# as installation, it will only build static libraries. As far as I can tell,
+# the "approved" libtool way to turn them back on is by adding -rpath.
+libwiredtiger_csv_extractor_la_LDFLAGS = \
+ -avoid-version -module -rpath /nowhere
diff --git a/src/third_party/wiredtiger/ext/extractors/csv/csv_extractor.c b/src/third_party/wiredtiger/ext/extractors/csv/csv_extractor.c
new file mode 100644
index 00000000000..0dd110955ad
--- /dev/null
+++ b/src/third_party/wiredtiger/ext/extractors/csv/csv_extractor.c
@@ -0,0 +1,171 @@
+/*-
+ * Public Domain 2014-2015 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <string.h>
+#include <limits.h>
+#include <errno.h>
+#include <stdlib.h>
+
+#include <wiredtiger_ext.h>
+
+/*
+ * A simple WiredTiger extractor that separates a single string field,
+ * interpreted as column separated values (CSV), into component pieces.
+ * When an index is configured with this extractor and app_metadata
+ * set to a number N, the Nth field is returned as a string.
+ *
+ * For example, if a value in the primary table is
+ * "Paris,France,CET,2273305"
+ * and this extractor is configured with app_metadata=2, then
+ * the extractor for this value would return "CET".
+ */
+
+/* Local extractor structure. */
+typedef struct {
+ WT_EXTRACTOR extractor; /* Must come first */
+ WT_EXTENSION_API *wt_api; /* Extension API */
+ int field_num; /* Field to extract */
+} CSV_EXTRACTOR;
+
+/*
+ * csv_extract --
+ * WiredTiger CSV extraction.
+ */
+static int
+csv_extract(WT_EXTRACTOR *extractor, WT_SESSION *session,
+ const WT_ITEM *key, const WT_ITEM *value, WT_CURSOR *result_cursor)
+{
+ char *copy, *p, *pend, *valstr;
+ const CSV_EXTRACTOR *cvs_extractor;
+ int i, ret;
+ size_t len;
+ WT_EXTENSION_API *wtapi;
+
+ (void)key; /* Unused parameters */
+
+ cvs_extractor = (const CSV_EXTRACTOR *)extractor;
+ wtapi = cvs_extractor->wt_api;
+
+ /* Unpack the value. */
+ if ((ret = wtapi->struct_unpack(wtapi,
+ session, value->data, value->size, "S", &valstr)) != 0)
+ return (ret);
+
+ p = valstr;
+ pend = strchr(p, ',');
+ for (i = 0; i < cvs_extractor->field_num && pend != NULL; i++) {
+ p = pend + 1;
+ pend = strchr(p, ',');
+ }
+ if (i == cvs_extractor->field_num) {
+ if (pend == NULL)
+ pend = p + strlen(p);
+ /*
+ * The key we must return is a null terminated string, but p
+ * is not necessarily NULL-terminated. So make a copy, just
+ * for the duration of the insert.
+ */
+ len = (size_t)(pend - p);
+ if ((copy = malloc(len + 1)) == NULL)
+ return (errno);
+ strncpy(copy, p, len);
+ copy[len] = '\0';
+ result_cursor->set_key(result_cursor, copy);
+ ret = result_cursor->insert(result_cursor);
+ free(copy);
+ if (ret != 0)
+ return (ret);
+ }
+ return (0);
+}
+
+/*
+ * csv_customize --
+ * The customize function creates a customized extractor,
+ * needed to save the field number.
+ */
+static int
+csv_customize(WT_EXTRACTOR *extractor, WT_SESSION *session,
+ const char *uri, WT_CONFIG_ITEM *appcfg, WT_EXTRACTOR **customp)
+{
+ const CSV_EXTRACTOR *orig;
+ CSV_EXTRACTOR *csv_extractor;
+ long field_num;
+
+ (void)session; /* Unused parameters */
+ (void)uri; /* Unused parameters */
+
+ orig = (const CSV_EXTRACTOR *)extractor;
+ field_num = strtol(appcfg->str, NULL, 10);
+ if (field_num < 0 || field_num > INT_MAX)
+ return (EINVAL);
+ if ((csv_extractor = calloc(1, sizeof(CSV_EXTRACTOR))) == NULL)
+ return (errno);
+
+ *csv_extractor = *orig;
+ csv_extractor->field_num = field_num;
+ *customp = (WT_EXTRACTOR *)csv_extractor;
+ return (0);
+}
+
+/*
+ * csv_terminate --
+ * Terminate is called to free the CSV and any associated memory.
+ */
+static int
+csv_terminate(WT_EXTRACTOR *extractor, WT_SESSION *session)
+{
+ (void)session; /* Unused parameters */
+
+ /* Free the allocated memory. */
+ free(extractor);
+ return (0);
+}
+
+/*
+ * wiredtiger_extension_init --
+ * WiredTiger CSV extraction extension.
+ */
+int
+wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
+{
+ CSV_EXTRACTOR *csv_extractor;
+
+ (void)config; /* Unused parameters */
+
+ if ((csv_extractor = calloc(1, sizeof(CSV_EXTRACTOR))) == NULL)
+ return (errno);
+
+ csv_extractor->extractor.extract = csv_extract;
+ csv_extractor->extractor.customize = csv_customize;
+ csv_extractor->extractor.terminate = csv_terminate;
+ csv_extractor->wt_api = connection->get_extension_api(connection);
+
+ return (connection->add_extractor(
+ connection, "csv", (WT_EXTRACTOR *)csv_extractor, NULL));
+}
diff --git a/src/third_party/wiredtiger/lang/java/java_doc.i b/src/third_party/wiredtiger/lang/java/java_doc.i
index 2129c89a409..ea80b80df2d 100644
--- a/src/third_party/wiredtiger/lang/java/java_doc.i
+++ b/src/third_party/wiredtiger/lang/java/java_doc.i
@@ -43,7 +43,9 @@ COPYDOC(__wt_session, WT_SESSION, begin_transaction)
COPYDOC(__wt_session, WT_SESSION, commit_transaction)
COPYDOC(__wt_session, WT_SESSION, rollback_transaction)
COPYDOC(__wt_session, WT_SESSION, checkpoint)
+COPYDOC(__wt_session, WT_SESSION, snapshot)
COPYDOC(__wt_session, WT_SESSION, transaction_pinned_range)
+COPYDOC(__wt_session, WT_SESSION, transaction_sync)
COPYDOC(__wt_connection, WT_CONNECTION, async_flush)
COPYDOC(__wt_connection, WT_CONNECTION, async_new_op)
COPYDOC(__wt_connection, WT_CONNECTION, close)
diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c
index 64bd86294f2..5a882f0fb7c 100644
--- a/src/third_party/wiredtiger/src/block/block_open.c
+++ b/src/third_party/wiredtiger/src/block/block_open.c
@@ -51,11 +51,41 @@ __wt_block_manager_create(
WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize)
{
WT_DECL_RET;
+ WT_DECL_ITEM(tmp);
WT_FH *fh;
+ int exists, suffix;
char *path;
- /* Create the underlying file and open a handle. */
- WT_RET(__wt_open(session, filename, 1, 1, WT_FILE_TYPE_DATA, &fh));
+ /*
+ * Create the underlying file and open a handle.
+ *
+ * Since WiredTiger schema operations are (currently) non-transactional,
+ * it's possible to see a partially-created file left from a previous
+ * create. Further, there's nothing to prevent users from creating files
+ * in our space. Move any existing files out of the way and complain.
+ */
+ for (;;) {
+ if ((ret = __wt_open(
+ session, filename, 1, 1, WT_FILE_TYPE_DATA, &fh)) == 0)
+ break;
+ WT_ERR_TEST(ret != EEXIST, ret);
+
+ if (tmp == NULL)
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ for (suffix = 1;; ++suffix) {
+ WT_ERR(__wt_buf_fmt(
+ session, tmp, "%s.%d", filename, suffix));
+ WT_ERR(__wt_exist(session, tmp->data, &exists));
+ if (!exists) {
+ WT_ERR(
+ __wt_rename(session, filename, tmp->data));
+ WT_ERR(__wt_msg(session,
+ "unexpected file %s found, renamed to %s",
+ filename, (char *)tmp->data));
+ break;
+ }
+ }
+ }
/* Write out the file's meta-data. */
ret = __wt_desc_init(session, fh, allocsize);
@@ -83,6 +113,8 @@ __wt_block_manager_create(
if (ret != 0)
WT_TRET(__wt_remove(session, filename));
+err: __wt_scr_free(session, &tmp);
+
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index b1fa5ce6178..751ec8581c2 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -188,7 +188,7 @@ __dmsg(WT_DBG *ds, const char *fmt, ...)
}
} else {
va_start(ap, fmt);
- (void)vfprintf(ds->fp, fmt, ap);
+ (void)__wt_vfprintf(ds->fp, fmt, ap);
va_end(ap);
}
}
@@ -202,13 +202,14 @@ __wt_debug_addr_print(
WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
{
WT_DECL_ITEM(buf);
+ WT_DECL_RET;
WT_RET(__wt_scr_alloc(session, 128, &buf));
- fprintf(stderr, "%s\n",
- __wt_addr_string(session, addr, addr_size, buf));
+ ret = __wt_fprintf(stderr,
+ "%s\n", __wt_addr_string(session, addr, addr_size, buf));
__wt_scr_free(session, &buf);
- return (0);
+ return (ret);
}
/*
@@ -437,7 +438,7 @@ __debug_tree_shape_worker(WT_DBG *ds, WT_PAGE *page, int level)
session = ds->session;
- if (page->type == WT_PAGE_ROW_INT || page->type == WT_PAGE_COL_INT) {
+ if (WT_PAGE_IS_INTERNAL(page)) {
__dmsg(ds, "%*s" "I" "%d %s\n",
level * 3, " ", level, __debug_tree_shape_info(page));
WT_INTL_FOREACH_BEGIN(session, page, ref) {
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index d4c8cf1b92d..c316be6f908 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -170,15 +170,12 @@ __split_safe_free(WT_SESSION_IMPL *session,
* Return if we should deepen the tree.
*/
static int
-__split_should_deepen(
- WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *childrenp)
+__split_should_deepen(WT_SESSION_IMPL *session, WT_REF *ref)
{
WT_BTREE *btree;
WT_PAGE *page;
WT_PAGE_INDEX *pindex;
- *childrenp = 0;
-
btree = S2BT(session);
page = ref->page;
@@ -202,10 +199,8 @@ __split_should_deepen(
* we get a significant payback (in the case of a set of large keys,
* splitting won't help).
*/
- if (pindex->entries > btree->split_deepen_min_child) {
- *childrenp = pindex->entries / btree->split_deepen_per_child;
+ if (pindex->entries > btree->split_deepen_min_child)
return (1);
- }
/*
* Don't allow a single page to put pressure on cache usage. The root
@@ -216,10 +211,8 @@ __split_should_deepen(
*/
if (pindex->entries >= 100 &&
(__wt_ref_is_root(ref) ||
- page->memory_footprint >= S2C(session)->cache_size / 4)) {
- *childrenp = pindex->entries / 10;
+ page->memory_footprint >= S2C(session)->cache_size / 4))
return (1);
- }
return (0);
}
@@ -385,8 +378,9 @@ __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page)
* Split an internal page in-memory, deepening the tree.
*/
static int
-__split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
+__split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent)
{
+ WT_BTREE *btree;
WT_DECL_RET;
WT_PAGE *child;
WT_PAGE_INDEX *alloc_index, *child_pindex, *pindex;
@@ -394,10 +388,15 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
WT_REF *child_ref, **child_refp, *parent_ref, **parent_refp, *ref;
size_t child_incr, parent_decr, parent_incr, size;
uint64_t split_gen;
- uint32_t chunk, i, j, remain, slots;
+ uint32_t children, chunk, i, j, moved_entries, new_entries, remain;
+ uint32_t skip_leading, slots;
int panic;
void *p;
+ WT_STAT_FAST_CONN_INCR(session, cache_eviction_deepen);
+ WT_STAT_FAST_DATA_INCR(session, cache_eviction_deepen);
+
+ btree = S2BT(session);
alloc_index = NULL;
parent_incr = parent_decr = 0;
panic = 0;
@@ -409,53 +408,76 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
*/
pindex = WT_INTL_INDEX_GET_SAFE(parent);
- WT_STAT_FAST_CONN_INCR(session, cache_eviction_deepen);
- WT_STAT_FAST_DATA_INCR(session, cache_eviction_deepen);
- WT_ERR(__wt_verbose(session, WT_VERB_SPLIT,
- "%p: %" PRIu32 " elements, splitting into %" PRIu32 " children",
- parent, pindex->entries, children));
-
/*
- * If the workload is prepending/appending to the tree, we could deepen
- * without bound. Don't let that happen, keep the first/last pages of
- * the tree at their current level.
+ * A prepending/appending workload will repeatedly deepen parts of the
+ * tree that aren't changing, and appending workloads are not uncommon.
+ * First, keep the first/last pages of the tree at their current level,
+ * to catch simple workloads. Second, track the number of entries which
+ * resulted from the last time we deepened this page, and if we refilled
+ * this page without splitting into those slots, ignore them for this
+ * split. It's not exact because an eviction might split into any part
+ * of the page: if 80% of the splits are at the end of the page, assume
+ * an append-style workload. Of course, the plan eventually fails: when
+ * repeatedly deepening this page for an append-only workload, we will
+ * progressively ignore more and more of the slots. When ignoring 90% of
+ * the slots, deepen the entire page again.
*
- * XXX
- * To improve this, we could track which pages were last merged into
- * this page by eviction, and leave those pages alone, to prevent any
- * sustained insert into the tree from deepening a single location.
+ * Figure out how many slots we're leaving at this level and how many
+ * child pages we're creating.
+ */
+#undef skip_trailing
+#define skip_trailing 1
+ skip_leading = 1;
+ new_entries = pindex->entries - parent->pg_intl_deepen_split_last;
+ if (parent->pg_intl_deepen_split_append > (new_entries * 8) / 10)
+ skip_leading = parent->pg_intl_deepen_split_last;
+ if (skip_leading > (pindex->entries * 9) * 10)
+ skip_leading = 1;
+
+ /*
+ * In a few (rare) cases we split pages with only a few entries, and in
+ * those cases we keep it simple, 10 children, skip only first and last
+ * entries. Otherwise, split into a lot of child pages.
*/
-#undef SPLIT_CORRECT_1
-#define SPLIT_CORRECT_1 1 /* First page correction */
-#undef SPLIT_CORRECT_2
-#define SPLIT_CORRECT_2 2 /* First/last page correction */
+ moved_entries = pindex->entries - (skip_leading + skip_trailing);
+ children = moved_entries / btree->split_deepen_per_child;
+ if (children < 10) {
+ children = 10;
+ skip_leading = 1;
+ moved_entries =
+ pindex->entries - (skip_leading + skip_trailing);
+ }
+
+ WT_ERR(__wt_verbose(session, WT_VERB_SPLIT,
+ "%p: %" PRIu32 " elements, splitting into %" PRIu32 " children",
+ parent, pindex->entries, children));
/*
- * Allocate a new WT_PAGE_INDEX and set of WT_REF objects. Initialize
- * the first/last slots of the allocated WT_PAGE_INDEX to point to the
- * first/last pages we're keeping at the current level, and the rest of
- * the slots to point to new WT_REF objects.
+ * Allocate a new WT_PAGE_INDEX and set of WT_REF objects. Initialize
+ * the slots of the allocated WT_PAGE_INDEX to point to the pages we're
+ * keeping at the current level, and the rest of the slots to point to
+ * new WT_REF objects.
*/
size = sizeof(WT_PAGE_INDEX) +
- (children + SPLIT_CORRECT_2) * sizeof(WT_REF *);
+ (children + skip_leading + skip_trailing) * sizeof(WT_REF *);
WT_ERR(__wt_calloc(session, 1, size, &alloc_index));
parent_incr += size;
alloc_index->index = (WT_REF **)(alloc_index + 1);
- alloc_index->entries = children + SPLIT_CORRECT_2;
- alloc_index->index[0] = pindex->index[0];
+ alloc_index->entries = children + skip_leading + skip_trailing;
+ for (alloc_refp = alloc_index->index,
+ i = 0; i < skip_leading; ++alloc_refp, ++i)
+ alloc_index->index[i] = pindex->index[i];
+ for (i = 0; i < children; ++alloc_refp, ++i)
+ WT_ERR(__wt_calloc_one(session, alloc_refp));
+ parent_incr += children * sizeof(WT_REF);
alloc_index->index[alloc_index->entries - 1] =
pindex->index[pindex->entries - 1];
- for (alloc_refp = alloc_index->index + SPLIT_CORRECT_1,
- i = 0; i < children; ++alloc_refp, ++i) {
- WT_ERR(__wt_calloc_one(session, alloc_refp));
- parent_incr += sizeof(WT_REF);
- }
/* Allocate child pages, and connect them into the new page index. */
- chunk = (pindex->entries - SPLIT_CORRECT_2) / children;
- remain = (pindex->entries - SPLIT_CORRECT_2) - chunk * (children - 1);
- for (parent_refp = pindex->index + SPLIT_CORRECT_1,
- alloc_refp = alloc_index->index + SPLIT_CORRECT_1,
+ chunk = moved_entries / children;
+ remain = moved_entries - chunk * (children - 1);
+ for (parent_refp = pindex->index + skip_leading,
+ alloc_refp = alloc_index->index + skip_leading,
i = 0; i < children; ++i) {
slots = i == children - 1 ? remain : chunk;
WT_ERR(__wt_page_alloc(
@@ -513,10 +535,11 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
}
__wt_cache_page_inmem_incr(session, child, child_incr);
}
- WT_ASSERT(session, alloc_refp -
- alloc_index->index == alloc_index->entries - SPLIT_CORRECT_1);
WT_ASSERT(session,
- parent_refp - pindex->index == pindex->entries - SPLIT_CORRECT_1);
+ alloc_refp - alloc_index->index ==
+ alloc_index->entries - skip_trailing);
+ WT_ASSERT(session,
+ parent_refp - pindex->index == pindex->entries - skip_trailing);
/*
* Confirm the parent page's index hasn't moved, then update it, which
@@ -541,6 +564,12 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
WT_WITH_PAGE_INDEX(session,
__split_verify_intl_key_order(session, parent));
#endif
+ /*
+ * Save the number of entries created by deepening the tree and reset
+ * the count of splits into this page after that point.
+ */
+ parent->pg_intl_deepen_split_append = 0;
+ parent->pg_intl_deepen_split_last = alloc_index->entries;
/*
* The moved reference structures now reference the wrong parent page,
@@ -823,7 +852,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref,
WT_REF **alloc_refp, *next_ref, *parent_ref;
size_t parent_decr, size;
uint64_t split_gen;
- uint32_t children, i, j;
+ uint32_t i, j;
uint32_t deleted_entries, parent_entries, result_entries;
int complete, hazard;
@@ -907,8 +936,8 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref,
}
/*
- * The final entry count consists of: The original count, plus any
- * new pages, less any refs we are removing.
+ * The final entry count consists of the original count, plus any new
+ * pages, less any WT_REFs we're removing.
*/
result_entries = (parent_entries + new_entries) - deleted_entries;
@@ -924,7 +953,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref,
alloc_index->entries = result_entries;
for (alloc_refp = alloc_index->index, i = 0; i < parent_entries; ++i) {
next_ref = pindex->index[i];
- if (next_ref == ref)
+ if (next_ref == ref) {
for (j = 0; j < new_entries; ++j) {
ref_new[j]->home = parent;
*alloc_refp++ = ref_new[j];
@@ -936,7 +965,22 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref,
*/
ref_new[j] = NULL;
}
- else if (next_ref->state != WT_REF_SPLIT)
+
+ /*
+ * We detect append-style workloads to avoid repeatedly
+ * deepening parts of the tree where no work is being
+ * done by tracking if we're splitting after the slots
+ * created by the last split to deepen this parent.
+ *
+ * Note the calculation: i is a 0-based array offset and
+ * split-last is a count of entries, also either or both
+ * i and split-last might be unsigned 0, don't decrement
+ * either one.
+ */
+ if (i > parent->pg_intl_deepen_split_last)
+ parent->
+ pg_intl_deepen_split_append += new_entries;
+ } else if (next_ref->state != WT_REF_SPLIT)
/* Skip refs we have marked for deletion. */
*alloc_refp++ = next_ref;
}
@@ -1070,7 +1114,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref,
*/
if (ret == 0 && !LF_ISSET(WT_EVICT_EXCLUSIVE) &&
!F_ISSET_ATOMIC(parent, WT_PAGE_REFUSE_DEEPEN) &&
- __split_should_deepen(session, parent_ref, &children)) {
+ __split_should_deepen(session, parent_ref)) {
/*
* XXX
* Temporary hack to avoid a bug where the root page is split
@@ -1078,7 +1122,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref,
*/
uint64_t __a, __b;
__a = parent->memory_footprint;
- ret = __split_deepen(session, parent, children);
+ ret = __split_deepen(session, parent);
__b = parent->memory_footprint;
if (__b * 2 >= __a)
F_SET_ATOMIC(parent, WT_PAGE_REFUSE_DEEPEN);
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index 45c2029f6ed..0ded95c0d8c 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -118,15 +118,22 @@ __verify_config_offsets(WT_SESSION_IMPL *session, const char *cfg[], int *quitp)
static int
__verify_tree_shape(WT_SESSION_IMPL *session, WT_VSTUFF *vs)
{
+ uint32_t total;
size_t i;
- WT_RET(__wt_msg(session, "Internal page tree-depth:"));
+ for (i = 0, total = 0; i < WT_ELEMENTS(vs->depth_internal); ++i)
+ total += vs->depth_internal[i];
+ WT_RET(__wt_msg(
+ session, "Internal page tree-depth (total %" PRIu32 "):", total));
for (i = 0; i < WT_ELEMENTS(vs->depth_internal); ++i)
if (vs->depth_internal[i] != 0)
WT_RET(__wt_msg(session,
"\t%03zu: %u", i, vs->depth_internal[i]));
- WT_RET(__wt_msg(session, "Leaf page tree-depth:"));
+ for (i = 0, total = 0; i < WT_ELEMENTS(vs->depth_leaf); ++i)
+ total += vs->depth_leaf[i];
+ WT_RET(__wt_msg(
+ session, "Leaf page tree-depth (total %" PRIu32 "):", total));
for (i = 0; i < WT_ELEMENTS(vs->depth_leaf); ++i)
if (vs->depth_leaf[i] != 0)
WT_RET(__wt_msg(session,
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 845b4e65825..64fc802160c 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -148,6 +148,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_begin_transaction[] = {
NULL, 0 },
{ "name", "string", NULL, NULL, NULL, 0 },
{ "priority", "int", NULL, "min=-100,max=100", NULL, 0 },
+ { "snapshot", "string", NULL, NULL, NULL, 0 },
{ "sync", "boolean", NULL, NULL, NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
@@ -160,6 +161,13 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_checkpoint[] = {
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
+static const WT_CONFIG_CHECK confchk_WT_SESSION_commit_transaction[] = {
+ { "sync", "string",
+ NULL, "choices=[\"background\",\"off\",\"on\"]",
+ NULL, 0 },
+ { NULL, NULL, NULL, NULL, NULL, 0 }
+};
+
static const WT_CONFIG_CHECK confchk_WT_SESSION_compact[] = {
{ "timeout", "int", NULL, NULL, NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
@@ -287,6 +295,28 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_salvage[] = {
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
+static const WT_CONFIG_CHECK
+ confchk_WT_SESSION_snapshot_drop_subconfigs[] = {
+ { "all", "boolean", NULL, NULL, NULL, 0 },
+ { "before", "string", NULL, NULL, NULL, 0 },
+ { "names", "list", NULL, NULL, NULL, 0 },
+ { "to", "string", NULL, NULL, NULL, 0 },
+ { NULL, NULL, NULL, NULL, NULL, 0 }
+};
+
+static const WT_CONFIG_CHECK confchk_WT_SESSION_snapshot[] = {
+ { "drop", "category",
+ NULL, NULL,
+ confchk_WT_SESSION_snapshot_drop_subconfigs, 4 },
+ { "name", "string", NULL, NULL, NULL, 0 },
+ { NULL, NULL, NULL, NULL, NULL, 0 }
+};
+
+static const WT_CONFIG_CHECK confchk_WT_SESSION_transaction_sync[] = {
+ { "timeout_ms", "int", NULL, NULL, NULL, 0 },
+ { NULL, NULL, NULL, NULL, NULL, 0 }
+};
+
static const WT_CONFIG_CHECK confchk_WT_SESSION_verify[] = {
{ "dump_address", "boolean", NULL, NULL, NULL, 0 },
{ "dump_blocks", "boolean", NULL, NULL, NULL, 0 },
@@ -759,8 +789,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
confchk_WT_CURSOR_reconfigure, 2
},
{ "WT_SESSION.begin_transaction",
- "isolation=,name=,priority=0,sync=",
- confchk_WT_SESSION_begin_transaction, 4
+ "isolation=,name=,priority=0,snapshot=,sync=",
+ confchk_WT_SESSION_begin_transaction, 5
},
{ "WT_SESSION.checkpoint",
"drop=,force=0,name=,target=",
@@ -771,8 +801,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
NULL, 0
},
{ "WT_SESSION.commit_transaction",
- "",
- NULL, 0
+ "sync=",
+ confchk_WT_SESSION_commit_transaction, 1
},
{ "WT_SESSION.compact",
"timeout=1200",
@@ -824,10 +854,18 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"force=0",
confchk_WT_SESSION_salvage, 1
},
+ { "WT_SESSION.snapshot",
+ "drop=(all=0,before=,names=,to=),name=",
+ confchk_WT_SESSION_snapshot, 2
+ },
{ "WT_SESSION.strerror",
"",
NULL, 0
},
+ { "WT_SESSION.transaction_sync",
+ "timeout_ms=",
+ confchk_WT_SESSION_transaction_sync, 1
+ },
{ "WT_SESSION.truncate",
"",
NULL, 0
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index c57be1a9423..d42287497a5 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -406,15 +406,14 @@ __wt_encryptor_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval,
WT_KEYED_ENCRYPTOR *kenc;
WT_NAMED_ENCRYPTOR *nenc;
uint64_t bucket, hash;
- int locked;
*kencryptorp = NULL;
+
kenc = NULL;
conn = S2C(session);
- locked = 0;
__wt_spin_lock(session, &conn->encryptor_lock);
- locked = 1;
+
WT_ERR(__encryptor_confchk(session, cval, &nenc));
if (nenc == NULL) {
if (keyid->len != 0)
@@ -462,8 +461,7 @@ err: if (kenc != NULL) {
__wt_free(session, kenc->keyid);
__wt_free(session, kenc);
}
- if (locked)
- __wt_spin_unlock(session, &conn->encryptor_lock);
+ __wt_spin_unlock(session, &conn->encryptor_lock);
return (ret);
}
@@ -637,7 +635,7 @@ __extractor_confchk(
*/
int
__wt_extractor_config(WT_SESSION_IMPL *session,
- const char *config, WT_EXTRACTOR **extractorp, int *ownp)
+ const char *uri, const char *config, WT_EXTRACTOR **extractorp, int *ownp)
{
WT_CONFIG_ITEM cname;
WT_EXTRACTOR *extractor;
@@ -658,7 +656,7 @@ __wt_extractor_config(WT_SESSION_IMPL *session,
WT_RET(__wt_config_getones(session,
config, "app_metadata", &cname));
WT_RET(extractor->customize(extractor, &session->iface,
- session->dhandle->name, &cname, extractorp));
+ uri, &cname, extractorp));
}
if (*extractorp == NULL)
@@ -987,6 +985,9 @@ err: /*
wt_session, NULL));
}
+ /* Release all named snapshots. */
+ WT_TRET(__wt_txn_named_snapshot_destroy(session));
+
/* Close open, external sessions. */
for (s = conn->sessions, i = 0; i < conn->session_cnt; ++s, ++i)
if (s->active && !F_ISSET(s, WT_SESSION_INTERNAL)) {
@@ -1372,11 +1373,15 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
WT_FH *fh;
size_t len;
wt_off_t size;
+ int exist, is_create;
char buf[256];
conn = S2C(session);
fh = NULL;
+ WT_RET(__wt_config_gets(session, cfg, "create", &cval));
+ is_create = cval.val == 0 ? 0 : 1;
+
__wt_spin_lock(session, &__wt_process.spinlock);
/*
@@ -1410,10 +1415,6 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
* WiredTiger file and system utilities on Windows can't copy locked
* files.
*
- * For this reason, we don't use the lock file's existence to decide if
- * we're creating the database or not, use the WiredTiger file instead,
- * it has existed in every version of WiredTiger.
- *
* Additionally, avoid an upgrade race: a 2.3.1 release process might
* have the WiredTiger file locked, and we're going to create the lock
* file and lock it instead. For this reason, first acquire a lock on
@@ -1424,12 +1425,22 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
* then successfully lock the WiredTiger file, but I can't think of any
* way to fix that.)
*
- * Open the WiredTiger lock file, creating it if it doesn't exist. (I'm
- * not removing the lock file if we create it and subsequently fail, it
- * isn't simple to detect that case, and there's no risk other than a
- * useless file being left in the directory.)
+ * Open the WiredTiger lock file, optionally creating it if it doesn't
+ * exist. The "optional" part of that statement is tricky: we don't want
+ * to create the lock file in random directories when users mistype the
+ * database home directory path, so we only create the lock file in two
+ * cases: First, applications creating databases will configure create,
+ * create the lock file. Second, after a hot backup, all of the standard
+ * files will have been copied into place except for the lock file (see
+ * above, locked files cannot be copied on Windows). If the WiredTiger
+ * file exists in the directory, create the lock file, covering the case
+ * of a hot backup.
*/
- WT_ERR(__wt_open(session, WT_SINGLETHREAD, 1, 0, 0, &conn->lock_fh));
+ exist = 0;
+ if (!is_create)
+ WT_ERR(__wt_exist(session, WT_WIREDTIGER, &exist));
+ WT_ERR(__wt_open(session,
+ WT_SINGLETHREAD, is_create || exist, 0, 0, &conn->lock_fh));
/*
* Lock a byte of the file: if we don't get the lock, some other process
@@ -1442,22 +1453,22 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
"process");
/*
- * If the size of the lock file is 0, we created it (or we won a locking
- * race with the thread that created it, it doesn't matter).
+ * If the size of the lock file is non-zero, we created it (or won a
+ * locking race with the thread that created it, it doesn't matter).
*
* Write something into the file, zero-length files make me nervous.
+ *
+ * The test against the expected length is sheer paranoia (the length
+ * should be 0 or correct), but it shouldn't hurt.
*/
- WT_ERR(__wt_filesize(session, conn->lock_fh, &size));
- if (size == 0) {
#define WT_SINGLETHREAD_STRING "WiredTiger lock file\n"
+ WT_ERR(__wt_filesize(session, conn->lock_fh, &size));
+ if (size != strlen(WT_SINGLETHREAD_STRING))
WT_ERR(__wt_write(session, conn->lock_fh, (wt_off_t)0,
strlen(WT_SINGLETHREAD_STRING), WT_SINGLETHREAD_STRING));
- }
/* We own the lock file, optionally create the WiredTiger file. */
- WT_ERR(__wt_config_gets(session, cfg, "create", &cval));
- WT_ERR(__wt_open(session,
- WT_WIREDTIGER, cval.val == 0 ? 0 : 1, 0, 0, &fh));
+ WT_ERR(__wt_open(session, WT_WIREDTIGER, is_create, 0, 0, &fh));
/*
* Lock the WiredTiger file (for backward compatibility reasons as
@@ -1471,25 +1482,27 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_bytelock(fh, (wt_off_t)0, 0));
/*
- * If the size of the file is zero, we created it, fill it in. If the
- * size of the file is non-zero, fail if configured for exclusivity.
+ * We own the database home, figure out if we're creating it. There are
+ * a few files created when initializing the database home and we could
+ * crash in-between any of them, so there's no simple test. The last
+ * thing we do during initialization is rename a turtle file into place,
+ * and there's never a database home after that point without a turtle
+ * file. If the turtle file doesn't exist, it's a create.
*/
- WT_ERR(__wt_filesize(session, fh, &size));
- if (size == 0) {
+ WT_ERR(__wt_exist(session, WT_METADATA_TURTLE, &exist));
+ conn->is_new = exist ? 0 : 1;
+
+ if (conn->is_new) {
len = (size_t)snprintf(buf, sizeof(buf),
"%s\n%s\n", WT_WIREDTIGER, WIREDTIGER_VERSION_STRING);
WT_ERR(__wt_write(session, fh, (wt_off_t)0, len, buf));
WT_ERR(__wt_fsync(session, fh));
-
- conn->is_new = 1;
} else {
WT_ERR(__wt_config_gets(session, cfg, "exclusive", &cval));
if (cval.val != 0)
WT_ERR_MSG(session, EEXIST,
"WiredTiger database already exists and exclusive "
"option configured");
-
- conn->is_new = 0;
}
err: /*
@@ -1632,16 +1645,17 @@ __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[])
* Save the base configuration used to create a database.
*/
static int
-__conn_write_base_config(
- WT_SESSION_IMPL *session, const char *cfg[], const char *base_config)
+__conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[])
{
FILE *fp;
WT_CONFIG parser;
WT_CONFIG_ITEM cval, k, v;
WT_DECL_RET;
int exist;
+ const char *base_config;
fp = NULL;
+ base_config = NULL;
/*
* Discard any base configuration setup file left-over from previous
@@ -1650,7 +1664,12 @@ __conn_write_base_config(
*/
WT_RET(__wt_remove_if_exists(session, WT_BASECONFIG_SET));
- /* The base configuration file is optional, check the configuration. */
+ /*
+ * The base configuration file is only written if creating the database,
+ * and even then, a base configuration file is optional.
+ */
+ if (!S2C(session)->is_new)
+ return (0);
WT_RET(__wt_config_gets(session, cfg, "config_base", &cval));
if (!cval.val)
return (0);
@@ -1670,13 +1689,13 @@ __conn_write_base_config(
WT_RET(__wt_fopen(session,
WT_BASECONFIG_SET, WT_FHANDLE_WRITE, 0, &fp));
- fprintf(fp, "%s\n\n",
+ WT_ERR(__wt_fprintf(fp, "%s\n\n",
"# Do not modify this file.\n"
"#\n"
"# WiredTiger created this file when the database was created,\n"
"# to store persistent database settings. Instead of changing\n"
"# these settings, set a WIREDTIGER_CONFIG environment variable\n"
- "# or create a WiredTiger.config file to override them.");
+ "# or create a WiredTiger.config file to override them."));
/*
* The base configuration file contains all changes to default settings
@@ -1690,10 +1709,18 @@ __conn_write_base_config(
* file in that protection.
*
* We were passed the configuration items specified by the application.
- * That list includes configuring the default setting, presumably if
+ * That list includes configuring the default settings, presumably if
* the application configured it explicitly, that setting should survive
* even if the default changes.
+ *
+ * When writing the base configuration file, we write the version and
+ * any configuration information set by the application (in other words,
+ * the stack except for cfg[0]). However, some configuration values need
+ * to be stripped out from the base configuration file; do that now, and
+ * merge the rest to be written.
*/
+ WT_ERR(__wt_config_merge(session, cfg + 1,
+ "create=,encryption=(secretkey=),log=(recover=)", &base_config));
WT_ERR(__wt_config_init(session, &parser, base_config));
while ((ret = __wt_config_next(&parser, &k, &v)) == 0) {
/* Fix quoting for non-trivial settings. */
@@ -1701,18 +1728,23 @@ __conn_write_base_config(
--v.str;
v.len += 2;
}
- fprintf(fp,
- "%.*s=%.*s\n", (int)k.len, k.str, (int)v.len, v.str);
+ WT_ERR(__wt_fprintf(fp,
+ "%.*s=%.*s\n", (int)k.len, k.str, (int)v.len, v.str));
}
WT_ERR_NOTFOUND_OK(ret);
/* Flush the handle and rename the file into place. */
- return (__wt_sync_and_rename_fp(
- session, &fp, WT_BASECONFIG_SET, WT_BASECONFIG));
+ ret = __wt_sync_and_rename_fp(
+ session, &fp, WT_BASECONFIG_SET, WT_BASECONFIG);
+
+ if (0) {
+ /* Close open file handle, remove any temporary file. */
+err: if (fp != NULL)
+ WT_TRET(__wt_fclose(&fp, WT_FHANDLE_WRITE));
+ WT_TRET(__wt_remove_if_exists(session, WT_BASECONFIG_SET));
+ }
- /* Close any file handle left open, remove any temporary file. */
-err: WT_TRET(__wt_fclose(&fp, WT_FHANDLE_WRITE));
- WT_TRET(__wt_remove_if_exists(session, WT_BASECONFIG_SET));
+ __wt_free(session, base_config);
return (ret);
}
@@ -1759,7 +1791,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
WT_DECL_RET;
const WT_NAME_FLAG *ft;
WT_SESSION_IMPL *session;
- const char *base_merge, *enc_cfg[] = { NULL, NULL };
+ const char *enc_cfg[] = { NULL, NULL };
char version[64];
/* Leave lots of space for optional additional configuration. */
@@ -1770,7 +1802,6 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
conn = NULL;
session = NULL;
- base_merge = NULL;
WT_RET(__wt_library_init());
@@ -1972,29 +2003,23 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
(WT_CONFIG_ARG *)enc_cfg, &conn->kencryptor));
/*
+ * Configuration completed; optionally write a base configuration file.
+ */
+ WT_ERR(__conn_write_base_config(session, cfg));
+
+ /*
* Check on the turtle and metadata files, creating them if necessary
* (which avoids application threads racing to create the metadata file
* later). Once the metadata file exists, get a reference to it in
* the connection's session.
+ *
+ * THE TURTLE FILE MUST BE THE LAST FILE CREATED WHEN INITIALIZING THE
+ * DATABASE HOME, IT'S WHAT WE USE TO DECIDE IF WE'RE CREATING OR NOT.
*/
WT_ERR(__wt_turtle_init(session));
WT_ERR(__wt_metadata_open(session));
/*
- * Configuration completed; optionally write the base configuration file
- * if it doesn't already exist.
- *
- * When writing the base configuration file, we write the version and
- * any configuration information set by the application (in other words,
- * the stack except for cfg[0]). However, some configuration values need
- * to be stripped out from the base configuration file; do that now, and
- * merge the rest to be written.
- */
- WT_ERR(__wt_config_merge(session,
- cfg + 1, "create=,encryption=(secretkey=)", &base_merge));
- WT_ERR(__conn_write_base_config(session, cfg, base_merge));
-
- /*
* Start the worker threads last.
*/
WT_ERR(__wt_connection_workers(session, cfg));
@@ -2008,8 +2033,6 @@ err: /* Discard the scratch buffers. */
__wt_scr_free(session, &i2);
__wt_scr_free(session, &i3);
- __wt_free(session, base_merge);
-
/*
* We may have allocated scratch memory when using the dummy session or
* the subsequently created real session, and we don't want to tie down
diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c
index 29ab25e0226..bd57cd4b438 100644
--- a/src/third_party/wiredtiger/src/conn/conn_log.c
+++ b/src/third_party/wiredtiger/src/conn/conn_log.c
@@ -270,17 +270,18 @@ err:
}
/*
- * __log_close_server --
- * The log close server thread.
+ * __log_file_server --
+ * The log file server thread. This worker thread manages
+ * log file operations such as closing and syncing.
*/
static WT_THREAD_RET
-__log_close_server(void *arg)
+__log_file_server(void *arg)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_FH *close_fh;
WT_LOG *log;
- WT_LSN close_end_lsn, close_lsn;
+ WT_LSN close_end_lsn, close_lsn, min_lsn;
WT_SESSION_IMPL *session;
int locked;
@@ -321,10 +322,40 @@ __log_close_server(void *arg)
WT_ERR(__wt_cond_signal(session, log->log_sync_cond));
locked = 0;
__wt_spin_unlock(session, &log->log_sync_lock);
- } else
- /* Wait until the next event. */
- WT_ERR(__wt_cond_wait(session,
- conn->log_close_cond, WT_MILLION));
+ }
+ /*
+ * If a later thread asked for a background sync, do it now.
+ */
+ if (WT_LOG_CMP(&log->bg_sync_lsn, &log->sync_lsn) > 0) {
+ /*
+ * Save the latest write LSN which is the minimum
+ * we will have written to disk.
+ */
+ min_lsn = log->write_lsn;
+ /*
+ * The sync LSN we asked for better be smaller than
+ * the current written LSN.
+ */
+ WT_ASSERT(session,
+ WT_LOG_CMP(&log->bg_sync_lsn, &min_lsn) <= 0);
+ WT_ERR(__wt_fsync(session, log->log_fh));
+ __wt_spin_lock(session, &log->log_sync_lock);
+ locked = 1;
+ /*
+ * The sync LSN could have advanced while we were
+ * writing to disk.
+ */
+ if (WT_LOG_CMP(&log->sync_lsn, &min_lsn) <= 0) {
+ log->sync_lsn = min_lsn;
+ WT_ERR(__wt_cond_signal(
+ session, log->log_sync_cond));
+ }
+ locked = 0;
+ __wt_spin_unlock(session, &log->log_sync_lock);
+ }
+ /* Wait until the next event. */
+ WT_ERR(__wt_cond_wait(
+ session, conn->log_file_cond, WT_MILLION));
}
if (0) {
@@ -362,7 +393,7 @@ __log_wrlsn_server(void *arg)
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_LOG *log;
- WT_LOG_WRLSN_ENTRY written[SLOT_POOL];
+ WT_LOG_WRLSN_ENTRY written[WT_SLOT_POOL];
WT_LOGSLOT *slot;
WT_SESSION_IMPL *session;
size_t written_i;
@@ -385,7 +416,7 @@ __log_wrlsn_server(void *arg)
* Walk the array once saving any slots that are in the
* WT_LOG_SLOT_WRITTEN state.
*/
- while (i < SLOT_POOL) {
+ while (i < WT_SLOT_POOL) {
save_i = i;
slot = &log->slot_pool[i++];
if (slot->slot_state != WT_LOG_SLOT_WRITTEN)
@@ -427,9 +458,9 @@ __log_wrlsn_server(void *arg)
/*
* Signal the close thread if needed.
*/
- if (F_ISSET(slot, SLOT_CLOSEFH))
+ if (F_ISSET(slot, WT_SLOT_CLOSEFH))
WT_ERR(__wt_cond_signal(session,
- conn->log_close_cond));
+ conn->log_file_cond));
WT_ERR(__wt_log_slot_free(session, slot));
}
}
@@ -579,16 +610,16 @@ __wt_logmgr_open(WT_SESSION_IMPL *session)
* If logging is enabled, this thread runs.
*/
WT_RET(__wt_open_internal_session(
- conn, "log-close-server", 0, 0, &conn->log_close_session));
- WT_RET(__wt_cond_alloc(conn->log_close_session,
- "log close server", 0, &conn->log_close_cond));
+ conn, "log-close-server", 0, 0, &conn->log_file_session));
+ WT_RET(__wt_cond_alloc(conn->log_file_session,
+ "log close server", 0, &conn->log_file_cond));
/*
* Start the log file close thread.
*/
- WT_RET(__wt_thread_create(conn->log_close_session,
- &conn->log_close_tid, __log_close_server, conn->log_close_session));
- conn->log_close_tid_set = 1;
+ WT_RET(__wt_thread_create(conn->log_file_session,
+ &conn->log_file_tid, __log_file_server, conn->log_file_session));
+ conn->log_file_tid_set = 1;
/*
* Start the log write LSN thread. It is not configurable.
@@ -663,16 +694,16 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session)
conn->log_tid_set = 0;
}
WT_TRET(__wt_cond_destroy(session, &conn->log_cond));
- if (conn->log_close_tid_set) {
- WT_TRET(__wt_cond_signal(session, conn->log_close_cond));
- WT_TRET(__wt_thread_join(session, conn->log_close_tid));
- conn->log_close_tid_set = 0;
+ if (conn->log_file_tid_set) {
+ WT_TRET(__wt_cond_signal(session, conn->log_file_cond));
+ WT_TRET(__wt_thread_join(session, conn->log_file_tid));
+ conn->log_file_tid_set = 0;
}
- WT_TRET(__wt_cond_destroy(session, &conn->log_close_cond));
- if (conn->log_close_session != NULL) {
- wt_session = &conn->log_close_session->iface;
+ WT_TRET(__wt_cond_destroy(session, &conn->log_file_cond));
+ if (conn->log_file_session != NULL) {
+ wt_session = &conn->log_file_session->iface;
WT_TRET(wt_session->close(wt_session, NULL));
- conn->log_close_session = NULL;
+ conn->log_file_session = NULL;
}
if (conn->log_wrlsn_tid_set) {
WT_TRET(__wt_cond_signal(session, conn->log_wrlsn_cond));
diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c
index 9cae1c5c4d1..c4350d90adb 100644
--- a/src/third_party/wiredtiger/src/conn/conn_open.c
+++ b/src/third_party/wiredtiger/src/conn/conn_open.c
@@ -162,7 +162,7 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
WT_TRET(__wt_cache_destroy(session));
/* Discard transaction state. */
- __wt_txn_global_destroy(session);
+ WT_TRET(__wt_txn_global_destroy(session));
/* Close extensions, first calling any unload entry point. */
while ((dlh = TAILQ_FIRST(&conn->dlhqh)) != NULL) {
diff --git a/src/third_party/wiredtiger/src/cursor/cur_log.c b/src/third_party/wiredtiger/src/cursor/cur_log.c
index 4b72a472cb7..3376f2a3166 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_log.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_log.c
@@ -31,7 +31,7 @@ __curlog_logrec(WT_SESSION_IMPL *session,
* Read the log header. Set up the step pointers to walk the
* operations inside the record. Get the record type.
*/
- cl->stepp = LOG_SKIP_HEADER(cl->logrec->data);
+ cl->stepp = WT_LOG_SKIP_HEADER(cl->logrec->data);
cl->stepp_end = (uint8_t *)cl->logrec->data + logrec->size;
WT_RET(__wt_logrec_read(session, &cl->stepp, cl->stepp_end,
&cl->rectype));
@@ -174,8 +174,8 @@ __curlog_kv(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
* header and the adjusted size. Add one to skip over the type
* which is normally consumed by __wt_logrec_read.
*/
- cl->opvalue->data = LOG_SKIP_HEADER(cl->logrec->data) + 1;
- cl->opvalue->size = LOG_REC_SIZE(cl->logrec->size) - 1;
+ cl->opvalue->data = WT_LOG_SKIP_HEADER(cl->logrec->data) + 1;
+ cl->opvalue->size = WT_LOG_REC_SIZE(cl->logrec->size) - 1;
}
/*
* The log cursor sets the LSN and step count as the cursor key and
@@ -187,24 +187,25 @@ __curlog_kv(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
if (FLD_ISSET(cursor->flags, WT_CURSTD_RAW)) {
memset(&item, 0, sizeof(item));
WT_RET(wiredtiger_struct_size((WT_SESSION *)session,
- &item.size, LOGC_KEY_FORMAT, cl->cur_lsn->file,
+ &item.size, WT_LOGC_KEY_FORMAT, cl->cur_lsn->file,
cl->cur_lsn->offset, key_count));
WT_RET(__wt_realloc(session, NULL, item.size, &cl->packed_key));
item.data = cl->packed_key;
WT_RET(wiredtiger_struct_pack((WT_SESSION *)session,
- cl->packed_key, item.size, LOGC_KEY_FORMAT,
+ cl->packed_key, item.size, WT_LOGC_KEY_FORMAT,
cl->cur_lsn->file, cl->cur_lsn->offset, key_count));
__wt_cursor_set_key(cursor, &item);
WT_RET(wiredtiger_struct_size((WT_SESSION *)session,
- &item.size, LOGC_VALUE_FORMAT, cl->txnid, cl->rectype,
+ &item.size, WT_LOGC_VALUE_FORMAT, cl->txnid, cl->rectype,
optype, fileid, cl->opkey, cl->opvalue));
WT_RET(__wt_realloc(session, NULL, item.size,
&cl->packed_value));
item.data = cl->packed_value;
WT_RET(wiredtiger_struct_pack((WT_SESSION *)session,
- cl->packed_value, item.size, LOGC_VALUE_FORMAT, cl->txnid,
- cl->rectype, optype, fileid, cl->opkey, cl->opvalue));
+ cl->packed_value, item.size, WT_LOGC_VALUE_FORMAT,
+ cl->txnid, cl->rectype, optype, fileid, cl->opkey,
+ cl->opvalue));
__wt_cursor_set_value(cursor, &item);
} else {
__wt_cursor_set_key(cursor, cl->cur_lsn->file,
@@ -237,8 +238,11 @@ __curlog_next(WT_CURSOR *cursor)
*/
if (cl->stepp == NULL || cl->stepp >= cl->stepp_end || !*cl->stepp) {
cl->txnid = 0;
- WT_ERR(__wt_log_scan(session, cl->next_lsn, WT_LOGSCAN_ONE,
- __curlog_logrec, cl));
+ ret = __wt_log_scan(session, cl->next_lsn, WT_LOGSCAN_ONE,
+ __curlog_logrec, cl);
+ if (ret == ENOENT)
+ ret = WT_NOTFOUND;
+ WT_ERR(ret);
}
WT_ASSERT(session, cl->logrec->data != NULL);
WT_ERR(__curlog_kv(session, cursor));
@@ -271,8 +275,11 @@ __curlog_search(WT_CURSOR *cursor)
*/
WT_ERR(__wt_cursor_get_key((WT_CURSOR *)cl,
&key.file, &key.offset, &counter));
- WT_ERR(__wt_log_scan(session, &key, WT_LOGSCAN_ONE,
- __curlog_logrec, cl));
+ ret = __wt_log_scan(session, &key, WT_LOGSCAN_ONE,
+ __curlog_logrec, cl);
+ if (ret == ENOENT)
+ ret = WT_NOTFOUND;
+ WT_ERR(ret);
WT_ERR(__curlog_kv(session, cursor));
WT_STAT_FAST_CONN_INCR(session, cursor_search);
WT_STAT_FAST_DATA_INCR(session, cursor_search);
@@ -377,8 +384,8 @@ __wt_curlog_open(WT_SESSION_IMPL *session,
WT_ERR(__wt_scr_alloc(session, 0, &cl->logrec));
WT_ERR(__wt_scr_alloc(session, 0, &cl->opkey));
WT_ERR(__wt_scr_alloc(session, 0, &cl->opvalue));
- cursor->key_format = LOGC_KEY_FORMAT;
- cursor->value_format = LOGC_VALUE_FORMAT;
+ cursor->key_format = WT_LOGC_KEY_FORMAT;
+ cursor->value_format = WT_LOGC_VALUE_FORMAT;
WT_INIT_LSN(cl->cur_lsn);
WT_INIT_LSN(cl->next_lsn);
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index e54ed0ff8e7..ac95032748d 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -277,8 +277,6 @@ __evict_review(
WT_PAGE_MODIFY *mod;
uint32_t reconcile_flags;
- reconcile_flags = WT_EVICTING;
-
/*
* Get exclusive access to the page if our caller doesn't have the tree
* locked down.
@@ -347,6 +345,7 @@ __evict_review(
* Don't set the update-restore flag for internal pages, they don't have
* updates that can be saved and restored.
*/
+ reconcile_flags = WT_EVICTING;
if (__wt_page_is_modified(page)) {
if (LF_ISSET(WT_EVICT_EXCLUSIVE))
FLD_SET(reconcile_flags, WT_SKIP_UPDATE_ERR);
@@ -365,7 +364,7 @@ __evict_review(
*/
if (!LF_ISSET(WT_EVICT_EXCLUSIVE) && mod != NULL &&
!__wt_txn_visible_all(session, mod->rec_max_txn) &&
- !LF_ISSET(WT_SKIP_UPDATE_RESTORE))
+ !FLD_ISSET(reconcile_flags, WT_SKIP_UPDATE_RESTORE))
return (EBUSY);
return (0);
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index ebdd5187dbc..ad1f4c096df 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -374,9 +374,8 @@ struct __wt_page {
/*
* Internal pages (both column- and row-store).
*
- * The page record number is only used by column-store, but it
- * makes some things simpler and it doesn't cost us any memory,
- * other structures in this union are still as large.
+ * The page record number is only used by column-store, but it's
+ * simpler having only one kind of internal page.
*
* In-memory internal pages have an array of pointers to child
* structures, maintained in collated order. When a page is
@@ -410,10 +409,24 @@ struct __wt_page {
uint32_t entries;
WT_REF **index;
} * volatile __index; /* Collated children */
+
+ /*
+ * When splitting to deepen the tree, track the number
+ * of entries in the newly created parent, and how many
+ * subsequent splits follow the initial set of entries.
+ * If future splits into the page are generally after
+ * the initial set of items, perform future deepening
+ * splits in this page to optimize for an append-style
+ * workload.
+ */
+ uint32_t deepen_split_append;
+ uint32_t deepen_split_last;
} intl;
#undef pg_intl_recno
#define pg_intl_recno u.intl.recno
#define pg_intl_parent_ref u.intl.parent_ref
+#define pg_intl_deepen_split_append u.intl.deepen_split_append
+#define pg_intl_deepen_split_last u.intl.deepen_split_last
/*
* Macros to copy/set the index because the name is obscured to ensure
diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h
index 2db14e293ed..76a25639ffd 100644
--- a/src/third_party/wiredtiger/src/include/btree.h
+++ b/src/third_party/wiredtiger/src/include/btree.h
@@ -148,8 +148,9 @@ struct __wt_btree {
#define WT_BTREE_IN_MEMORY 0x00200 /* Cache-resident object */
#define WT_BTREE_NO_EVICTION 0x00400 /* Disable eviction */
#define WT_BTREE_SALVAGE 0x00800 /* Handle is for salvage */
-#define WT_BTREE_UPGRADE 0x01000 /* Handle is for upgrade */
-#define WT_BTREE_VERIFY 0x02000 /* Handle is for verify */
+#define WT_BTREE_SKIP_CKPT 0x01000 /* Handle skipped checkpoint */
+#define WT_BTREE_UPGRADE 0x02000 /* Handle is for upgrade */
+#define WT_BTREE_VERIFY 0x04000 /* Handle is for verify */
uint32_t flags;
};
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 2ff89c1bdd5..06d41b89036 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -1136,10 +1136,10 @@ __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref)
* between.
*/
locked = WT_ATOMIC_CAS4(ref->state, WT_REF_MEM, WT_REF_LOCKED);
- WT_TRET(__wt_hazard_clear(session, page));
- if (!locked) {
- WT_TRET(EBUSY);
- return (ret);
+ if ((ret = __wt_hazard_clear(session, page)) != 0 || !locked) {
+ if (locked)
+ ref->state = WT_REF_MEM;
+ return (ret == 0 ? EBUSY : ret);
}
(void)WT_ATOMIC_ADD4(btree->evict_busy, 1);
@@ -1156,9 +1156,9 @@ __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref)
*/
WT_STAT_FAST_CONN_INCR(
session, cache_eviction_force_delete);
- } else {
+ } else
WT_STAT_FAST_CONN_INCR(session, cache_eviction_force_fail);
- }
+
(void)WT_ATOMIC_SUB4(btree->evict_busy, 1);
return (ret);
@@ -1180,7 +1180,7 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
* Discard our hazard pointer. Ignore pages we don't have and the root
* page, which sticks in memory, regardless.
*/
- if (ref == NULL || __wt_ref_is_root(ref))
+ if (ref == NULL || ref->page == NULL || __wt_ref_is_root(ref))
return (0);
/*
@@ -1205,8 +1205,8 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
page = ref->page;
if (F_ISSET(btree, WT_BTREE_NO_EVICTION) ||
LF_ISSET(WT_READ_NO_EVICT) ||
- page->read_gen != WT_READGEN_OLDEST || !__wt_page_can_evict(
- session, page, WT_EVICT_CHECK_SPLITS, NULL))
+ page->read_gen != WT_READGEN_OLDEST ||
+ !__wt_page_can_evict(session, page, WT_EVICT_CHECK_SPLITS, NULL))
return (__wt_hazard_clear(session, page));
WT_RET_BUSY_OK(__wt_page_release_evict(session, ref));
diff --git a/src/third_party/wiredtiger/src/include/config.h b/src/third_party/wiredtiger/src/include/config.h
index 8ec9048e861..161eea5bc80 100644
--- a/src/third_party/wiredtiger/src/include/config.h
+++ b/src/third_party/wiredtiger/src/include/config.h
@@ -74,18 +74,20 @@ struct __wt_config_parser_impl {
#define WT_CONFIG_ENTRY_WT_SESSION_rename 22
#define WT_CONFIG_ENTRY_WT_SESSION_rollback_transaction 23
#define WT_CONFIG_ENTRY_WT_SESSION_salvage 24
-#define WT_CONFIG_ENTRY_WT_SESSION_strerror 25
-#define WT_CONFIG_ENTRY_WT_SESSION_truncate 26
-#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 27
-#define WT_CONFIG_ENTRY_WT_SESSION_verify 28
-#define WT_CONFIG_ENTRY_colgroup_meta 29
-#define WT_CONFIG_ENTRY_file_meta 30
-#define WT_CONFIG_ENTRY_index_meta 31
-#define WT_CONFIG_ENTRY_table_meta 32
-#define WT_CONFIG_ENTRY_wiredtiger_open 33
-#define WT_CONFIG_ENTRY_wiredtiger_open_all 34
-#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 35
-#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 36
+#define WT_CONFIG_ENTRY_WT_SESSION_snapshot 25
+#define WT_CONFIG_ENTRY_WT_SESSION_strerror 26
+#define WT_CONFIG_ENTRY_WT_SESSION_transaction_sync 27
+#define WT_CONFIG_ENTRY_WT_SESSION_truncate 28
+#define WT_CONFIG_ENTRY_WT_SESSION_upgrade 29
+#define WT_CONFIG_ENTRY_WT_SESSION_verify 30
+#define WT_CONFIG_ENTRY_colgroup_meta 31
+#define WT_CONFIG_ENTRY_file_meta 32
+#define WT_CONFIG_ENTRY_index_meta 33
+#define WT_CONFIG_ENTRY_table_meta 34
+#define WT_CONFIG_ENTRY_wiredtiger_open 35
+#define WT_CONFIG_ENTRY_wiredtiger_open_all 36
+#define WT_CONFIG_ENTRY_wiredtiger_open_basecfg 37
+#define WT_CONFIG_ENTRY_wiredtiger_open_usercfg 38
/*
* configuration section: END
* DO NOT EDIT: automatically built by dist/flags.py.
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index a039edb7493..209eabea91c 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -330,10 +330,10 @@ struct __wt_connection_impl {
WT_SESSION_IMPL *log_session; /* Log server session */
wt_thread_t log_tid; /* Log server thread */
int log_tid_set; /* Log server thread set */
- WT_CONDVAR *log_close_cond;/* Log close thread wait mutex */
- WT_SESSION_IMPL *log_close_session;/* Log close thread session */
- wt_thread_t log_close_tid; /* Log close thread thread */
- int log_close_tid_set;/* Log close thread set */
+ WT_CONDVAR *log_file_cond; /* Log file thread wait mutex */
+ WT_SESSION_IMPL *log_file_session;/* Log file thread session */
+ wt_thread_t log_file_tid; /* Log file thread thread */
+ int log_file_tid_set;/* Log file thread set */
WT_CONDVAR *log_wrlsn_cond;/* Log write lsn thread wait mutex */
WT_SESSION_IMPL *log_wrlsn_session;/* Log write lsn thread session */
wt_thread_t log_wrlsn_tid; /* Log write lsn thread thread */
diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i
index 57c19f50417..780198b4bb0 100644
--- a/src/third_party/wiredtiger/src/include/cursor.i
+++ b/src/third_party/wiredtiger/src/include/cursor.i
@@ -105,6 +105,7 @@ __curfile_enter(WT_CURSOR_BTREE *cbt)
static inline int
__curfile_leave(WT_CURSOR_BTREE *cbt)
{
+ WT_DECL_RET;
WT_SESSION_IMPL *session;
session = (WT_SESSION_IMPL *)cbt->iface.session;
@@ -125,13 +126,16 @@ __curfile_leave(WT_CURSOR_BTREE *cbt)
cbt->page_deleted_count = 0;
/*
- * Release any page references we're holding. This can trigger
- * eviction (e.g., forced eviction of big pages), so it is important to
- * do it after releasing our snapshot above.
+ * Release any page references we're holding. This can trigger eviction
+ * (e.g., forced eviction of big pages), so it's important to do after
+ * releasing our snapshot above.
+ *
+ * Clear the reference regardless, so we don't try the release twice.
*/
- WT_RET(__wt_page_release(session, cbt->ref, 0));
+ ret = __wt_page_release(session, cbt->ref, 0);
cbt->ref = NULL;
- return (0);
+
+ return (ret);
}
/*
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 7406bd471e2..3d3c851daad 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -210,7 +210,7 @@ extern int __wt_conn_remove_compressor(WT_SESSION_IMPL *session);
extern int __wt_conn_remove_data_source(WT_SESSION_IMPL *session);
extern int __wt_encryptor_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval, WT_CONFIG_ITEM *keyid, WT_CONFIG_ARG *cfg_arg, WT_KEYED_ENCRYPTOR **kencryptorp);
extern int __wt_conn_remove_encryptor(WT_SESSION_IMPL *session);
-extern int __wt_extractor_config(WT_SESSION_IMPL *session, const char *config, WT_EXTRACTOR **extractorp, int *ownp);
+extern int __wt_extractor_config(WT_SESSION_IMPL *session, const char *uri, const char *config, WT_EXTRACTOR **extractorp, int *ownp);
extern int __wt_conn_remove_extractor(WT_SESSION_IMPL *session);
extern int __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_cache_config(WT_SESSION_IMPL *session, int reconfigure, const char *cfg[]);
@@ -315,6 +315,8 @@ extern void __wt_cache_dump(WT_SESSION_IMPL *session);
extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags);
extern void __wt_evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref);
extern int __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn);
+extern int __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn);
+extern int __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn);
extern int __wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, int *rec);
extern void __wt_log_written_reset(WT_SESSION_IMPL *session);
extern int __wt_log_get_all_files(WT_SESSION_IMPL *session, char ***filesp, u_int *countp, uint32_t *maxid, int active_only);
@@ -668,7 +670,7 @@ extern int __wt_txn_init(WT_SESSION_IMPL *session);
extern void __wt_txn_stats_update(WT_SESSION_IMPL *session);
extern void __wt_txn_destroy(WT_SESSION_IMPL *session);
extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]);
-extern void __wt_txn_global_destroy(WT_SESSION_IMPL *session);
+extern int __wt_txn_global_destroy(WT_SESSION_IMPL *session);
extern int __wt_checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len);
extern int __wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
@@ -688,4 +690,9 @@ extern int __wt_txn_checkpoint_log( WT_SESSION_IMPL *session, int full, uint32_t
extern int __wt_txn_truncate_log( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop);
extern int __wt_txn_truncate_end(WT_SESSION_IMPL *session);
extern int __wt_txn_printlog(WT_SESSION *wt_session, FILE *out);
+extern int __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[]);
+extern int __wt_txn_named_snapshot_drop(WT_SESSION_IMPL *session, const char *cfg[]);
+extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval);
+extern int __wt_txn_named_snapshot_config(WT_SESSION_IMPL *session, const char *cfg[], int *has_create, int *has_drops);
+extern int __wt_txn_named_snapshot_destroy(WT_SESSION_IMPL *session);
extern int __wt_txn_recover(WT_SESSION_IMPL *session);
diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h
index 95aa6f9809d..faada258c09 100644
--- a/src/third_party/wiredtiger/src/include/flags.h
+++ b/src/third_party/wiredtiger/src/include/flags.h
@@ -30,9 +30,10 @@
#define WT_LOGSCAN_FROM_CKP 0x00000002
#define WT_LOGSCAN_ONE 0x00000004
#define WT_LOGSCAN_RECOVER 0x00000008
-#define WT_LOG_DSYNC 0x00000001
-#define WT_LOG_FLUSH 0x00000002
-#define WT_LOG_FSYNC 0x00000004
+#define WT_LOG_BACKGROUND 0x00000001
+#define WT_LOG_DSYNC 0x00000002
+#define WT_LOG_FLUSH 0x00000004
+#define WT_LOG_FSYNC 0x00000008
#define WT_READ_CACHE 0x00000001
#define WT_READ_COMPACT 0x00000002
#define WT_READ_NO_EVICT 0x00000004
@@ -65,7 +66,7 @@
#define WT_SYNC_DISCARD 0x00000004
#define WT_SYNC_DISCARD_FORCE 0x00000008
#define WT_SYNC_WRITE_LEAVES 0x00000010
-#define WT_TXN_LOG_CKPT_FAIL 0x00000001
+#define WT_TXN_LOG_CKPT_CLEANUP 0x00000001
#define WT_TXN_LOG_CKPT_PREPARE 0x00000002
#define WT_TXN_LOG_CKPT_START 0x00000004
#define WT_TXN_LOG_CKPT_STOP 0x00000008
diff --git a/src/third_party/wiredtiger/src/include/gcc.h b/src/third_party/wiredtiger/src/include/gcc.h
index 2efbb20b39a..cb9d3d5e212 100644
--- a/src/third_party/wiredtiger/src/include/gcc.h
+++ b/src/third_party/wiredtiger/src/include/gcc.h
@@ -87,21 +87,25 @@
* To avoid locking shared data structures such as statistics and to permit
* atomic state changes, we rely on the WT_ATOMIC_ADD and WT_ATOMIC_CAS
* (compare and swap) operations.
- *
- * Note that we avoid __sync_bool_compare_and_swap due to problems with
- * optimization with some versions of clang. See
- * http://llvm.org/bugs/show_bug.cgi?id=21499 for details.
*/
#define __WT_ATOMIC_ADD(v, val, n) \
(WT_STATIC_ASSERT(sizeof(v) == (n)), __sync_add_and_fetch(&(v), val))
#define __WT_ATOMIC_FETCH_ADD(v, val, n) \
(WT_STATIC_ASSERT(sizeof(v) == (n)), __sync_fetch_and_add(&(v), val))
+#ifdef __clang__
+/*
+ * We avoid __sync_bool_compare_and_swap with due to problems with
+ * optimization with some versions of clang. See
+ * http://llvm.org/bugs/show_bug.cgi?id=21499 for details.
+ */
#define __WT_ATOMIC_CAS(v, old, new, n) \
(WT_STATIC_ASSERT(sizeof(v) == (n)), \
__sync_val_compare_and_swap(&(v), old, new) == (old))
-#define __WT_ATOMIC_CAS_VAL(v, old, new, n) \
+#else
+#define __WT_ATOMIC_CAS(v, old, new, n) \
(WT_STATIC_ASSERT(sizeof(v) == (n)), \
- __sync_val_compare_and_swap(&(v), old, new))
+ __sync_bool_compare_and_swap(&(v), old, new))
+#endif
#define __WT_ATOMIC_STORE(v, val, n) \
(WT_STATIC_ASSERT(sizeof(v) == (n)), \
__sync_lock_test_and_set(&(v), val))
@@ -111,28 +115,24 @@
#define WT_ATOMIC_ADD1(v, val) __WT_ATOMIC_ADD(v, val, 1)
#define WT_ATOMIC_FETCH_ADD1(v, val) __WT_ATOMIC_FETCH_ADD(v, val, 1)
#define WT_ATOMIC_CAS1(v, old, new) __WT_ATOMIC_CAS(v, old, new, 1)
-#define WT_ATOMIC_CAS_VAL1(v, old, new) __WT_ATOMIC_CAS_VAL(v, old, new, 1)
#define WT_ATOMIC_STORE1(v, val) __WT_ATOMIC_STORE(v, val, 1)
#define WT_ATOMIC_SUB1(v, val) __WT_ATOMIC_SUB(v, val, 1)
#define WT_ATOMIC_ADD2(v, val) __WT_ATOMIC_ADD(v, val, 2)
#define WT_ATOMIC_FETCH_ADD2(v, val) __WT_ATOMIC_FETCH_ADD(v, val, 2)
#define WT_ATOMIC_CAS2(v, old, new) __WT_ATOMIC_CAS(v, old, new, 2)
-#define WT_ATOMIC_CAS_VAL2(v, old, new) __WT_ATOMIC_CAS_VAL(v, old, new, 2)
#define WT_ATOMIC_STORE2(v, val) __WT_ATOMIC_STORE(v, val, 2)
#define WT_ATOMIC_SUB2(v, val) __WT_ATOMIC_SUB(v, val, 2)
#define WT_ATOMIC_ADD4(v, val) __WT_ATOMIC_ADD(v, val, 4)
#define WT_ATOMIC_FETCH_ADD4(v, val) __WT_ATOMIC_FETCH_ADD(v, val, 4)
#define WT_ATOMIC_CAS4(v, old, new) __WT_ATOMIC_CAS(v, old, new, 4)
-#define WT_ATOMIC_CAS_VAL4(v, old, new) __WT_ATOMIC_CAS_VAL(v, old, new, 4)
#define WT_ATOMIC_STORE4(v, val) __WT_ATOMIC_STORE(v, val, 4)
#define WT_ATOMIC_SUB4(v, val) __WT_ATOMIC_SUB(v, val, 4)
#define WT_ATOMIC_ADD8(v, val) __WT_ATOMIC_ADD(v, val, 8)
#define WT_ATOMIC_FETCH_ADD8(v, val) __WT_ATOMIC_FETCH_ADD(v, val, 8)
#define WT_ATOMIC_CAS8(v, old, new) __WT_ATOMIC_CAS(v, old, new, 8)
-#define WT_ATOMIC_CAS_VAL8(v, old, new) __WT_ATOMIC_CAS_VAL(v, old, new, 8)
#define WT_ATOMIC_STORE8(v, val) __WT_ATOMIC_STORE(v, val, 8)
#define WT_ATOMIC_SUB8(v, val) __WT_ATOMIC_SUB(v, val, 8)
diff --git a/src/third_party/wiredtiger/src/include/lint.h b/src/third_party/wiredtiger/src/include/lint.h
index 631f00cb5cd..964aa5c118f 100644
--- a/src/third_party/wiredtiger/src/include/lint.h
+++ b/src/third_party/wiredtiger/src/include/lint.h
@@ -24,8 +24,6 @@
((v) += (val), (v))
#define __WT_ATOMIC_CAS(v, old, new) \
((v) = ((v) == (old) ? (new) : (old)), (v) == (old))
-#define __WT_ATOMIC_CAS_VAL(v, old, new) \
- ((v) = ((v) == (old) ? (new) : (old)), (v) == (old))
#define __WT_ATOMIC_STORE(v, val) \
((v) = (val))
#define __WT_ATOMIC_SUB(v, val) \
@@ -34,28 +32,24 @@
#define WT_ATOMIC_ADD1(v, val) __WT_ATOMIC_ADD(v, val)
#define WT_ATOMIC_FETCH_ADD1(v, val) __WT_ATOMIC_FETCH_ADD(v, val)
#define WT_ATOMIC_CAS1(v, old, new) __WT_ATOMIC_CAS(v, old, new)
-#define WT_ATOMIC_CAS_VAL1(v, old, new) __WT_ATOMIC_CAS_VAL(v, old, new)
#define WT_ATOMIC_STORE1(v, val) __WT_ATOMIC_STORE(v, val)
#define WT_ATOMIC_SUB1(v, val) __WT_ATOMIC_SUB(v, val)
#define WT_ATOMIC_ADD2(v, val) __WT_ATOMIC_ADD(v, val)
#define WT_ATOMIC_FETCH_ADD2(v, val) __WT_ATOMIC_FETCH_ADD(v, val)
#define WT_ATOMIC_CAS2(v, old, new) __WT_ATOMIC_CAS(v, old, new)
-#define WT_ATOMIC_CAS_VAL2(v, old, new) __WT_ATOMIC_CAS_VAL(v, old, new)
#define WT_ATOMIC_STORE2(v, val) __WT_ATOMIC_STORE(v, val)
#define WT_ATOMIC_SUB2(v, val) __WT_ATOMIC_SUB(v, val)
#define WT_ATOMIC_ADD4(v, val) __WT_ATOMIC_ADD(v, val)
#define WT_ATOMIC_FETCH_ADD4(v, val) __WT_ATOMIC_FETCH_ADD(v, val)
#define WT_ATOMIC_CAS4(v, old, new) __WT_ATOMIC_CAS(v, old, new)
-#define WT_ATOMIC_CAS_VAL4(v, old, new) __WT_ATOMIC_CAS_VAL(v, old, new)
#define WT_ATOMIC_STORE4(v, val) __WT_ATOMIC_STORE(v, val)
#define WT_ATOMIC_SUB4(v, val) __WT_ATOMIC_SUB(v, val)
#define WT_ATOMIC_ADD8(v, val) __WT_ATOMIC_ADD(v, val)
#define WT_ATOMIC_FETCH_ADD8(v, val) __WT_ATOMIC_FETCH_ADD(v, val)
#define WT_ATOMIC_CAS8(v, old, new) __WT_ATOMIC_CAS(v, old, new)
-#define WT_ATOMIC_CAS_VAL8(v, old, new) __WT_ATOMIC_CAS_VAL(v, old, new)
#define WT_ATOMIC_STORE8(v, val) __WT_ATOMIC_STORE(v, val)
#define WT_ATOMIC_SUB8(v, val) __WT_ATOMIC_SUB(v, val)
diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h
index b11d514d99a..3de72b8b9a6 100644
--- a/src/third_party/wiredtiger/src/include/log.h
+++ b/src/third_party/wiredtiger/src/include/log.h
@@ -39,12 +39,12 @@
* ever changes. The value is the following:
* txnid, record type, operation type, file id, operation key, operation value
*/
-#define LOGC_KEY_FORMAT WT_UNCHECKED_STRING(IqI)
-#define LOGC_VALUE_FORMAT WT_UNCHECKED_STRING(qIIIuu)
+#define WT_LOGC_KEY_FORMAT WT_UNCHECKED_STRING(IqI)
+#define WT_LOGC_VALUE_FORMAT WT_UNCHECKED_STRING(qIIIuu)
-#define LOG_SKIP_HEADER(data) \
+#define WT_LOG_SKIP_HEADER(data) \
((const uint8_t *)(data) + offsetof(WT_LOG_RECORD, record))
-#define LOG_REC_SIZE(size) \
+#define WT_LOG_REC_SIZE(size) \
((size) - offsetof(WT_LOG_RECORD, record))
/*
@@ -68,6 +68,9 @@
* WT_LOG_SLOT_WRITTEN - slot is written and should be processed by worker.
* WT_LOG_SLOT_READY - slot is ready for threads to join.
* > WT_LOG_SLOT_READY - threads are actively consolidating on this slot.
+ *
+ * The slot state must be volatile: threads loop checking the state and can't
+ * cache the first value they see.
*/
#define WT_LOG_SLOT_DONE 0
#define WT_LOG_SLOT_FREE 1
@@ -75,10 +78,10 @@
#define WT_LOG_SLOT_WRITTEN 3
#define WT_LOG_SLOT_READY 4
typedef WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) struct {
- int64_t slot_state; /* Slot state */
+ volatile int64_t slot_state; /* Slot state */
uint64_t slot_group_size; /* Group size */
int32_t slot_error; /* Error value */
-#define SLOT_INVALID_INDEX 0xffffffff
+#define WT_SLOT_INVALID_INDEX 0xffffffff
uint32_t slot_index; /* Active slot index */
wt_off_t slot_start_offset; /* Starting file offset */
WT_LSN slot_release_lsn; /* Slot release LSN */
@@ -88,15 +91,15 @@ typedef WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) struct {
WT_ITEM slot_buf; /* Buffer for grouped writes */
int32_t slot_churn; /* Active slots are scarce. */
-#define SLOT_BUF_GROW 0x01 /* Grow buffer on release */
-#define SLOT_BUFFERED 0x02 /* Buffer writes */
-#define SLOT_CLOSEFH 0x04 /* Close old fh on release */
-#define SLOT_SYNC 0x08 /* Needs sync on release */
-#define SLOT_SYNC_DIR 0x10 /* Directory sync on release */
+#define WT_SLOT_BUF_GROW 0x01 /* Grow buffer on release */
+#define WT_SLOT_BUFFERED 0x02 /* Buffer writes */
+#define WT_SLOT_CLOSEFH 0x04 /* Close old fh on release */
+#define WT_SLOT_SYNC 0x08 /* Needs sync on release */
+#define WT_SLOT_SYNC_DIR 0x10 /* Directory sync on release */
uint32_t flags; /* Flags */
} WT_LOGSLOT;
-#define SLOT_INIT_FLAGS (SLOT_BUFFERED)
+#define WT_SLOT_INIT_FLAGS (WT_SLOT_BUFFERED)
typedef struct {
WT_LOGSLOT *slot;
@@ -104,7 +107,7 @@ typedef struct {
} WT_MYSLOT;
/* Offset of first record */
-#define LOG_FIRST_RECORD log->allocsize
+#define WT_LOG_FIRST_RECORD log->allocsize
typedef struct {
uint32_t allocsize; /* Allocation alignment size */
@@ -123,6 +126,7 @@ typedef struct {
* System LSNs
*/
WT_LSN alloc_lsn; /* Next LSN for allocation */
+ WT_LSN bg_sync_lsn; /* Latest background sync LSN */
WT_LSN ckpt_lsn; /* Last checkpoint LSN */
WT_LSN first_lsn; /* First LSN */
WT_LSN sync_dir_lsn; /* LSN of the last directory sync */
@@ -147,16 +151,16 @@ typedef struct {
/*
* Consolidation array information
- * SLOT_ACTIVE must be less than SLOT_POOL.
+ * WT_SLOT_ACTIVE must be less than WT_SLOT_POOL.
* Our testing shows that the more consolidation we generate the
* better the performance we see which equates to an active slot
* slot count of one.
*/
-#define SLOT_ACTIVE 1
-#define SLOT_POOL 16
+#define WT_SLOT_ACTIVE 1
+#define WT_SLOT_POOL 16
uint32_t pool_index; /* Global pool index */
- WT_LOGSLOT *slot_array[SLOT_ACTIVE]; /* Active slots */
- WT_LOGSLOT slot_pool[SLOT_POOL]; /* Pool of all slots */
+ WT_LOGSLOT *slot_array[WT_SLOT_ACTIVE]; /* Active slots */
+ WT_LOGSLOT slot_pool[WT_SLOT_POOL]; /* Pool of all slots */
#define WT_LOG_FORCE_CONSOLIDATE 0x01 /* Disable direct writes */
uint32_t flags;
diff --git a/src/third_party/wiredtiger/src/include/msvc.h b/src/third_party/wiredtiger/src/include/msvc.h
index fa5b2d848e8..bc72ddf8193 100644
--- a/src/third_party/wiredtiger/src/include/msvc.h
+++ b/src/third_party/wiredtiger/src/include/msvc.h
@@ -41,9 +41,6 @@
(WT_STATIC_ASSERT(sizeof(v) == (n)), \
_InterlockedCompareExchange ## s \
((t*)&(v), (t)(new), (t)(old)) == (t)(old))
-#define __WT_ATOMIC_CAS_VAL(v, old, new, n, s, t) \
- (WT_STATIC_ASSERT(sizeof(v) == (n)), \
- _InterlockedCompareExchange ## s((t*)&(v), (t)(new), (t)(old)))
#define __WT_ATOMIC_STORE(v, val, n, s, t) \
(WT_STATIC_ASSERT(sizeof(v) == (n)), \
_InterlockedExchange ## s((t*)&(v), (t)(val)))
@@ -55,8 +52,6 @@
#define WT_ATOMIC_FETCH_ADD1(v, val) \
__WT_ATOMIC_FETCH_ADD(v, val, 1, 8, char)
#define WT_ATOMIC_CAS1(v, old, new) __WT_ATOMIC_CAS(v, old, new, 1, 8, char)
-#define WT_ATOMIC_CAS_VAL1(v, old, new) \
- __WT_ATOMIC_CAS_VAL(v, old, new, 1, 8, char)
#define WT_ATOMIC_STORE1(v, val) __WT_ATOMIC_STORE(v, val, 1, 8, char)
#define WT_ATOMIC_SUB1(v, val) __WT_ATOMIC_SUB(v, val, 1, 8, char)
@@ -65,16 +60,12 @@
__WT_ATOMIC_FETCH_ADD(v, val, 2, 16, short)
#define WT_ATOMIC_CAS2(v, old, new) \
__WT_ATOMIC_CAS(v, old, new, 2, 16, short)
-#define WT_ATOMIC_CAS_VAL2(v, old, new) \
- __WT_ATOMIC_CAS_VAL(v, old, new, 2, 16, short)
#define WT_ATOMIC_STORE2(v, val) __WT_ATOMIC_STORE(v, val, 2, 16, short)
#define WT_ATOMIC_SUB2(v, val) __WT_ATOMIC_SUB(v, val, 2, 16, short)
#define WT_ATOMIC_ADD4(v, val) __WT_ATOMIC_ADD(v, val, 4, , long)
#define WT_ATOMIC_FETCH_ADD4(v, val) __WT_ATOMIC_FETCH_ADD(v, val, 4, , long)
#define WT_ATOMIC_CAS4(v, old, new) __WT_ATOMIC_CAS(v, old, new, 4, , long)
-#define WT_ATOMIC_CAS_VAL4(v, old, new) \
- __WT_ATOMIC_CAS_VAL(v, old, new, 4, , long)
#define WT_ATOMIC_STORE4(v, val) __WT_ATOMIC_STORE(v, val, 4, , long)
#define WT_ATOMIC_SUB4(v, val) __WT_ATOMIC_SUB(v, val, 4, , long)
@@ -83,8 +74,6 @@
__WT_ATOMIC_FETCH_ADD(v, val, 8, 64, __int64)
#define WT_ATOMIC_CAS8(v, old, new) \
__WT_ATOMIC_CAS(v, old, new, 8, 64, __int64)
-#define WT_ATOMIC_CAS_VAL8(v, old, new) \
- __WT_ATOMIC_CAS_VAL(v, old, new, 8, 64, __int64)
#define WT_ATOMIC_STORE8(v, val) \
__WT_ATOMIC_STORE(v, val, 8, 64, __int64)
#define WT_ATOMIC_SUB8(v, val) __WT_ATOMIC_SUB(v, val, 8, 64, __int64)
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index daa47d6e776..8a8b229dbc0 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -112,6 +112,7 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl {
WT_TXN_ISOLATION isolation;
WT_TXN txn; /* Transaction state */
+ WT_LSN bg_sync_lsn; /* Background sync operation LSN. */
u_int ncursors; /* Count of active file cursors. */
void *block_manager; /* Block-manager support */
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 85d66b97e11..728c8c9fe8e 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -273,6 +273,7 @@ struct __wt_connection_stats {
WT_STATS txn_pinned_checkpoint_range;
WT_STATS txn_pinned_range;
WT_STATS txn_rollback;
+ WT_STATS txn_sync;
WT_STATS write_io;
};
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 9f600ac95c1..f44a02cc332 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -25,6 +25,16 @@
#define WT_SESSION_TXN_STATE(s) (&S2C(s)->txn_global.states[(s)->id])
+struct __wt_named_snapshot {
+ const char *name;
+
+ STAILQ_ENTRY(__wt_named_snapshot) q;
+
+ uint64_t snap_min, snap_max;
+ uint64_t *snapshot;
+ uint32_t snapshot_count;
+};
+
struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_txn_state {
volatile uint64_t id;
volatile uint64_t snap_min;
@@ -56,6 +66,11 @@ struct __wt_txn_global {
volatile uint64_t checkpoint_id;
volatile uint64_t checkpoint_snap_min;
+ /* Named snapshot state. */
+ WT_RWLOCK *nsnap_rwlock;
+ volatile uint64_t nsnap_oldest_id;
+ STAILQ_HEAD(__wt_nsnap_qh, __wt_named_snapshot) nsnaph;
+
WT_TXN_STATE *states; /* Per-session transaction states */
};
@@ -144,6 +159,9 @@ struct __wt_txn {
#define WT_TXN_ERROR 0x02
#define WT_TXN_HAS_ID 0x04
#define WT_TXN_HAS_SNAPSHOT 0x08
-#define WT_TXN_RUNNING 0x10
+#define WT_TXN_NAMED_SNAPSHOT 0x10
+#define WT_TXN_READONLY 0x20
+#define WT_TXN_RUNNING 0x40
+#define WT_TXN_SYNC_SET 0x80
uint32_t flags;
};
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index b06062fc483..bcb2d084df8 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -64,6 +64,13 @@ __wt_txn_modify(WT_SESSION_IMPL *session, WT_UPDATE *upd)
{
WT_DECL_RET;
WT_TXN_OP *op;
+ WT_TXN *txn;
+
+ txn = &session->txn;
+
+ if (F_ISSET(txn, WT_TXN_READONLY))
+ WT_RET_MSG(session, WT_ROLLBACK,
+ "Attempt to update in a read only transaction");
WT_RET(__txn_next_op(session, &op));
op->type = F_ISSET(session, WT_SESSION_LOGGING_INMEM) ?
@@ -89,13 +96,12 @@ __wt_txn_modify_ref(WT_SESSION_IMPL *session, WT_REF *ref)
}
/*
- * __wt_txn_visible_all --
- * Check if a given transaction ID is "globally visible". This is, if
- * all sessions in the system will see the transaction ID including the
- * ID that belongs to a running checkpoint.
+ * __wt_txn_oldest_id --
+ * Return the oldest transaction ID that has to be kept for the current
+ * tree.
*/
-static inline int
-__wt_txn_visible_all(WT_SESSION_IMPL *session, uint64_t id)
+static inline uint64_t
+__wt_txn_oldest_id(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
WT_TXN_GLOBAL *txn_global;
@@ -125,6 +131,22 @@ __wt_txn_visible_all(WT_SESSION_IMPL *session, uint64_t id)
*/
oldest_id = checkpoint_snap_min;
+ return (oldest_id);
+}
+
+/*
+ * __wt_txn_visible_all --
+ * Check if a given transaction ID is "globally visible". This is, if
+ * all sessions in the system will see the transaction ID including the
+ * ID that belongs to a running checkpoint.
+ */
+static inline int
+__wt_txn_visible_all(WT_SESSION_IMPL *session, uint64_t id)
+{
+ uint64_t oldest_id;
+
+ oldest_id = __wt_txn_oldest_id(session);
+
return (WT_TXNID_LT(id, oldest_id));
}
@@ -219,7 +241,12 @@ __wt_txn_begin(WT_SESSION_IMPL *session, const char *cfg[])
if (cfg != NULL)
WT_RET(__wt_txn_config(session, cfg));
- if (txn->isolation == WT_ISO_SNAPSHOT) {
+ /*
+ * Allocate a snapshot if required. Named snapshot transactions already
+ * have an ID setup.
+ */
+ if (txn->isolation == WT_ISO_SNAPSHOT &&
+ !F_ISSET(txn, WT_TXN_NAMED_SNAPSHOT)) {
if (session->ncursors > 0)
WT_RET(__wt_session_copy_values(session));
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index a6f80039c10..fe27c4f1c62 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -1389,6 +1389,8 @@ struct __wt_session {
* @config{priority, priority of the transaction for resolving
* conflicts. Transactions with higher values are less likely to
* abort., an integer between -100 and 100; default \c 0.}
+ * @config{snapshot, use a named\, in-memory snapshot\, see @ref
+ * transaction_named_snapshots., a string; default empty.}
* @config{sync, whether to sync log records when the transaction
* commits\, inherited from ::wiredtiger_open \c transaction_sync., a
* boolean flag; default empty.}
@@ -1408,7 +1410,12 @@ struct __wt_session {
* @snippet ex_all.c transaction commit/rollback
*
* @param session the session handle
- * @configempty{WT_SESSION.commit_transaction, see dist/api_data.py}
+ * @configstart{WT_SESSION.commit_transaction, see dist/api_data.py}
+ * @config{sync, override whether to sync log records when the
+ * transaction commits\, inherited from ::wiredtiger_open \c
+ * transaction_sync., a string\, chosen from the following options: \c
+ * "background"\, \c "off"\, \c "on"; default empty.}
+ * @configend
* @errors
*/
int __F(commit_transaction)(WT_SESSION *session, const char *config);
@@ -1459,6 +1466,36 @@ struct __wt_session {
int __F(checkpoint)(WT_SESSION *session, const char *config);
/*!
+ * Manage named snapshot transactions. Use this API to create and drop
+ * named snapshots. Named snapshot transactions can be accessed via
+ * WT_CURSOR::open. See @ref transaction_named_snapshots.
+ *
+ * @snippet ex_all.c Snapshot examples
+ *
+ * @param session the session handle
+ * @configstart{WT_SESSION.snapshot, see dist/api_data.py}
+ * @config{drop = (, if non-empty\, specifies which snapshots to drop.
+ * Where a group of snapshots are being dropped\, the order is based on
+ * snapshot creation order not alphanumeric name order., a set of
+ * related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;all, drop all named snapshots., a
+ * boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;before, drop all snapshots up to but
+ * not including the specified name., a string; default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;names, drop specific named
+ * snapshots., a list of strings; default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;to, drop all snapshots up to and
+ * including the specified name., a string; default empty.}
+ * @config{
+ * ),,}
+ * @config{name, specify a name for the snapshot., a string; default
+ * empty.}
+ * @configend
+ * @errors
+ */
+ int __F(snapshot)(WT_SESSION *session, const char *config);
+
+ /*!
* Return the transaction ID range pinned by the session handle.
*
* The ID range is approximate and is calculated based on the oldest
@@ -1474,6 +1511,24 @@ struct __wt_session {
*/
int __F(transaction_pinned_range)(WT_SESSION* session, uint64_t *range);
+ /*!
+ * Wait for a transaction to become synchronized. This method is
+ * only useful when ::wiredtiger_open is configured with the
+ * \c transaction_sync setting disabled. This method must be called
+ * when no transactions are active in the session.
+ *
+ * @snippet ex_all.c Transaction sync
+ *
+ * @param session the session handle
+ * @configstart{WT_SESSION.transaction_sync, see dist/api_data.py}
+ * @config{timeout_ms, maximum amount of time to wait for background
+ * sync to complete in milliseconds. A value of zero disables the
+ * timeout and returns immediately. The default waits forever., an
+ * integer; default \c .}
+ * @configend
+ * @errors
+ */
+ int __F(transaction_sync)(WT_SESSION *session, const char *config);
/*! @} */
};
@@ -3730,8 +3785,10 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_TXN_PINNED_RANGE 1139
/*! transaction: transactions rolled back */
#define WT_STAT_CONN_TXN_ROLLBACK 1140
+/*! transaction: transaction sync calls */
+#define WT_STAT_CONN_TXN_SYNC 1141
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1141
+#define WT_STAT_CONN_WRITE_IO 1142
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index 6a5c04d9c7c..b876a2d032d 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -214,6 +214,8 @@ struct __wt_named_encryptor;
typedef struct __wt_named_encryptor WT_NAMED_ENCRYPTOR;
struct __wt_named_extractor;
typedef struct __wt_named_extractor WT_NAMED_EXTRACTOR;
+struct __wt_named_snapshot;
+ typedef struct __wt_named_snapshot WT_NAMED_SNAPSHOT;
struct __wt_ovfl_reuse;
typedef struct __wt_ovfl_reuse WT_OVFL_REUSE;
struct __wt_ovfl_track;
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index 71312849036..8bfb42821af 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -34,6 +34,75 @@ __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn)
}
/*
+ * __wt_log_background --
+ * Record the given LSN as the background LSN and signal the
+ * thread as needed.
+ */
+int
+__wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_LOG *log;
+
+ conn = S2C(session);
+ log = conn->log;
+ session->bg_sync_lsn = *lsn;
+
+ /*
+ * Advance the logging subsystem background sync LSN if
+ * needed.
+ */
+ __wt_spin_lock(session, &log->log_sync_lock);
+ if (WT_LOG_CMP(lsn, &log->bg_sync_lsn) > 0)
+ log->bg_sync_lsn = *lsn;
+ __wt_spin_unlock(session, &log->log_sync_lock);
+ return (__wt_cond_signal(session, conn->log_file_cond));
+}
+
+/*
+ * __wt_log_force_sync --
+ * Force a sync of the log and files.
+ */
+int
+__wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
+{
+ WT_LOG *log;
+ WT_DECL_RET;
+
+ log = S2C(session)->log;
+
+ __wt_spin_lock(session, &log->log_sync_lock);
+ WT_ASSERT(session, log->log_dir_fh != NULL);
+ /*
+ * Sync the directory if the log file entry hasn't been written
+ * into the directory.
+ */
+ if (log->sync_dir_lsn.file < min_lsn->file) {
+ WT_ERR(__wt_verbose(session, WT_VERB_LOG,
+ "log_force_sync: sync directory %s",
+ log->log_dir_fh->name));
+ WT_ERR(__wt_directory_sync_fh(session, log->log_dir_fh));
+ log->sync_dir_lsn = *min_lsn;
+ WT_STAT_FAST_CONN_INCR(session, log_sync_dir);
+ }
+ /*
+ * Sync the log file if needed.
+ */
+ if (WT_LOG_CMP(&log->sync_lsn, min_lsn) < 0) {
+ WT_ERR(__wt_verbose(session, WT_VERB_LOG,
+ "log_force_sync: sync to LSN %d/%lu",
+ min_lsn->file, min_lsn->offset));
+ WT_ERR(__wt_fsync(session, log->log_fh));
+ log->sync_lsn = *min_lsn;
+ WT_STAT_FAST_CONN_INCR(session, log_sync);
+ WT_ERR(__wt_cond_signal(session, log->log_sync_cond));
+ }
+err:
+ __wt_spin_unlock(session, &log->log_sync_lock);
+ return (ret);
+}
+
+/*
* __wt_log_needs_recovery --
* Return 0 if we encounter a clean shutdown and 1 if recovery
* must be run in the given variable.
@@ -259,9 +328,9 @@ __log_prealloc(WT_SESSION_IMPL *session, WT_FH *fh)
ret = 0;
if (fh->fallocate_available == WT_FALLOCATE_NOT_AVAILABLE ||
(ret = __wt_fallocate(session, fh,
- LOG_FIRST_RECORD, conn->log_file_max)) == ENOTSUP)
+ WT_LOG_FIRST_RECORD, conn->log_file_max)) == ENOTSUP)
ret = __wt_ftruncate(session, fh,
- LOG_FIRST_RECORD + conn->log_file_max);
+ WT_LOG_FIRST_RECORD + conn->log_file_max);
return (ret);
}
@@ -303,7 +372,7 @@ __log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot)
if (!__log_size_fit(session, &log->alloc_lsn, recsize)) {
WT_RET(__wt_log_newfile(session, 0, &created_log));
if (log->log_close_fh != NULL)
- F_SET(slot, SLOT_CLOSEFH);
+ F_SET(slot, WT_SLOT_CLOSEFH);
}
/*
@@ -329,7 +398,7 @@ __log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot)
* Pre-allocate on the first real write into the log file, if it
* was just created (i.e. not pre-allocated).
*/
- if (log->alloc_lsn.offset == LOG_FIRST_RECORD && created_log)
+ if (log->alloc_lsn.offset == WT_LOG_FIRST_RECORD && created_log)
WT_RET(__log_prealloc(session, log->log_fh));
log->alloc_lsn.offset += (wt_off_t)recsize;
@@ -670,7 +739,7 @@ __log_truncate(WT_SESSION_IMPL *session,
* truncate them to the end of the log file header.
*/
WT_ERR(__wt_ftruncate(session,
- log_fh, LOG_FIRST_RECORD));
+ log_fh, WT_LOG_FIRST_RECORD));
WT_ERR(__wt_fsync(session, log_fh));
WT_ERR(__wt_close(session, &log_fh));
}
@@ -716,7 +785,7 @@ __wt_log_allocfile(
*/
WT_ERR(__log_openfile(session, 1, &log_fh, WT_LOG_TMPNAME, lognum));
WT_ERR(__log_file_header(session, log_fh, NULL, 1));
- WT_ERR(__wt_ftruncate(session, log_fh, LOG_FIRST_RECORD));
+ WT_ERR(__wt_ftruncate(session, log_fh, WT_LOG_FIRST_RECORD));
if (prealloc)
WT_ERR(__log_prealloc(session, log_fh));
WT_ERR(__wt_fsync(session, log_fh));
@@ -991,7 +1060,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, int *freep)
*freep = 1;
/* Write the buffered records */
- if (F_ISSET(slot, SLOT_BUFFERED)) {
+ if (F_ISSET(slot, WT_SLOT_BUFFERED)) {
write_size = (size_t)
(slot->slot_end_lsn.offset - slot->slot_start_offset);
WT_ERR(__wt_write(session, slot->slot_fh,
@@ -1005,8 +1074,8 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, int *freep)
* off to the worker thread. The caller is responsible for freeing
* the slot in that case. Otherwise the worker thread will free it.
*/
- if (F_ISSET(slot, SLOT_BUFFERED) &&
- !F_ISSET(slot, SLOT_SYNC | SLOT_SYNC_DIR)) {
+ if (F_ISSET(slot, WT_SLOT_BUFFERED) &&
+ !F_ISSET(slot, WT_SLOT_SYNC | WT_SLOT_SYNC_DIR)) {
*freep = 0;
slot->slot_state = WT_LOG_SLOT_WRITTEN;
/*
@@ -1036,15 +1105,15 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, int *freep)
/*
* Signal the close thread if needed.
*/
- if (F_ISSET(slot, SLOT_CLOSEFH))
- WT_ERR(__wt_cond_signal(session, conn->log_close_cond));
+ if (F_ISSET(slot, WT_SLOT_CLOSEFH))
+ WT_ERR(__wt_cond_signal(session, conn->log_file_cond));
/*
* Try to consolidate calls to fsync to wait less. Acquire a spin lock
* so that threads finishing writing to the log will wait while the
* current fsync completes and advance log->sync_lsn.
*/
- while (F_ISSET(slot, SLOT_SYNC | SLOT_SYNC_DIR)) {
+ while (F_ISSET(slot, WT_SLOT_SYNC | WT_SLOT_SYNC_DIR)) {
/*
* We have to wait until earlier log files have finished their
* sync operations. The most recent one will set the LSN to the
@@ -1069,7 +1138,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, int *freep)
* not yet stable in its parent directory. Do that
* now if needed.
*/
- if (F_ISSET(slot, SLOT_SYNC_DIR) &&
+ if (F_ISSET(slot, WT_SLOT_SYNC_DIR) &&
(log->sync_dir_lsn.file < sync_lsn.file)) {
WT_ASSERT(session, log->log_dir_fh != NULL);
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
@@ -1084,7 +1153,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, int *freep)
/*
* Sync the log file if needed.
*/
- if (F_ISSET(slot, SLOT_SYNC) &&
+ if (F_ISSET(slot, WT_SLOT_SYNC) &&
WT_LOG_CMP(&log->sync_lsn, &slot->slot_end_lsn) < 0) {
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
"log_release: sync log %s", log->log_fh->name));
@@ -1096,7 +1165,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, int *freep)
/*
* Clear the flags before leaving the loop.
*/
- F_CLR(slot, SLOT_SYNC | SLOT_SYNC_DIR);
+ F_CLR(slot, WT_SLOT_SYNC | WT_SLOT_SYNC_DIR);
locked = 0;
__wt_spin_unlock(session, &log->log_sync_lock);
break;
@@ -1174,7 +1243,7 @@ __wt_log_newfile(WT_SESSION_IMPL *session, int conn_create, int *created)
* the end of the header.
*/
log->alloc_lsn.file = log->fileid;
- log->alloc_lsn.offset = LOG_FIRST_RECORD;
+ log->alloc_lsn.offset = WT_LOG_FIRST_RECORD;
end_lsn = log->alloc_lsn;
/*
@@ -1467,8 +1536,6 @@ err: WT_STAT_FAST_CONN_INCR(session, log_scans);
*/
if (LF_ISSET(WT_LOGSCAN_ONE) && eol && ret == 0)
ret = WT_NOTFOUND;
- if (ret == ENOENT)
- ret = 0;
WT_TRET(__wt_close(session, &log_fh));
return (ret);
}
@@ -1499,9 +1566,9 @@ __log_direct_write(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp,
locked = 1;
if (LF_ISSET(WT_LOG_DSYNC | WT_LOG_FSYNC))
- F_SET(&tmp, SLOT_SYNC_DIR);
+ F_SET(&tmp, WT_SLOT_SYNC_DIR);
if (LF_ISSET(WT_LOG_FSYNC))
- F_SET(&tmp, SLOT_SYNC);
+ F_SET(&tmp, WT_SLOT_SYNC);
WT_ERR(__log_acquire(session, record->size, &tmp));
__wt_spin_unlock(session, &log->log_slot_lock);
locked = 0;
@@ -1697,8 +1764,16 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp,
ret = __log_direct_write(session, record, &lsn, flags);
if (ret == 0 && lsnp != NULL)
*lsnp = lsn;
- if (ret == 0)
- return (0);
+ /*
+ * All needed syncing will be handled directly except
+ * a background sync. Handle that here.
+ */
+ if (ret == 0) {
+ if (LF_ISSET(WT_LOG_BACKGROUND))
+ goto bg;
+ else
+ return (0);
+ }
if (ret != EAGAIN)
WT_ERR(ret);
/*
@@ -1761,8 +1836,15 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp,
(void)__wt_cond_wait(
session, log->log_write_cond, 10000);
}
-err:
- if (locked)
+
+ /*
+ * Advance the background sync LSN if needed.
+ */
+bg: if (LF_ISSET(WT_LOG_BACKGROUND) &&
+ WT_LOG_CMP(&session->bg_sync_lsn, &lsn) <= 0)
+ WT_ERR(__wt_log_background(session, &lsn));
+
+err: if (locked)
__wt_spin_unlock(session, &log->log_slot_lock);
if (ret == 0 && lsnp != NULL)
*lsnp = lsn;
@@ -1776,6 +1858,7 @@ err:
if (LF_ISSET(WT_LOG_DSYNC | WT_LOG_FSYNC) && ret == 0 &&
myslot.slot != NULL)
ret = myslot.slot->slot_error;
+
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/log/log_auto.c b/src/third_party/wiredtiger/src/log/log_auto.c
index bd830687df2..4f1dd1c0baf 100644
--- a/src/third_party/wiredtiger/src/log/log_auto.c
+++ b/src/third_party/wiredtiger/src/log/log_auto.c
@@ -122,6 +122,7 @@ int
__wt_logop_col_put_print(
WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out)
{
+ WT_DECL_RET;
uint32_t fileid;
uint64_t recno;
WT_ITEM value;
@@ -131,13 +132,17 @@ __wt_logop_col_put_print(
WT_RET(__wt_logop_col_put_unpack(
session, pp, end, &fileid, &recno, &value));
- fprintf(out, " \"optype\": \"col_put\",\n");
- fprintf(out, " \"fileid\": \"%" PRIu32 "\",\n", fileid);
- fprintf(out, " \"recno\": \"%" PRIu64 "\",\n", recno);
- WT_RET(__logrec_jsonify_str(session, &escaped, &value));
- fprintf(out, " \"value\": \"%s\"", escaped);
- __wt_free(session, escaped);
- return (0);
+ WT_RET(__wt_fprintf(out, " \"optype\": \"col_put\",\n"));
+ WT_ERR(__wt_fprintf(out,
+ " \"fileid\": \"%" PRIu32 "\",\n", fileid));
+ WT_ERR(__wt_fprintf(out,
+ " \"recno\": \"%" PRIu64 "\",\n", recno));
+ WT_ERR(__logrec_jsonify_str(session, &escaped, &value));
+ WT_ERR(__wt_fprintf(out,
+ " \"value\": \"%s\"", escaped));
+
+err: __wt_free(session, escaped);
+ return (ret);
}
int
@@ -190,9 +195,11 @@ __wt_logop_col_remove_print(
WT_RET(__wt_logop_col_remove_unpack(
session, pp, end, &fileid, &recno));
- fprintf(out, " \"optype\": \"col_remove\",\n");
- fprintf(out, " \"fileid\": \"%" PRIu32 "\",\n", fileid);
- fprintf(out, " \"recno\": \"%" PRIu64 "\"", recno);
+ WT_RET(__wt_fprintf(out, " \"optype\": \"col_remove\",\n"));
+ WT_RET(__wt_fprintf(out,
+ " \"fileid\": \"%" PRIu32 "\",\n", fileid));
+ WT_RET(__wt_fprintf(out,
+ " \"recno\": \"%" PRIu64 "\"", recno));
return (0);
}
@@ -247,10 +254,13 @@ __wt_logop_col_truncate_print(
WT_RET(__wt_logop_col_truncate_unpack(
session, pp, end, &fileid, &start, &stop));
- fprintf(out, " \"optype\": \"col_truncate\",\n");
- fprintf(out, " \"fileid\": \"%" PRIu32 "\",\n", fileid);
- fprintf(out, " \"start\": \"%" PRIu64 "\",\n", start);
- fprintf(out, " \"stop\": \"%" PRIu64 "\"", stop);
+ WT_RET(__wt_fprintf(out, " \"optype\": \"col_truncate\",\n"));
+ WT_RET(__wt_fprintf(out,
+ " \"fileid\": \"%" PRIu32 "\",\n", fileid));
+ WT_RET(__wt_fprintf(out,
+ " \"start\": \"%" PRIu64 "\",\n", start));
+ WT_RET(__wt_fprintf(out,
+ " \"stop\": \"%" PRIu64 "\"", stop));
return (0);
}
@@ -298,6 +308,7 @@ int
__wt_logop_row_put_print(
WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out)
{
+ WT_DECL_RET;
uint32_t fileid;
WT_ITEM key;
WT_ITEM value;
@@ -307,14 +318,18 @@ __wt_logop_row_put_print(
WT_RET(__wt_logop_row_put_unpack(
session, pp, end, &fileid, &key, &value));
- fprintf(out, " \"optype\": \"row_put\",\n");
- fprintf(out, " \"fileid\": \"%" PRIu32 "\",\n", fileid);
- WT_RET(__logrec_jsonify_str(session, &escaped, &key));
- fprintf(out, " \"key\": \"%s\",\n", escaped);
- WT_RET(__logrec_jsonify_str(session, &escaped, &value));
- fprintf(out, " \"value\": \"%s\"", escaped);
- __wt_free(session, escaped);
- return (0);
+ WT_RET(__wt_fprintf(out, " \"optype\": \"row_put\",\n"));
+ WT_ERR(__wt_fprintf(out,
+ " \"fileid\": \"%" PRIu32 "\",\n", fileid));
+ WT_ERR(__logrec_jsonify_str(session, &escaped, &key));
+ WT_ERR(__wt_fprintf(out,
+ " \"key\": \"%s\",\n", escaped));
+ WT_ERR(__logrec_jsonify_str(session, &escaped, &value));
+ WT_ERR(__wt_fprintf(out,
+ " \"value\": \"%s\"", escaped));
+
+err: __wt_free(session, escaped);
+ return (ret);
}
int
@@ -361,6 +376,7 @@ int
__wt_logop_row_remove_print(
WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out)
{
+ WT_DECL_RET;
uint32_t fileid;
WT_ITEM key;
char *escaped;
@@ -369,12 +385,15 @@ __wt_logop_row_remove_print(
WT_RET(__wt_logop_row_remove_unpack(
session, pp, end, &fileid, &key));
- fprintf(out, " \"optype\": \"row_remove\",\n");
- fprintf(out, " \"fileid\": \"%" PRIu32 "\",\n", fileid);
- WT_RET(__logrec_jsonify_str(session, &escaped, &key));
- fprintf(out, " \"key\": \"%s\"", escaped);
- __wt_free(session, escaped);
- return (0);
+ WT_RET(__wt_fprintf(out, " \"optype\": \"row_remove\",\n"));
+ WT_ERR(__wt_fprintf(out,
+ " \"fileid\": \"%" PRIu32 "\",\n", fileid));
+ WT_ERR(__logrec_jsonify_str(session, &escaped, &key));
+ WT_ERR(__wt_fprintf(out,
+ " \"key\": \"%s\"", escaped));
+
+err: __wt_free(session, escaped);
+ return (ret);
}
int
@@ -421,6 +440,7 @@ int
__wt_logop_row_truncate_print(
WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out)
{
+ WT_DECL_RET;
uint32_t fileid;
WT_ITEM start;
WT_ITEM stop;
@@ -431,15 +451,20 @@ __wt_logop_row_truncate_print(
WT_RET(__wt_logop_row_truncate_unpack(
session, pp, end, &fileid, &start, &stop, &mode));
- fprintf(out, " \"optype\": \"row_truncate\",\n");
- fprintf(out, " \"fileid\": \"%" PRIu32 "\",\n", fileid);
- WT_RET(__logrec_jsonify_str(session, &escaped, &start));
- fprintf(out, " \"start\": \"%s\",\n", escaped);
- WT_RET(__logrec_jsonify_str(session, &escaped, &stop));
- fprintf(out, " \"stop\": \"%s\",\n", escaped);
- fprintf(out, " \"mode\": \"%" PRIu32 "\"", mode);
- __wt_free(session, escaped);
- return (0);
+ WT_RET(__wt_fprintf(out, " \"optype\": \"row_truncate\",\n"));
+ WT_ERR(__wt_fprintf(out,
+ " \"fileid\": \"%" PRIu32 "\",\n", fileid));
+ WT_ERR(__logrec_jsonify_str(session, &escaped, &start));
+ WT_ERR(__wt_fprintf(out,
+ " \"start\": \"%s\",\n", escaped));
+ WT_ERR(__logrec_jsonify_str(session, &escaped, &stop));
+ WT_ERR(__wt_fprintf(out,
+ " \"stop\": \"%s\",\n", escaped));
+ WT_ERR(__wt_fprintf(out,
+ " \"mode\": \"%" PRIu32 "\"", mode));
+
+err: __wt_free(session, escaped);
+ return (ret);
}
int
diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c
index 02b3056be6f..45455b59e6b 100644
--- a/src/third_party/wiredtiger/src/log/log_slot.c
+++ b/src/third_party/wiredtiger/src/log/log_slot.c
@@ -35,15 +35,15 @@ __wt_log_slot_init(WT_SESSION_IMPL *session)
conn = S2C(session);
log = conn->log;
- for (i = 0; i < SLOT_POOL; i++) {
+ for (i = 0; i < WT_SLOT_POOL; i++) {
log->slot_pool[i].slot_state = WT_LOG_SLOT_FREE;
- log->slot_pool[i].slot_index = SLOT_INVALID_INDEX;
+ log->slot_pool[i].slot_index = WT_SLOT_INVALID_INDEX;
}
/*
* Set up the available slots from the pool the first time.
*/
- for (i = 0; i < SLOT_ACTIVE; i++) {
+ for (i = 0; i < WT_SLOT_ACTIVE; i++) {
slot = &log->slot_pool[i];
slot->slot_index = (uint32_t)i;
slot->slot_state = WT_LOG_SLOT_READY;
@@ -54,13 +54,13 @@ __wt_log_slot_init(WT_SESSION_IMPL *session)
* Allocate memory for buffers now that the arrays are setup. Split
* this out to make error handling simpler.
*/
- for (i = 0; i < SLOT_POOL; i++) {
+ for (i = 0; i < WT_SLOT_POOL; i++) {
WT_ERR(__wt_buf_init(session,
&log->slot_pool[i].slot_buf, WT_LOG_SLOT_BUF_INIT_SIZE));
- F_SET(&log->slot_pool[i], SLOT_INIT_FLAGS);
+ F_SET(&log->slot_pool[i], WT_SLOT_INIT_FLAGS);
}
WT_STAT_FAST_CONN_INCRV(session,
- log_buffer_size, WT_LOG_SLOT_BUF_INIT_SIZE * SLOT_POOL);
+ log_buffer_size, WT_LOG_SLOT_BUF_INIT_SIZE * WT_SLOT_POOL);
if (0) {
err: while (--i >= 0)
__wt_buf_free(session, &log->slot_pool[i].slot_buf);
@@ -82,7 +82,7 @@ __wt_log_slot_destroy(WT_SESSION_IMPL *session)
conn = S2C(session);
log = conn->log;
- for (i = 0; i < SLOT_POOL; i++)
+ for (i = 0; i < WT_SLOT_POOL; i++)
__wt_buf_free(session, &log->slot_pool[i].slot_buf);
return (0);
}
@@ -100,18 +100,29 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize,
WT_CONNECTION_IMPL *conn;
WT_LOG *log;
WT_LOGSLOT *slot;
- int64_t cur_state, new_state, old_state;
+ int64_t new_state, old_state;
uint32_t allocated_slot, slot_grow_attempts;
conn = S2C(session);
log = conn->log;
slot_grow_attempts = 0;
find_slot:
- allocated_slot = __wt_random(session->rnd) % SLOT_ACTIVE;
+ allocated_slot = WT_SLOT_ACTIVE == 1 ? 0 :
+ __wt_random(session->rnd) % WT_SLOT_ACTIVE;
+ /*
+ * Get the selected slot. Use a barrier to prevent the compiler from
+ * caching this read.
+ */
+ WT_BARRIER();
slot = log->slot_array[allocated_slot];
- old_state = slot->slot_state;
join_slot:
/*
+ * Read the current slot state. Use a barrier to prevent the compiler
+ * from caching this read.
+ */
+ WT_BARRIER();
+ old_state = slot->slot_state;
+ /*
* WT_LOG_SLOT_READY and higher means the slot is available for
* joining. Any other state means it is in use and transitioning
* from the active array.
@@ -135,20 +146,18 @@ join_slot:
* the slot for a buffer size increase and find another slot.
*/
if (new_state > (int64_t)slot->slot_buf.memsize) {
- F_SET(slot, SLOT_BUF_GROW);
+ F_SET(slot, WT_SLOT_BUF_GROW);
if (++slot_grow_attempts > 5) {
WT_STAT_FAST_CONN_INCR(session, log_slot_toosmall);
return (ENOMEM);
}
goto find_slot;
}
- cur_state = WT_ATOMIC_CAS_VAL8(slot->slot_state, old_state, new_state);
/*
* We lost a race to add our size into this slot. Check the state
* and try again.
*/
- if (cur_state != old_state) {
- old_state = cur_state;
+ if (!WT_ATOMIC_CAS8(slot->slot_state, old_state, new_state)) {
WT_STAT_FAST_CONN_INCR(session, log_slot_races);
goto join_slot;
}
@@ -159,9 +168,9 @@ join_slot:
*/
WT_STAT_FAST_CONN_INCR(session, log_slot_joins);
if (LF_ISSET(WT_LOG_DSYNC | WT_LOG_FSYNC))
- F_SET(slot, SLOT_SYNC_DIR);
+ F_SET(slot, WT_SLOT_SYNC_DIR);
if (LF_ISSET(WT_LOG_FSYNC))
- F_SET(slot, SLOT_SYNC);
+ F_SET(slot, WT_SLOT_SYNC);
myslotp->slot = slot;
myslotp->offset = (wt_off_t)old_state - WT_LOG_SLOT_READY;
return (0);
@@ -193,7 +202,7 @@ retry:
*/
pool_i = log->pool_index;
newslot = &log->slot_pool[pool_i];
- if (++log->pool_index >= SLOT_POOL)
+ if (++log->pool_index >= WT_SLOT_POOL)
log->pool_index = 0;
if (newslot->slot_state != WT_LOG_SLOT_FREE) {
WT_STAT_FAST_CONN_INCR(session, log_slot_switch_fails);
@@ -203,7 +212,7 @@ retry:
* churn is used to change how long we pause before closing
* the slot - which leads to more consolidation and less churn.
*/
- if (++switch_fails % SLOT_POOL == 0 && slot->slot_churn < 5)
+ if (++switch_fails % WT_SLOT_POOL == 0 && slot->slot_churn < 5)
++slot->slot_churn;
__wt_yield();
goto retry;
@@ -303,7 +312,7 @@ __wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
/*
* Grow the buffer if needed before returning it to the pool.
*/
- if (F_ISSET(slot, SLOT_BUF_GROW)) {
+ if (F_ISSET(slot, WT_SLOT_BUF_GROW)) {
WT_STAT_FAST_CONN_INCR(session, log_buffer_grow);
WT_STAT_FAST_CONN_INCRV(session,
log_buffer_size, slot->slot_buf.memsize);
@@ -320,7 +329,7 @@ err:
* We have to reset them them here because multiple threads may
* change the flags when joining the slot.
*/
- slot->flags = SLOT_INIT_FLAGS;
+ slot->flags = WT_SLOT_INIT_FLAGS;
slot->slot_state = WT_LOG_SLOT_FREE;
return (ret);
}
@@ -353,28 +362,25 @@ __wt_log_slot_grow_buffers(WT_SESSION_IMPL *session, size_t newsize)
* a separate lock if there is contention.
*/
__wt_spin_lock(session, &log->log_slot_lock);
- for (i = 0; i < SLOT_POOL; i++) {
+ for (i = 0; i < WT_SLOT_POOL; i++) {
slot = &log->slot_pool[i];
- /* Avoid atomic operations if they won't succeed. */
- if (slot->slot_state != WT_LOG_SLOT_FREE &&
- slot->slot_state != WT_LOG_SLOT_READY)
- continue;
+
/* Don't keep growing unrelated buffers. */
if (slot->slot_buf.memsize > (10 * newsize) &&
- !F_ISSET(slot, SLOT_BUF_GROW))
+ !F_ISSET(slot, WT_SLOT_BUF_GROW))
+ continue;
+
+ /* Avoid atomic operations if they won't succeed. */
+ orig_state = slot->slot_state;
+ if ((orig_state != WT_LOG_SLOT_FREE &&
+ orig_state != WT_LOG_SLOT_READY) ||
+ !WT_ATOMIC_CAS8(
+ slot->slot_state, orig_state, WT_LOG_SLOT_PENDING))
continue;
- orig_state = WT_ATOMIC_CAS_VAL8(
- slot->slot_state, WT_LOG_SLOT_FREE, WT_LOG_SLOT_PENDING);
- if (orig_state != WT_LOG_SLOT_FREE) {
- orig_state = WT_ATOMIC_CAS_VAL8(slot->slot_state,
- WT_LOG_SLOT_READY, WT_LOG_SLOT_PENDING);
- if (orig_state != WT_LOG_SLOT_READY)
- continue;
- }
/* We have a slot - now go ahead and grow the buffer. */
old_size = slot->slot_buf.memsize;
- F_CLR(slot, SLOT_BUF_GROW);
+ F_CLR(slot, WT_SLOT_BUF_GROW);
WT_ERR(__wt_buf_grow(session, &slot->slot_buf,
WT_MAX(slot->slot_buf.memsize * 2, newsize)));
slot->slot_state = orig_state;
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c b/src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c
index d67eb33c9e8..ef1c124111f 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_cursor_bulk.c
@@ -17,11 +17,14 @@ __clsm_close_bulk(WT_CURSOR *cursor)
{
WT_CURSOR_LSM *clsm;
WT_CURSOR *bulk_cursor;
+ WT_LSM_CHUNK *chunk;
WT_LSM_TREE *lsm_tree;
WT_SESSION_IMPL *session;
+ uint64_t avg_chunks, total_chunks;
clsm = (WT_CURSOR_LSM *)cursor;
lsm_tree = clsm->lsm_tree;
+ chunk = lsm_tree->chunk[0];
session = (WT_SESSION_IMPL *)clsm->iface.session;
/* Close the bulk cursor to ensure the chunk is written to disk. */
@@ -31,7 +34,18 @@ __clsm_close_bulk(WT_CURSOR *cursor)
clsm->nchunks = 0;
/* Set ondisk, and flush the metadata */
- F_SET(lsm_tree->chunk[0], WT_LSM_CHUNK_ONDISK);
+ F_SET(chunk, WT_LSM_CHUNK_ONDISK);
+ /*
+ * Setup a generation in our chunk based on how many chunk_size
+ * pieces fit into a chunk of a given generation. This allows future
+ * LSM merges choose reasonable sets of chunks.
+ */
+ avg_chunks = (lsm_tree->merge_min + lsm_tree->merge_max) / 2;
+ for (total_chunks = chunk->size / lsm_tree->chunk_size;
+ total_chunks > 1;
+ total_chunks /= avg_chunks)
+ ++chunk->generation;
+
WT_RET(__wt_lsm_meta_write(session, lsm_tree));
++lsm_tree->dsk_gen;
@@ -49,15 +63,18 @@ __clsm_insert_bulk(WT_CURSOR *cursor)
{
WT_CURSOR *bulk_cursor;
WT_CURSOR_LSM *clsm;
+ WT_LSM_CHUNK *chunk;
WT_LSM_TREE *lsm_tree;
WT_SESSION_IMPL *session;
clsm = (WT_CURSOR_LSM *)cursor;
lsm_tree = clsm->lsm_tree;
+ chunk = lsm_tree->chunk[0];
session = (WT_SESSION_IMPL *)clsm->iface.session;
WT_ASSERT(session, lsm_tree->nchunks == 1 && clsm->nchunks == 1);
- ++lsm_tree->chunk[0]->count;
+ ++chunk->count;
+ chunk->size += cursor->key.size + cursor->value.size;
bulk_cursor = *clsm->cursors;
bulk_cursor->set_key(bulk_cursor, &cursor->key);
bulk_cursor->set_value(bulk_cursor, &cursor->value);
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
index 4be0cd2a09c..0d3ce5da2d8 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
@@ -652,8 +652,10 @@ int
__wt_lsm_manager_push_entry(WT_SESSION_IMPL *session,
uint32_t type, uint32_t flags, WT_LSM_TREE *lsm_tree)
{
+ WT_DECL_RET;
WT_LSM_MANAGER *manager;
WT_LSM_WORK_UNIT *entry;
+ int pushed;
manager = &S2C(session)->lsm_manager;
@@ -672,13 +674,27 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session,
break;
}
- WT_RET(__wt_epoch(session, &lsm_tree->work_push_ts));
+ /*
+ * Don't allow any work units unless a tree is active, this avoids
+ * races on shutdown between clearing out queues and pushing new
+ * work units.
+ *
+ * Increment the queue reference before checking the flag since
+ * on close, the flag is cleared and then the queue reference count
+ * is checked.
+ */
+ (void)WT_ATOMIC_ADD4(lsm_tree->queue_ref, 1);
+ if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) {
+ (void)WT_ATOMIC_SUB4(lsm_tree->queue_ref, 1);
+ return (0);
+ }
- WT_RET(__wt_calloc_one(session, &entry));
+ pushed = 0;
+ WT_ERR(__wt_epoch(session, &lsm_tree->work_push_ts));
+ WT_ERR(__wt_calloc_one(session, &entry));
entry->type = type;
entry->flags = flags;
entry->lsm_tree = lsm_tree;
- (void)WT_ATOMIC_ADD4(lsm_tree->queue_ref, 1);
WT_STAT_FAST_CONN_INCR(session, lsm_work_units_created);
if (type == WT_LSM_WORK_SWITCH)
@@ -690,8 +706,12 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session,
else
LSM_PUSH_ENTRY(&manager->appqh,
&manager->app_lock, lsm_work_queue_app);
+ pushed = 1;
- WT_RET(__wt_cond_signal(session, manager->work_cond));
-
+ WT_ERR(__wt_cond_signal(session, manager->work_cond));
return (0);
+err:
+ if (!pushed)
+ (void)WT_ATOMIC_SUB4(lsm_tree->queue_ref, 1);
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
index b8aecfc89b6..6c6b185f821 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
@@ -27,6 +27,13 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, int final)
WT_UNUSED(final); /* Only used in diagnostic builds */
+ /*
+ * The work unit queue should be empty, but it's worth checking
+ * since work units use a different locking scheme to regular tree
+ * operations.
+ */
+ WT_ASSERT(session, lsm_tree->queue_ref == 0);
+
/* We may be destroying an lsm_tree before it was added. */
if (F_ISSET(lsm_tree, WT_LSM_TREE_OPEN)) {
WT_ASSERT(session, final ||
@@ -1223,13 +1230,13 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip)
"LSM compaction requires active merge threads");
/*
- * We are done if there is a single chunk in the tree and we have
- * already created a bloom filter for it or we are configured not to.
+ * There is no work to do if there is only a single chunk in the tree
+ * and it has a bloom filter or is configured to never have a bloom
+ * filter.
*/
if (lsm_tree->nchunks == 1 &&
- ((FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST) &&
- F_ISSET(lsm_tree->chunk[0], WT_LSM_CHUNK_BLOOM)) ||
- !FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST))) {
+ (!FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OLDEST) ||
+ F_ISSET(lsm_tree->chunk[0], WT_LSM_CHUNK_BLOOM))) {
__wt_lsm_tree_release(session, lsm_tree);
return (0);
}
diff --git a/src/third_party/wiredtiger/src/meta/meta_turtle.c b/src/third_party/wiredtiger/src/meta/meta_turtle.c
index 79a555d0922..a44e647ff08 100644
--- a/src/third_party/wiredtiger/src/meta/meta_turtle.c
+++ b/src/third_party/wiredtiger/src/meta/meta_turtle.c
@@ -175,10 +175,10 @@ __wt_turtle_init(WT_SESSION_IMPL *session)
* creation doesn't fully complete, we won't have a turtle file and we
* will repeat the process until we succeed.
*
- * Incremental backups can occur only run recovery is run and it becomes
- * live. So if there is a turtle file and an incremental backup file
- * that is an error. Otherwise, if there's already a turtle file,
- * we're done.
+ * Incremental backups can occur only if recovery is run and it becomes
+ * live. So, if there is a turtle file and an incremental backup file,
+ * that is an error. Otherwise, if there's already a turtle file, we're
+ * done.
*/
WT_RET(__wt_exist(session, WT_INCREMENTAL_BACKUP, &exist_incr));
WT_RET(__wt_exist(session, WT_METADATA_TURTLE, &exist));
@@ -187,23 +187,23 @@ __wt_turtle_init(WT_SESSION_IMPL *session)
WT_RET_MSG(session, EINVAL,
"Incremental backup after running recovery "
"is not allowed.");
- return (0);
- }
- if (exist_incr)
- F_SET(S2C(session), WT_CONN_WAS_BACKUP);
+ } else {
+ if (exist_incr)
+ F_SET(S2C(session), WT_CONN_WAS_BACKUP);
- /* Create the metadata file. */
- WT_RET(__metadata_init(session));
+ /* Create the metadata file. */
+ WT_RET(__metadata_init(session));
- /* Load any hot-backup information. */
- WT_RET(__metadata_load_hot_backup(session));
+ /* Load any hot-backup information. */
+ WT_RET(__metadata_load_hot_backup(session));
- /* Create any bulk-loaded file stubs. */
- WT_RET(__metadata_load_bulk(session));
+ /* Create any bulk-loaded file stubs. */
+ WT_RET(__metadata_load_bulk(session));
- /* Create the turtle file. */
- WT_RET(__metadata_config(session, &metaconf));
- WT_ERR(__wt_turtle_update(session, WT_METAFILE_URI, metaconf));
+ /* Create the turtle file. */
+ WT_RET(__metadata_config(session, &metaconf));
+ WT_ERR(__wt_turtle_update(session, WT_METAFILE_URI, metaconf));
+ }
/* Remove the backup files, we'll never read them again. */
WT_ERR(__wt_backup_file_remove(session));
diff --git a/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c b/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c
index 3a3b0e0d74f..19183ed9030 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_mtx_rw.c
@@ -82,7 +82,7 @@ __wt_try_readlock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
writers = l->s.writers;
old = (pad << 48) + (users << 32) + (users << 16) + writers;
new = (pad << 48) + ((users + 1) << 32) + ((users + 1) << 16) + writers;
- return (WT_ATOMIC_CAS_VAL8(l->u, old, new) == old ? 0 : EBUSY);
+ return (WT_ATOMIC_CAS8(l->u, old, new) ? 0 : EBUSY);
}
/*
@@ -163,7 +163,7 @@ __wt_try_writelock(WT_SESSION_IMPL *session, WT_RWLOCK *rwlock)
users = l->s.users;
old = (pad << 48) + (users << 32) + (readers << 16) + users;
new = (pad << 48) + ((users + 1) << 32) + (readers << 16) + users;
- return (WT_ATOMIC_CAS_VAL8(l->u, old, new) == old ? 0 : EBUSY);
+ return (WT_ATOMIC_CAS8(l->u, old, new) ? 0 : EBUSY);
}
/*
diff --git a/src/third_party/wiredtiger/src/os_posix/os_open.c b/src/third_party/wiredtiger/src/os_posix/os_open.c
index 2116efc6ea8..7a4f5fdb38d 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_open.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_open.c
@@ -16,16 +16,9 @@ static int
__open_directory(WT_SESSION_IMPL *session, char *path, int *fd)
{
WT_DECL_RET;
- char *dir;
- if ((dir = strrchr(path, '/')) == NULL)
- path = (char *)".";
- else
- *dir = '\0';
WT_SYSCALL_RETRY(((*fd =
open(path, O_RDONLY, 0444)) == -1 ? 1 : 0), ret);
- if (dir != NULL)
- *dir = '/';
if (ret != 0)
WT_RET_MSG(session, ret, "%s: open_directory", path);
return (ret);
diff --git a/src/third_party/wiredtiger/src/os_posix/os_thread.c b/src/third_party/wiredtiger/src/os_posix/os_thread.c
index c70a04c8df7..e4f24cdb44e 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_thread.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_thread.c
@@ -49,12 +49,15 @@ __wt_thread_id(char *buf, size_t buflen)
pthread_t self;
/*
- * POSIX 1003.1 allows pthread_t to be an opaque type, but on systems
- * where it's a pointer, we'd rather print out the pointer and match
- * gdb output. Since we don't yet run on any systems where pthread_t
- * is not a pointer, do it that way for now.
+ * POSIX 1003.1 allows pthread_t to be an opaque type; on systems where
+ * it's a pointer, print the pointer to match gdb output.
*/
self = pthread_self();
+#ifdef __sun
(void)snprintf(buf, buflen,
- "%" PRIu64 ":%p", (uint64_t)getpid(), (void *)self);
+ "%" PRIuMAX ":%u", (uintmax_t)getpid(), self);
+#else
+ (void)snprintf(buf, buflen,
+ "%" PRIuMAX ":%p", (uintmax_t)getpid(), (void *)self);
+#endif
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_track.c b/src/third_party/wiredtiger/src/reconcile/rec_track.c
index e417dbfaa83..36e85713421 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_track.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_track.c
@@ -711,7 +711,7 @@ __ovfl_txnc_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
* visibility check could give different results as the global ID moves
* forward.
*/
- oldest_txn = S2C(session)->txn_global.oldest_id;
+ oldest_txn = __wt_txn_oldest_id(session);
/*
* Discard any transaction-cache records with transaction IDs earlier
diff --git a/src/third_party/wiredtiger/src/schema/schema_open.c b/src/third_party/wiredtiger/src/schema/schema_open.c
index eb706f5ff27..bd4f2012480 100644
--- a/src/third_party/wiredtiger/src/schema/schema_open.c
+++ b/src/third_party/wiredtiger/src/schema/schema_open.c
@@ -155,7 +155,8 @@ __open_index(WT_SESSION_IMPL *session, WT_TABLE *table, WT_INDEX *idx)
}
WT_ERR(__wt_extractor_config(
- session, idx->config, &idx->extractor, &idx->extractor_owned));
+ session, idx->name, idx->config, &idx->extractor,
+ &idx->extractor_owned));
WT_ERR(__wt_config_getones(session, idx->config, "key_format", &cval));
WT_ERR(__wt_strndup(session, cval.str, cval.len, &idx->key_format));
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index 7951b3ff50d..7e331c530fd 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -9,6 +9,7 @@
#include "wt_internal.h"
static int __session_checkpoint(WT_SESSION *, const char *);
+static int __session_snapshot(WT_SESSION *, const char *);
static int __session_rollback_transaction(WT_SESSION *, const char *);
/*
@@ -74,6 +75,7 @@ __session_clear(WT_SESSION_IMPL *session)
memset(session, 0, WT_SESSION_CLEAR_SIZE(session));
session->hazard_size = 0;
session->nhazard = 0;
+ WT_INIT_LSN(&session->bg_sync_lsn);
}
/*
@@ -850,6 +852,89 @@ err: API_END_RET(session, ret);
}
/*
+ * __session_transaction_sync --
+ * WT_SESSION->transaction_sync method.
+ */
+static int
+__session_transaction_sync(WT_SESSION *wt_session, const char *config)
+{
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+ WT_SESSION_IMPL *session;
+ WT_TXN *txn;
+ struct timespec now, start;
+ uint64_t timeout_ms, waited_ms;
+ int forever;
+
+ session = (WT_SESSION_IMPL *)wt_session;
+ conn = S2C(session);
+ txn = &session->txn;
+ if (F_ISSET(txn, WT_TXN_RUNNING))
+ WT_RET_MSG(session, EINVAL, "transaction in progress");
+
+ /*
+ * If logging is not enabled there is nothing to do.
+ */
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))
+ return (0);
+ SESSION_API_CALL(session, transaction_sync, config, cfg);
+ WT_STAT_FAST_CONN_INCR(session, txn_sync);
+
+ log = conn->log;
+ ret = 0;
+ timeout_ms = waited_ms = 0;
+ forever = 1;
+
+ /*
+ * If there is no background sync LSN in this session, there
+ * is nothing to do.
+ */
+ if (WT_IS_INIT_LSN(&session->bg_sync_lsn))
+ goto err;
+
+ /*
+ * If our LSN is smaller than the current sync LSN then our
+ * transaction is stable. We're done.
+ */
+ if (WT_LOG_CMP(&session->bg_sync_lsn, &log->sync_lsn) <= 0)
+ goto err;
+
+ /*
+ * Our LSN is not yet stable. Wait and check again depending on the
+ * timeout.
+ */
+ WT_ERR(__wt_config_gets_def(
+ session, cfg, "timeout_ms", (int)UINT_MAX, &cval));
+ if ((unsigned int)cval.len != UINT_MAX) {
+ timeout_ms = (uint64_t)cval.val;
+ forever = 0;
+ }
+
+ if (timeout_ms == 0)
+ WT_ERR(ETIMEDOUT);
+
+ WT_ERR(__wt_epoch(session, &start));
+ /*
+ * Keep checking the LSNs until we find it is stable or we reach
+ * our timeout.
+ */
+ while (WT_LOG_CMP(&session->bg_sync_lsn, &log->sync_lsn) > 0) {
+ WT_ERR(__wt_cond_signal(session, conn->log_file_cond));
+ WT_ERR(__wt_epoch(session, &now));
+ waited_ms = WT_TIMEDIFF(now, start) / WT_MILLION;
+ if (forever || waited_ms < timeout_ms)
+ WT_ERR(__wt_cond_wait(
+ session, log->log_sync_cond, waited_ms));
+ else
+ WT_ERR(ETIMEDOUT);
+ }
+
+err: API_END_RET(session, ret);
+}
+
+/*
* __session_checkpoint --
* WT_SESSION->checkpoint method.
*/
@@ -920,6 +1005,42 @@ err: F_CLR(session, WT_SESSION_CAN_WAIT | WT_SESSION_NO_CACHE_CHECK);
}
/*
+ * __session_snapshot --
+ * WT_SESSION->snapshot method.
+ */
+static int
+__session_snapshot(WT_SESSION *wt_session, const char *config)
+{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ WT_TXN_GLOBAL *txn_global;
+ int has_create, has_drop;
+
+ has_create = has_drop = 0;
+ session = (WT_SESSION_IMPL *)wt_session;
+ txn_global = &S2C(session)->txn_global;
+
+ SESSION_API_CALL(session, snapshot, config, cfg);
+
+ WT_ERR(__wt_txn_named_snapshot_config(
+ session, cfg, &has_create, &has_drop));
+
+ WT_ERR(__wt_writelock(session, txn_global->nsnap_rwlock));
+
+ /* Drop any snapshots to be removed first. */
+ if (has_drop)
+ WT_ERR(__wt_txn_named_snapshot_drop(session, cfg));
+
+ /* Start the named snapshot if requested. */
+ if (has_create)
+ WT_ERR(__wt_txn_named_snapshot_begin(session, cfg));
+
+err: WT_TRET(__wt_writeunlock(session, txn_global->nsnap_rwlock));
+
+ API_END_RET_NOTFOUND_MAP(session, ret);
+}
+
+/*
* __session_strerror --
* WT_SESSION->strerror method.
*/
@@ -997,7 +1118,9 @@ __wt_open_session(WT_CONNECTION_IMPL *conn,
__session_commit_transaction,
__session_rollback_transaction,
__session_checkpoint,
- __session_transaction_pinned_range
+ __session_snapshot,
+ __session_transaction_pinned_range,
+ __session_transaction_sync
};
WT_DECL_RET;
WT_SESSION_IMPL *session, *session_ret;
diff --git a/src/third_party/wiredtiger/src/session/session_dhandle.c b/src/third_party/wiredtiger/src/session/session_dhandle.c
index 720f40e8d11..6648dd5761f 100644
--- a/src/third_party/wiredtiger/src/session/session_dhandle.c
+++ b/src/third_party/wiredtiger/src/session/session_dhandle.c
@@ -114,9 +114,8 @@ __wt_session_lock_dhandle(
WT_RET(__wt_readlock(session, dhandle->rwlock));
if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
*is_deadp = 1;
- WT_RET(
+ return (
__wt_readunlock(session, dhandle->rwlock));
- return (0);
}
is_open = F_ISSET(dhandle, WT_DHANDLE_OPEN) ? 1 : 0;
@@ -135,9 +134,8 @@ __wt_session_lock_dhandle(
if ((ret = __wt_try_writelock(session, dhandle->rwlock)) == 0) {
if (F_ISSET(dhandle, WT_DHANDLE_DEAD)) {
*is_deadp = 1;
- WT_RET(
+ return (
__wt_writeunlock(session, dhandle->rwlock));
- return (0);
}
/*
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index c30d4b2f029..44c2daa3802 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -549,6 +549,7 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats)
"transaction: transaction range of IDs currently pinned";
stats->txn_pinned_checkpoint_range.desc =
"transaction: transaction range of IDs currently pinned by a checkpoint";
+ stats->txn_sync.desc = "transaction: transaction sync calls";
stats->txn_commit.desc = "transaction: transactions committed";
stats->txn_rollback.desc = "transaction: transactions rolled back";
}
@@ -669,6 +670,7 @@ __wt_stat_refresh_connection_stats(void *stats_arg)
stats->txn_begin.v = 0;
stats->txn_checkpoint.v = 0;
stats->txn_fail_cache.v = 0;
+ stats->txn_sync.v = 0;
stats->txn_commit.v = 0;
stats->txn_rollback.v = 0;
}
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 0492e39342f..63ef4db5092 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -258,6 +258,11 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force)
if (WT_TXNID_LT(snap_min, oldest_id))
oldest_id = snap_min;
+ /* The oldest ID can't move past any named snapshots. */
+ if ((id = txn_global->nsnap_oldest_id) != WT_TXN_NONE &&
+ WT_TXNID_LT(id, oldest_id))
+ oldest_id = id;
+
/* Update the last running ID. */
if (WT_TXNID_LT(txn_global->last_running, snap_min)) {
txn_global->last_running = snap_min;
@@ -329,15 +334,30 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
* The default sync setting is inherited from the connection, but can
* be overridden by an explicit "sync" setting for this transaction.
*
- * !!! This is an unusual use of the config code: the "default" value
- * we pass in is inherited from the connection. If flush is not set in
- * the connection-wide flag and not overridden here, we end up clearing
- * all flags.
+ * We want to distinguish between inheriting implicitly and explicitly.
*/
- WT_RET(__wt_config_gets_def(session, cfg, "sync",
- FLD_ISSET(txn->txn_logsync, WT_LOG_FLUSH) ? 1 : 0, &cval));
- if (!cval.val)
- txn->txn_logsync = 0;
+ F_CLR(txn, WT_TXN_SYNC_SET);
+ WT_RET(__wt_config_gets_def(
+ session, cfg, "sync", (int)UINT_MAX, &cval));
+ if (cval.val == 0 || cval.val == 1)
+ /*
+ * This is an explicit setting of sync. Set the flag so
+ * that we know not to overwrite it in commit_transaction.
+ */
+ F_SET(txn, WT_TXN_SYNC_SET);
+
+ if (cval.val == 0)
+ FLD_CLR(txn->txn_logsync, WT_LOG_FLUSH);
+
+ WT_RET(__wt_config_gets_def(session, cfg, "snapshot", 0, &cval));
+ if (cval.len > 0)
+ /*
+ * The layering here isn't ideal - the named snapshot get
+ * function does both validation and setup. Otherwise we'd
+ * need to walk the list of named snapshots twice during
+ * transaction open.
+ */
+ WT_RET(__wt_txn_named_snapshot_get(session, &cval));
return (0);
}
@@ -381,7 +401,8 @@ __wt_txn_release(WT_SESSION_IMPL *session)
*/
__wt_txn_release_snapshot(session);
txn->isolation = session->isolation;
- F_CLR(txn, WT_TXN_ERROR | WT_TXN_HAS_ID | WT_TXN_RUNNING);
+ /* Ensure the transaction flags are cleared on exit */
+ txn->flags = 0;
}
/*
@@ -391,17 +412,60 @@ __wt_txn_release(WT_SESSION_IMPL *session)
int
__wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
{
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_TXN *txn;
WT_TXN_OP *op;
u_int i;
txn = &session->txn;
+ conn = S2C(session);
WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR));
if (!F_ISSET(txn, WT_TXN_RUNNING))
WT_RET_MSG(session, EINVAL, "No transaction is active");
+ /*
+ * The default sync setting is inherited from the connection, but can
+ * be overridden by an explicit "sync" setting for this transaction.
+ */
+ WT_RET(__wt_config_gets_def(session, cfg, "sync", 0, &cval));
+
+ /*
+ * If the user chose the default setting, check whether sync is enabled
+ * for this transaction (either inherited or via begin_transaction).
+ * If sync is disabled, clear the field to avoid the log write being
+ * flushed.
+ *
+ * Otherwise check for specific settings. We don't need to check for
+ * "on" because that is the default inherited from the connection. If
+ * the user set anything in begin_transaction, we only override with an
+ * explicit setting.
+ */
+ if (cval.len == 0) {
+ if (!FLD_ISSET(txn->txn_logsync, WT_LOG_FLUSH) &&
+ !F_ISSET(txn, WT_TXN_SYNC_SET))
+ txn->txn_logsync = 0;
+ } else {
+ /*
+ * If the caller already set sync on begin_transaction then
+ * they should not be using sync on commit_transaction.
+ * Flag that as an error.
+ */
+ if (F_ISSET(txn, WT_TXN_SYNC_SET))
+ WT_RET_MSG(session, EINVAL,
+ "Sync already set during begin_transaction.");
+ if (WT_STRING_MATCH("background", cval.str, cval.len))
+ txn->txn_logsync = WT_LOG_BACKGROUND;
+ else if (WT_STRING_MATCH("off", cval.str, cval.len))
+ txn->txn_logsync = 0;
+ /*
+ * We don't need to check for "on" here because that is the
+ * default to inherit from the connection setting.
+ */
+ }
+
/* Commit notification. */
if (txn->notify != NULL)
WT_TRET(txn->notify->notify(txn->notify,
@@ -409,7 +473,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
/* If we are logging, write a commit log record. */
if (ret == 0 && txn->mod_count > 0 &&
- FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED) &&
+ FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
!F_ISSET(session, WT_SESSION_NO_LOGGING)) {
/*
* We are about to block on I/O writing the log.
@@ -596,8 +660,14 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
txn_global->current = txn_global->last_running =
txn_global->oldest_id = WT_TXN_FIRST;
+ WT_RET(__wt_rwlock_alloc(session,
+ &txn_global->nsnap_rwlock, "named snapshot lock"));
+ txn_global->nsnap_oldest_id = WT_TXN_NONE;
+ STAILQ_INIT(&txn_global->nsnaph);
+
WT_RET(__wt_calloc_def(
session, conn->session_size, &txn_global->states));
+
for (i = 0, s = txn_global->states; i < conn->session_size; i++, s++)
s->id = s->snap_min = WT_TXN_NONE;
@@ -608,15 +678,21 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
* __wt_txn_global_destroy --
* Destroy the global transaction state.
*/
-void
+int
__wt_txn_global_destroy(WT_SESSION_IMPL *session)
{
WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
WT_TXN_GLOBAL *txn_global;
conn = S2C(session);
txn_global = &conn->txn_global;
- if (txn_global != NULL)
- __wt_free(session, txn_global->states);
+ if (txn_global == NULL)
+ return (0);
+
+ WT_TRET(__wt_rwlock_destroy(session, &txn_global->nsnap_rwlock));
+ __wt_free(session, txn_global->states);
+
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 60c427c56b6..d8032b49b17 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -352,14 +352,14 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
const char *txn_cfg[] = { WT_CONFIG_BASE(session,
WT_SESSION_begin_transaction), "isolation=snapshot", NULL };
void *saved_meta_next;
- int full, logging, tracking;
+ int full, idle, logging, tracking;
u_int i;
conn = S2C(session);
txn_global = &conn->txn_global;
saved_isolation = session->isolation;
txn = &session->txn;
- full = logging = tracking = 0;
+ full = idle = logging = tracking = 0;
/* Ensure the metadata table is open before taking any locks. */
WT_RET(__wt_metadata_open(session));
@@ -499,6 +499,10 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_txn_commit(session, NULL));
/*
+ * If any tree was dirty, we will have updated the metadata with the
+ * new checkpoint information. If the metadata is clean, all other
+ * trees must have been clean.
+ *
* Disable metadata tracking during the metadata checkpoint.
*
* We don't lock old checkpoints in the metadata file: there is no way
@@ -553,11 +557,19 @@ err: /*
txn_global->checkpoint_id = WT_TXN_NONE;
txn_global->checkpoint_snap_min = WT_TXN_NONE;
- /* Tell logging that we have finished a database checkpoint. */
- if (logging)
+ /*
+ * Tell logging that we have finished a database checkpoint. Do not
+ * write a log record if the database was idle.
+ */
+ if (logging) {
+ if (ret == 0 && full &&
+ F_ISSET((WT_BTREE *)session->meta_dhandle->handle,
+ WT_BTREE_SKIP_CKPT))
+ idle = 1;
WT_TRET(__wt_txn_checkpoint_log(session, full,
- (ret == 0) ? WT_TXN_LOG_CKPT_STOP : WT_TXN_LOG_CKPT_FAIL,
- NULL));
+ (ret == 0 && !idle) ?
+ WT_TXN_LOG_CKPT_STOP : WT_TXN_LOG_CKPT_CLEANUP, NULL));
+ }
for (i = 0; i < session->ckpt_handle_next; ++i) {
if (session->ckpt_handle[i].dhandle == NULL) {
@@ -577,7 +589,6 @@ err: /*
}
session->isolation = txn->isolation = saved_isolation;
-
return (ret);
}
@@ -788,6 +799,7 @@ __checkpoint_worker(
* means it must exist.
*/
force = 0;
+ F_CLR(btree, WT_BTREE_SKIP_CKPT);
if (!btree->modified && cfg != NULL) {
ret = __wt_config_gets(session, cfg, "force", &cval);
if (ret != 0 && ret != WT_NOTFOUND)
@@ -796,8 +808,10 @@ __checkpoint_worker(
force = 1;
}
if (!btree->modified && !force) {
- if (!is_checkpoint)
+ if (!is_checkpoint) {
+ F_SET(btree, WT_BTREE_SKIP_CKPT);
goto done;
+ }
deleted = 0;
WT_CKPT_FOREACH(ckptbase, ckpt)
@@ -815,8 +829,10 @@ __checkpoint_worker(
(strcmp(name, (ckpt - 1)->name) == 0 ||
(WT_PREFIX_MATCH(name, WT_CHECKPOINT) &&
WT_PREFIX_MATCH((ckpt - 1)->name, WT_CHECKPOINT))) &&
- deleted < 2)
+ deleted < 2) {
+ F_SET(btree, WT_BTREE_SKIP_CKPT);
goto done;
+ }
}
/* Add a new checkpoint entry at the end of the list. */
diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c
index 9b6a3d46014..63e4c50aff5 100644
--- a/src/third_party/wiredtiger/src/txn/txn_log.c
+++ b/src/third_party/wiredtiger/src/txn/txn_log.c
@@ -70,21 +70,21 @@ __txn_commit_printlog(
int firstrecord;
firstrecord = 1;
- fprintf(out, " \"ops\": [\n");
+ WT_RET(__wt_fprintf(out, " \"ops\": [\n"));
/* The logging subsystem zero-pads records. */
while (*pp < end && **pp) {
if (!firstrecord)
- fprintf(out, ",\n");
- fprintf(out, " {");
+ WT_RET(__wt_fprintf(out, ",\n"));
+ WT_RET(__wt_fprintf(out, " {"));
firstrecord = 0;
WT_RET(__wt_txn_op_printlog(session, pp, end, out));
- fprintf(out, "\n }");
+ WT_RET(__wt_fprintf(out, "\n }"));
}
- fprintf(out, "\n ]\n");
+ WT_RET(__wt_fprintf(out, "\n ]\n"));
return (0);
}
@@ -155,12 +155,12 @@ __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
WT_TXN *txn;
WT_TXN_OP *op;
+ txn = &session->txn;
+
if (!FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED) ||
F_ISSET(session, WT_SESSION_NO_LOGGING))
return (0);
- txn = &session->txn;
-
/* We'd better have a transaction. */
WT_ASSERT(session,
F_ISSET(txn, WT_TXN_RUNNING) && F_ISSET(txn, WT_TXN_HAS_ID));
@@ -303,6 +303,12 @@ __wt_txn_checkpoint_log(
case WT_TXN_LOG_CKPT_PREPARE:
txn->full_ckpt = 1;
*ckpt_lsn = S2C(session)->log->write_start_lsn;
+ /*
+ * We need to make sure that the log records in the checkpoint
+ * LSN are on disk. In particular to make sure that the
+ * current log file exists.
+ */
+ WT_ERR(__wt_log_force_sync(session, ckpt_lsn));
break;
case WT_TXN_LOG_CKPT_START:
@@ -355,7 +361,7 @@ __wt_txn_checkpoint_log(
WT_ERR(__wt_log_ckpt(session, ckpt_lsn));
/* FALLTHROUGH */
- case WT_TXN_LOG_CKPT_FAIL:
+ case WT_TXN_LOG_CKPT_CLEANUP:
/* Cleanup any allocated resources */
WT_INIT_LSN(ckpt_lsn);
txn->ckpt_nsnapshot = 0;
@@ -456,7 +462,7 @@ __txn_printlog(WT_SESSION_IMPL *session,
WT_UNUSED(next_lsnp);
out = cookie;
- p = LOG_SKIP_HEADER(rawrec->data);
+ p = WT_LOG_SKIP_HEADER(rawrec->data);
end = (const uint8_t *)rawrec->data + rawrec->size;
logrec = (WT_LOG_RECORD *)rawrec->data;
compressed = F_ISSET(logrec, WT_LOG_RECORD_COMPRESSED);
@@ -465,57 +471,56 @@ __txn_printlog(WT_SESSION_IMPL *session,
WT_RET(__wt_logrec_read(session, &p, end, &rectype));
if (!firstrecord)
- fprintf(out, ",\n");
-
- if (fprintf(out, " { \"lsn\" : [%" PRIu32 ",%" PRId64 "],\n",
- lsnp->file, lsnp->offset) < 0 ||
- fprintf(out, " \"hdr_flags\" : \"%s\",\n",
- compressed ? "compressed" : "") < 0 ||
- fprintf(out, " \"rec_len\" : %" PRIu32 ",\n", logrec->len) < 0 ||
- fprintf(out, " \"mem_len\" : %" PRIu32 ",\n",
- compressed ? logrec->mem_len : logrec->len) < 0)
- return (errno);
+ WT_RET(__wt_fprintf(out, ",\n"));
+
+ WT_RET(__wt_fprintf(out,
+ " { \"lsn\" : [%" PRIu32 ",%" PRId64 "],\n",
+ lsnp->file, lsnp->offset));
+ WT_RET(__wt_fprintf(out,
+ " \"hdr_flags\" : \"%s\",\n", compressed ? "compressed" : ""));
+ WT_RET(__wt_fprintf(out,
+ " \"rec_len\" : %" PRIu32 ",\n", logrec->len));
+ WT_RET(__wt_fprintf(out,
+ " \"mem_len\" : %" PRIu32 ",\n",
+ compressed ? logrec->mem_len : logrec->len));
switch (rectype) {
case WT_LOGREC_CHECKPOINT:
WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p),
WT_UNCHECKED_STRING(IQ), &ckpt_lsn.file, &ckpt_lsn.offset));
- if (fprintf(out, " \"type\" : \"checkpoint\",\n") < 0 ||
- fprintf(
- out, " \"ckpt_lsn\" : [%" PRIu32 ",%" PRId64 "]\n",
- ckpt_lsn.file, ckpt_lsn.offset) < 0)
- return (errno);
+ WT_RET(__wt_fprintf(out, " \"type\" : \"checkpoint\",\n"));
+ WT_RET(__wt_fprintf(out,
+ " \"ckpt_lsn\" : [%" PRIu32 ",%" PRId64 "]\n",
+ ckpt_lsn.file, ckpt_lsn.offset));
break;
case WT_LOGREC_COMMIT:
WT_RET(__wt_vunpack_uint(&p, WT_PTRDIFF(end, p), &txnid));
- if (fprintf(out, " \"type\" : \"commit\",\n") < 0 ||
- fprintf(out, " \"txnid\" : %" PRIu64 ",\n", txnid) < 0)
- return (errno);
+ WT_RET(__wt_fprintf(out, " \"type\" : \"commit\",\n"));
+ WT_RET(__wt_fprintf(out,
+ " \"txnid\" : %" PRIu64 ",\n", txnid));
WT_RET(__txn_commit_printlog(session, &p, end, out));
break;
case WT_LOGREC_FILE_SYNC:
WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p),
WT_UNCHECKED_STRING(Ii), &fileid, &start));
- if (fprintf(out, " \"type\" : \"file_sync\",\n") < 0 ||
- fprintf(out, " \"fileid\" : %" PRIu32 ",\n",
- fileid) < 0 ||
- fprintf(out, " \"start\" : %" PRId32 "\n", start) < 0)
- return (errno);
+ WT_RET(__wt_fprintf(out, " \"type\" : \"file_sync\",\n"));
+ WT_RET(__wt_fprintf(out,
+ " \"fileid\" : %" PRIu32 ",\n", fileid));
+ WT_RET(__wt_fprintf(out,
+ " \"start\" : %" PRId32 "\n", start));
break;
case WT_LOGREC_MESSAGE:
WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p),
WT_UNCHECKED_STRING(S), &msg));
- if (fprintf(out, " \"type\" : \"message\",\n") < 0 ||
- fprintf(out, " \"message\" : \"%s\"\n", msg) < 0)
- return (errno);
+ WT_RET(__wt_fprintf(out, " \"type\" : \"message\",\n"));
+ WT_RET(__wt_fprintf(out, " \"message\" : \"%s\"\n", msg));
break;
}
- if (fprintf(out, " }") < 0)
- return (errno);
+ WT_RET(__wt_fprintf(out, " }"));
return (0);
}
@@ -531,12 +536,10 @@ __wt_txn_printlog(WT_SESSION *wt_session, FILE *out)
session = (WT_SESSION_IMPL *)wt_session;
- if (fprintf(out, "[\n") < 0)
- return (errno);
+ WT_RET(__wt_fprintf(out, "[\n"));
WT_RET(__wt_log_scan(
session, NULL, WT_LOGSCAN_FIRST, __txn_printlog, out));
- if (fprintf(out, "\n]\n") < 0)
- return (errno);
+ WT_RET(__wt_fprintf(out, "\n]\n"));
return (0);
}
diff --git a/src/third_party/wiredtiger/src/txn/txn_nsnap.c b/src/third_party/wiredtiger/src/txn/txn_nsnap.c
new file mode 100644
index 00000000000..7997d707de5
--- /dev/null
+++ b/src/third_party/wiredtiger/src/txn/txn_nsnap.c
@@ -0,0 +1,369 @@
+/*-
+ * Copyright (c) 2014-2015 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __nsnap_destroy --
+ * Destroy a named snapshot structure.
+ */
+static void
+__nsnap_destroy(WT_SESSION_IMPL *session, WT_NAMED_SNAPSHOT *nsnap)
+{
+ __wt_free(session, nsnap->name);
+ __wt_free(session, nsnap->snapshot);
+ __wt_free(session, nsnap);
+}
+
+/*
+ * __nsnap_drop_one --
+ * Drop a single named snapshot. The named snapshot lock must be held
+ * write locked.
+ */
+static int
+__nsnap_drop_one(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *name)
+{
+ WT_DECL_RET;
+ WT_NAMED_SNAPSHOT *found;
+ WT_TXN_GLOBAL *txn_global;
+
+ txn_global = &S2C(session)->txn_global;
+
+ STAILQ_FOREACH(found, &txn_global->nsnaph, q)
+ if (WT_STRING_MATCH(found->name, name->str, name->len))
+ break;
+
+ if (found == NULL)
+ return (WT_NOTFOUND);
+
+ /* Bump the global ID if we are removing the first entry */
+ if (found == STAILQ_FIRST(&txn_global->nsnaph))
+ txn_global->nsnap_oldest_id = (STAILQ_NEXT(found, q) != NULL) ?
+ STAILQ_NEXT(found, q)->snap_min : WT_TXN_NONE;
+ STAILQ_REMOVE(&txn_global->nsnaph, found, __wt_named_snapshot, q);
+ __nsnap_destroy(session, found);
+
+ return (ret);
+}
+
+/*
+ * __nsnap_drop_to --
+ * Drop named snapshots, if the name is NULL all snapshots will be
+ * dropped. The named snapshot lock must be held write locked.
+ */
+static int
+__nsnap_drop_to(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *name, int inclusive)
+{
+ WT_DECL_RET;
+ WT_NAMED_SNAPSHOT *last, *nsnap, *prev;
+ WT_TXN_GLOBAL *txn_global;
+ uint64_t new_nsnap_oldest;
+
+ last = nsnap = prev = NULL;
+ txn_global = &S2C(session)->txn_global;
+
+ if (STAILQ_EMPTY(&txn_global->nsnaph)) {
+ if (name == NULL)
+ return (0);
+ /*
+ * Dropping specific snapshots when there aren't any it's an
+ * error.
+ */
+ WT_RET_MSG(session, EINVAL,
+ "Named snapshot '%.*s' for drop not found",
+ (int)name->len, name->str);
+ }
+
+ /*
+ * The new ID will be none if we are removing all named snapshots
+ * which is the default behavior of this loop.
+ */
+ new_nsnap_oldest = WT_TXN_NONE;
+ if (name != NULL) {
+ STAILQ_FOREACH(last, &txn_global->nsnaph, q) {
+ if (WT_STRING_MATCH(last->name, name->str, name->len))
+ break;
+ prev = last;
+ }
+ if (last == NULL)
+ WT_RET_MSG(session, EINVAL,
+ "Named snapshot '%.*s' for drop not found",
+ (int)name->len, name->str);
+
+ if (!inclusive) {
+ /* We are done if a drop before points to the head */
+ if (prev == 0)
+ return (0);
+ last = prev;
+ }
+
+ if (STAILQ_NEXT(last, q) != NULL)
+ new_nsnap_oldest = STAILQ_NEXT(last, q)->snap_min;
+ }
+
+ do {
+ nsnap = STAILQ_FIRST(&txn_global->nsnaph);
+ WT_ASSERT(session, nsnap != NULL);
+ STAILQ_REMOVE_HEAD(&txn_global->nsnaph, q);
+ __nsnap_destroy(session, nsnap);
+ /* Last will be NULL in the all case so it will never match */
+ } while (nsnap != last && !STAILQ_EMPTY(&txn_global->nsnaph));
+
+ /* Now that the queue of named snapshots is updated, update the ID */
+ txn_global->nsnap_oldest_id = new_nsnap_oldest;
+
+ return (ret);
+}
+
+/*
+ * __wt_txn_named_snapshot_begin --
+ * Begin an named in-memory snapshot.
+ */
+int
+__wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[])
+{
+ WT_CONFIG_ITEM cval;
+ WT_DECL_RET;
+ WT_NAMED_SNAPSHOT *nsnap, *nsnap_new;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ const char *txn_cfg[] =
+ { WT_CONFIG_BASE(session, WT_SESSION_begin_transaction),
+ "isolation=snapshot", NULL };
+ int started_txn;
+
+ started_txn = 0;
+ nsnap_new = NULL;
+ txn_global = &S2C(session)->txn_global;
+ txn = &session->txn;
+
+ WT_RET(__wt_config_gets_def(session, cfg, "name", 0, &cval));
+ WT_ASSERT(session, cval.len != 0);
+
+ if (!F_ISSET(txn, WT_TXN_RUNNING)) {
+ WT_RET(__wt_txn_begin(session, txn_cfg));
+ started_txn = 1;
+ }
+ F_SET(txn, WT_TXN_READONLY);
+
+ /* Save a copy of the transaction's snapshot. */
+ WT_ERR(__wt_calloc_one(session, &nsnap_new));
+ nsnap = nsnap_new;
+ WT_ERR(__wt_strndup(session, cval.str, cval.len, &nsnap->name));
+ nsnap->snap_min = txn->snap_min;
+ nsnap->snap_max = txn->snap_max;
+ if (txn->snapshot_count > 0) {
+ WT_ERR(__wt_calloc_def(
+ session, txn->snapshot_count, &nsnap->snapshot));
+ memcpy(nsnap->snapshot, txn->snapshot,
+ txn->snapshot_count * sizeof(*nsnap->snapshot));
+ }
+ nsnap->snapshot_count = txn->snapshot_count;
+
+ /* Update the list. */
+
+ /*
+ * The semantic is that a new snapshot with the same name as an
+ * existing snapshot will replace the old one.
+ */
+ WT_ERR_NOTFOUND_OK(__nsnap_drop_one(session, &cval));
+
+ if (STAILQ_EMPTY(&txn_global->nsnaph))
+ txn_global->nsnap_oldest_id = nsnap_new->snap_min;
+ STAILQ_INSERT_TAIL(&txn_global->nsnaph, nsnap_new, q);
+ nsnap_new = NULL;
+
+err: if (started_txn)
+ WT_TRET(__wt_txn_rollback(session, NULL));
+ else if (ret == 0)
+ F_SET(txn, WT_TXN_NAMED_SNAPSHOT);
+
+ if (nsnap_new != NULL)
+ __nsnap_destroy(session, nsnap_new);
+
+ return (ret);
+}
+
+/*
+ * __wt_txn_named_snapshot_drop --
+ * Drop named snapshots
+ */
+int
+__wt_txn_named_snapshot_drop(WT_SESSION_IMPL *session, const char *cfg[])
+{
+ WT_CONFIG objectconf;
+ WT_CONFIG_ITEM all_config, k, names_config, to_config, before_config, v;
+ WT_DECL_RET;
+
+ WT_RET(__wt_config_gets_def(session, cfg, "drop.all", 0, &all_config));
+ WT_RET(__wt_config_gets_def(
+ session, cfg, "drop.names", 0, &names_config));
+ WT_RET(__wt_config_gets_def(session, cfg, "drop.to", 0, &to_config));
+ WT_RET(__wt_config_gets_def(
+ session, cfg, "drop.before", 0, &before_config));
+
+ if (all_config.val != 0)
+ WT_RET(__nsnap_drop_to(session, NULL, 1));
+ else if (before_config.len != 0)
+ WT_RET(__nsnap_drop_to(session, &before_config, 0));
+ else if (to_config.len != 0)
+ WT_RET(__nsnap_drop_to(session, &to_config, 1));
+
+ /* We are done if there are no named drops */
+
+ if (names_config.len != 0) {
+ WT_RET(__wt_config_subinit(
+ session, &objectconf, &names_config));
+ while ((ret = __wt_config_next(&objectconf, &k, &v)) == 0) {
+ ret = __nsnap_drop_one(session, &k);
+ if (ret != 0)
+ WT_RET_MSG(session, EINVAL,
+ "Named snapshot '%.*s' for drop not found",
+ (int)k.len, k.str);
+ }
+ if (ret == WT_NOTFOUND)
+ ret = 0;
+ }
+
+ return (ret);
+}
+
+/*
+ * __wt_txn_named_snapshot_get --
+ * Lookup a named snapshot for a transaction.
+ */
+int
+__wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval)
+{
+ WT_NAMED_SNAPSHOT *nsnap;
+ WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
+ WT_TXN_STATE *txn_state;
+
+ txn = &session->txn;
+ txn_global = &S2C(session)->txn_global;
+ txn_state = &S2C(session)->txn_global.states[session->id];
+
+ txn->isolation = WT_ISO_SNAPSHOT;
+ if (session->ncursors > 0)
+ WT_RET(__wt_session_copy_values(session));
+
+ WT_RET(__wt_readlock(session, txn_global->nsnap_rwlock));
+ STAILQ_FOREACH(nsnap, &txn_global->nsnaph, q)
+ if (WT_STRING_MATCH(nsnap->name, nameval->str, nameval->len)) {
+ txn->snap_min = txn_state->snap_min = nsnap->snap_min;
+ txn->snap_max = nsnap->snap_max;
+ if ((txn->snapshot_count = nsnap->snapshot_count) != 0)
+ memcpy(txn->snapshot, nsnap->snapshot,
+ nsnap->snapshot_count *
+ sizeof(*nsnap->snapshot));
+ F_SET(txn, WT_TXN_HAS_SNAPSHOT);
+ break;
+ }
+ WT_RET(__wt_readunlock(session, txn_global->nsnap_rwlock));
+
+ if (nsnap == NULL)
+ WT_RET_MSG(session, EINVAL,
+ "Named snapshot '%.*s' not found",
+ (int)nameval->len, nameval->str);
+
+ /* Flag that this transaction is opened on a named snapshot */
+ F_SET(txn, WT_TXN_NAMED_SNAPSHOT);
+
+ return (0);
+}
+
+/*
+ * __wt_txn_named_snapshot_config --
+ * Check the configuration for a named snapshot
+ */
+int
+__wt_txn_named_snapshot_config(WT_SESSION_IMPL *session,
+ const char *cfg[], int *has_create, int *has_drops)
+{
+ WT_CONFIG_ITEM cval;
+ WT_CONFIG_ITEM all_config, names_config, to_config, before_config;
+ WT_TXN *txn;
+
+ txn = &session->txn;
+ *has_create = *has_drops = 0;
+
+ /* Verify that the name is legal. */
+ WT_RET(__wt_config_gets_def(session, cfg, "name", 0, &cval));
+ if (cval.len != 0) {
+ if (WT_STRING_MATCH("all", cval.str, cval.len))
+ WT_RET_MSG(session, EINVAL,
+ "Can't create snapshot with reserved \"all\" name");
+
+ WT_RET(__wt_name_check(session, cval.str, cval.len));
+
+ if (F_ISSET(txn, WT_TXN_RUNNING) &&
+ txn->isolation != WT_ISO_SNAPSHOT)
+ WT_RET_MSG(session, EINVAL,
+ "Can't create a named snapshot from a running "
+ "transaction that isn't snapshot isolation");
+ else if (F_ISSET(txn, WT_TXN_RUNNING) && txn->mod_count != 0)
+ WT_RET_MSG(session, EINVAL,
+ "Can't create a named snapshot from a running "
+ "transaction that has made updates");
+ *has_create = 1;
+ }
+
+ /* Verify that the drop configuration is sane. */
+ WT_RET(__wt_config_gets_def(session, cfg, "drop.all", 0, &all_config));
+ WT_RET(__wt_config_gets_def(
+ session, cfg, "drop.names", 0, &names_config));
+ WT_RET(__wt_config_gets_def(session, cfg, "drop.to", 0, &to_config));
+ WT_RET(__wt_config_gets_def(
+ session, cfg, "drop.before", 0, &before_config));
+
+ /* Avoid more work if no drops are configured. */
+ if (all_config.val != 0 || names_config.len != 0 ||
+ before_config.len != 0 || to_config.len != 0) {
+ if (before_config.len != 0 && to_config.len != 0)
+ WT_RET_MSG(session, EINVAL,
+ "Illegal configuration; named snapshot drop can't "
+ "specify both before and to options");
+ if (all_config.val != 0 && (names_config.len != 0 ||
+ to_config.len != 0 || before_config.len != 0))
+ WT_RET_MSG(session, EINVAL,
+ "Illegal configuration; named snapshot drop can't "
+ "specify all and any other options");
+ *has_drops = 1;
+ }
+
+ if (!*has_create && !*has_drops)
+ WT_RET_MSG(session, EINVAL,
+ "WT_SESSION::snapshot API called without any drop or "
+ "name option.");
+
+ return (0);
+}
+
+/*
+ * __wt_txn_named_snapshot_destroy --
+ * Destroy all named snapshots on connection close
+ */
+int
+__wt_txn_named_snapshot_destroy(WT_SESSION_IMPL *session)
+{
+ WT_NAMED_SNAPSHOT *nsnap;
+ WT_TXN_GLOBAL *txn_global;
+
+ txn_global = &S2C(session)->txn_global;
+ txn_global->nsnap_oldest_id = WT_TXN_NONE;
+
+ while (!STAILQ_EMPTY(&txn_global->nsnaph)) {
+ nsnap = STAILQ_FIRST(&txn_global->nsnaph);
+ WT_ASSERT(session, nsnap != NULL);
+ STAILQ_REMOVE_HEAD(&txn_global->nsnaph, q);
+ __nsnap_destroy(session, nsnap);
+ }
+
+ return (0);
+}
diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c
index f11b585da1c..0eadcbf3b01 100644
--- a/src/third_party/wiredtiger/src/txn/txn_recover.c
+++ b/src/third_party/wiredtiger/src/txn/txn_recover.c
@@ -273,7 +273,7 @@ __txn_log_recover(WT_SESSION_IMPL *session,
WT_UNUSED(next_lsnp);
r = cookie;
- p = LOG_SKIP_HEADER(logrec->data);
+ p = WT_LOG_SKIP_HEADER(logrec->data);
end = (const uint8_t *)logrec->data + logrec->size;
WT_UNUSED(firstrecord);
@@ -463,8 +463,11 @@ __wt_txn_recover(WT_SESSION_IMPL *session)
* there.
*/
r.ckpt_lsn = metafile->ckpt_lsn;
- WT_ERR(__wt_log_scan(session,
- &metafile->ckpt_lsn, 0, __txn_log_recover, &r));
+ ret = __wt_log_scan(session,
+ &metafile->ckpt_lsn, 0, __txn_log_recover, &r);
+ if (ret == ENOENT)
+ ret = 0;
+ WT_ERR(ret);
}
}
@@ -502,9 +505,13 @@ __wt_txn_recover(WT_SESSION_IMPL *session)
WT_ERR(__wt_log_scan(session, NULL,
WT_LOGSCAN_FIRST | WT_LOGSCAN_RECOVER,
__txn_log_recover, &r));
- else
- WT_ERR(__wt_log_scan(session, &r.ckpt_lsn,
- WT_LOGSCAN_RECOVER, __txn_log_recover, &r));
+ else {
+ ret = __wt_log_scan(session, &r.ckpt_lsn,
+ WT_LOGSCAN_RECOVER, __txn_log_recover, &r);
+ if (ret == ENOENT)
+ ret = 0;
+ WT_ERR(ret);
+ }
conn->next_file_id = r.max_fileid;