summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--SConstruct1
-rw-r--r--bench/wtperf/config.c3
-rw-r--r--bench/wtperf/wtperf.h19
-rw-r--r--bench/wtperf/wtperf_opt.i2
-rw-r--r--build_posix/Make.subdirs1
-rw-r--r--build_posix/aclocal/options.m425
-rw-r--r--build_posix/configure.ac.in2
-rw-r--r--build_win/wiredtiger_config.h3
-rw-r--r--dist/api_data.py9
-rw-r--r--dist/extlist1
-rw-r--r--dist/s_define.list3
-rw-r--r--dist/s_string.ok2
-rw-r--r--examples/c/ex_all.c15
-rw-r--r--examples/c/ex_encrypt.c5
-rw-r--r--examples/c/ex_log.c37
-rw-r--r--examples/java/com/wiredtiger/examples/ex_all.java13
-rw-r--r--ext/compressors/bzip2/Makefile.am6
-rw-r--r--ext/compressors/bzip2/bzip2_compress.c415
-rw-r--r--ext/compressors/lz4/lz4_compress.c35
-rw-r--r--ext/compressors/snappy/snappy_compress.c39
-rw-r--r--src/block/block_map.c14
-rw-r--r--src/block/block_mgr.c129
-rw-r--r--src/block/block_open.c46
-rw-r--r--src/block/block_read.c23
-rw-r--r--src/block/block_slvg.c1
-rw-r--r--src/block/block_write.c23
-rw-r--r--src/btree/bt_handle.c4
-rw-r--r--src/btree/bt_split.c34
-rw-r--r--src/btree/bt_walk.c157
-rw-r--r--src/btree/col_srch.c6
-rw-r--r--src/btree/row_srch.c36
-rw-r--r--src/conn/conn_api.c13
-rw-r--r--src/conn/conn_dhandle.c6
-rw-r--r--src/conn/conn_log.c41
-rw-r--r--src/conn/conn_stat.c2
-rw-r--r--src/conn/conn_sweep.c8
-rw-r--r--src/cursor/cur_backup.c2
-rw-r--r--src/cursor/cur_log.c15
-rw-r--r--src/docs/build-posix.dox4
-rw-r--r--src/docs/compression.dox31
-rw-r--r--src/docs/upgrading.dox7
-rw-r--r--src/docs/wtperf.dox2
-rw-r--r--src/include/block.h46
-rw-r--r--src/include/btmem.h17
-rw-r--r--src/include/btree.i26
-rw-r--r--src/include/connection.h1
-rw-r--r--src/include/extern.h1
-rw-r--r--src/include/log.h88
-rw-r--r--src/include/log.i19
-rw-r--r--src/include/meta.h5
-rw-r--r--src/include/misc.h3
-rw-r--r--src/include/schema.h39
-rw-r--r--src/include/swap.h92
-rw-r--r--src/include/wiredtiger.in39
-rw-r--r--src/include/wt_internal.h9
-rw-r--r--src/log/log.c182
-rw-r--r--src/log/log_slot.c9
-rw-r--r--src/lsm/lsm_cursor.c2
-rw-r--r--src/lsm/lsm_stat.c2
-rw-r--r--src/lsm/lsm_tree.c20
-rw-r--r--src/lsm/lsm_work_unit.c4
-rw-r--r--src/meta/meta_ckpt.c2
-rw-r--r--src/meta/meta_table.c12
-rw-r--r--src/reconcile/rec_write.c26
-rw-r--r--src/schema/schema_drop.c2
-rw-r--r--src/schema/schema_rename.c2
-rw-r--r--src/schema/schema_worker.c4
-rw-r--r--src/session/session_dhandle.c9
-rw-r--r--src/support/crypto.c11
-rw-r--r--src/support/global.c36
-rw-r--r--src/support/hash_city.c52
-rw-r--r--src/support/hash_fnv.c1
-rw-r--r--src/txn/txn_ckpt.c8
-rw-r--r--src/txn/txn_log.c25
-rw-r--r--src/txn/txn_recover.c19
-rw-r--r--test/format/config.c87
-rw-r--r--test/format/config.h3
-rw-r--r--test/format/format.h16
-rw-r--r--test/format/t.c4
-rw-r--r--test/format/wts.c13
-rw-r--r--test/packing/intpack-test.c4
-rw-r--r--test/packing/intpack-test2.c4
-rw-r--r--test/packing/intpack-test3.c4
-rw-r--r--test/packing/packing-test.c4
-rw-r--r--test/recovery/Makefile.am14
-rw-r--r--test/recovery/random-abort.c (renamed from test/recovery/recovery.c)4
-rw-r--r--test/recovery/truncated-log.c268
-rw-r--r--test/suite/test_compress01.py1
-rw-r--r--test/suite/test_cursor08.py1
-rw-r--r--test/suite/test_encrypt01.py2
-rw-r--r--test/suite/test_txn07.py1
-rw-r--r--test/utility/test_util.i23
92 files changed, 1392 insertions, 1114 deletions
diff --git a/SConstruct b/SConstruct
index 3f395d17543..a7306262f82 100644
--- a/SConstruct
+++ b/SConstruct
@@ -456,6 +456,7 @@ if useBdb:
t = env.Program("wtperf", [
"bench/wtperf/config.c",
+ "bench/wtperf/idle_table_cycle.c",
"bench/wtperf/misc.c",
"bench/wtperf/track.c",
"bench/wtperf/wtperf.c",
diff --git a/bench/wtperf/config.c b/bench/wtperf/config.c
index d9a22f4708d..3cb20ff2b26 100644
--- a/bench/wtperf/config.c
+++ b/bench/wtperf/config.c
@@ -156,9 +156,6 @@ config_compress(CONFIG *cfg)
if (strcmp(s, "none") == 0) {
cfg->compress_ext = NULL;
cfg->compress_table = NULL;
- } else if (strcmp(s, "bzip") == 0) {
- cfg->compress_ext = BZIP_EXT;
- cfg->compress_table = BZIP_BLK;
} else if (strcmp(s, "lz4") == 0) {
cfg->compress_ext = LZ4_EXT;
cfg->compress_table = LZ4_BLK;
diff --git a/bench/wtperf/wtperf.h b/bench/wtperf/wtperf.h
index 7dbe1822a26..929880b0aef 100644
--- a/bench/wtperf/wtperf.h
+++ b/bench/wtperf/wtperf.h
@@ -29,6 +29,8 @@
#ifndef HAVE_WTPERF_H
#define HAVE_WTPERF_H
+#include <wt_internal.h>
+
#ifndef _WIN32
#include <sys/time.h>
#endif
@@ -56,8 +58,6 @@
#include <unistd.h>
#endif
-#include <wt_internal.h>
-
#ifdef _WIN32
#include "windows_shim.h"
#endif
@@ -73,9 +73,6 @@ typedef struct __truncate_queue_entry TRUNCATE_QUEUE_ENTRY;
#define EXTPATH "../../ext/compressors/" /* Extensions path */
#define BLKCMP_PFX ",block_compressor="
-#define BZIP_BLK BLKCMP_PFX "bzip2"
-#define BZIP_EXT \
- EXT_PFX EXTPATH "bzip2/.libs/libwiredtiger_bzip2.so" EXT_SFX
#define LZ4_BLK BLKCMP_PFX "lz4"
#define LZ4_EXT \
EXT_PFX EXTPATH "lz4/.libs/libwiredtiger_lz4.so" EXT_SFX
@@ -91,7 +88,7 @@ typedef struct {
int64_t insert; /* Insert ratio */
int64_t read; /* Read ratio */
int64_t update; /* Update ratio */
- uint64_t throttle; /* Maximum operations/second */
+ uint64_t throttle; /* Maximum operations/second */
/* Number of operations per transaction. Zero for autocommit */
int64_t ops_per_txn;
int64_t truncate; /* Truncate ratio */
@@ -406,16 +403,18 @@ dstrdup(const char *str)
/*
* dstrndup --
- * Call strndup, dying on failure.
+ * Emulate strndup, which MSVC lacks: copy at most len bytes of str into
+ * a len + 1 byte buffer obtained from dcalloc, dying on failure.
*/
static inline char *
dstrndup(const char *str, const size_t len)
{
char *p;
+ p = dcalloc(len + 1, 1);
- if ((p = strndup(str, len)) == NULL)
- die(errno, "strndup");
+ if (p == NULL)
+ die(errno, "dstrndup");
+ strncpy(p, str, len);
return (p);
}
-
#endif
diff --git a/bench/wtperf/wtperf_opt.i b/bench/wtperf/wtperf_opt.i
index 6dc2a6d5569..60bbaff56e5 100644
--- a/bench/wtperf/wtperf_opt.i
+++ b/bench/wtperf/wtperf_opt.i
@@ -97,7 +97,7 @@ DEF_OPT_AS_CONFIG_STRING(conn_config, "create",
DEF_OPT_AS_BOOL(compact, 0, "post-populate compact for LSM merging activity")
DEF_OPT_AS_STRING(compression, "none",
"compression extension. Allowed configuration values are: "
- "'none', 'bzip', 'lz4', 'snappy', 'zlib'")
+ "'none', 'lz4', 'snappy', 'zlib'")
DEF_OPT_AS_BOOL(create, 1,
"do population phase; false to use existing database")
DEF_OPT_AS_UINT32(database_count, 1,
diff --git a/build_posix/Make.subdirs b/build_posix/Make.subdirs
index e2f128a48df..e1f8a05c613 100644
--- a/build_posix/Make.subdirs
+++ b/build_posix/Make.subdirs
@@ -6,7 +6,6 @@
# If the directory exists, it is added to AUTO_SUBDIRS.
# If a condition is included, the subdir is made conditional via AM_CONDITIONAL
ext/collators/reverse
-ext/compressors/bzip2 BZIP2
ext/compressors/lz4 LZ4
ext/compressors/nop
ext/compressors/snappy SNAPPY
diff --git a/build_posix/aclocal/options.m4 b/build_posix/aclocal/options.m4
index 01d08ce3d16..0fb49dbf1df 100644
--- a/build_posix/aclocal/options.m4
+++ b/build_posix/aclocal/options.m4
@@ -47,23 +47,6 @@ AM_CONDITIONAL([HAVE_BUILTIN_EXTENSION_ZLIB],
[test "$wt_cv_with_builtin_extension_zlib" = "yes"])
AC_MSG_RESULT($with_builtins)
-AC_MSG_CHECKING(if --enable-bzip2 option specified)
-AC_ARG_ENABLE(bzip2,
- [AS_HELP_STRING([--enable-bzip2],
- [Build the bzip2 compressor extension.])], r=$enableval, r=no)
-case "$r" in
-no) wt_cv_enable_bzip2=no;;
-*) wt_cv_enable_bzip2=yes;;
-esac
-AC_MSG_RESULT($wt_cv_enable_bzip2)
-if test "$wt_cv_enable_bzip2" = "yes"; then
- AC_CHECK_HEADER(bzlib.h,,
- [AC_MSG_ERROR([--enable-bzip2 requires bzlib.h])])
- AC_CHECK_LIB(bz2, BZ2_bzCompress,,
- [AC_MSG_ERROR([--enable-bzip2 requires bz2 library])])
-fi
-AM_CONDITIONAL([BZIP2], [test "$wt_cv_enable_bzip2" = "yes"])
-
AH_TEMPLATE(HAVE_DIAGNOSTIC, [Define to 1 for diagnostic tests.])
AC_MSG_CHECKING(if --enable-diagnostic option specified)
AC_ARG_ENABLE(diagnostic,
@@ -165,10 +148,8 @@ no) if test "$wt_cv_with_builtin_extension_snappy" = "yes"; then
esac
AC_MSG_RESULT($wt_cv_enable_snappy)
if test "$wt_cv_enable_snappy" = "yes"; then
- AC_LANG_PUSH([C++])
- AC_CHECK_HEADER(snappy.h,,
+ AC_CHECK_HEADER(snappy-c.h,,
[AC_MSG_ERROR([--enable-snappy requires snappy.h])])
- AC_LANG_POP([C++])
AC_CHECK_LIB(snappy, snappy_compress,,
[AC_MSG_ERROR([--enable-snappy requires snappy library])])
fi
@@ -193,10 +174,8 @@ no) if test "$wt_cv_with_builtin_extension_lz4" = "yes"; then
esac
AC_MSG_RESULT($wt_cv_enable_lz4)
if test "$wt_cv_enable_lz4" = "yes"; then
- AC_LANG_PUSH([C++])
AC_CHECK_HEADER(lz4.h,,
[AC_MSG_ERROR([--enable-lz4 requires lz4.h])])
- AC_LANG_POP([C++])
AC_CHECK_LIB(lz4, LZ4_compress_destSize,,
[AC_MSG_ERROR([--enable-lz4 requires lz4 library with LZ4_compress_destSize support])])
fi
@@ -212,10 +191,8 @@ no) wt_cv_enable_tcmalloc=no;;
esac
AC_MSG_RESULT($wt_cv_enable_tcmalloc)
if test "$wt_cv_enable_tcmalloc" = "yes"; then
- AC_LANG_PUSH([C++])
AC_CHECK_HEADER(gperftools/tcmalloc.h,,
[AC_MSG_ERROR([--enable-tcmalloc requires gperftools/tcmalloc.h])])
- AC_LANG_POP([C++])
AC_CHECK_LIB(tcmalloc, tc_calloc,,
[AC_MSG_ERROR([--enable-tcmalloc requires tcmalloc library])])
fi
diff --git a/build_posix/configure.ac.in b/build_posix/configure.ac.in
index 5949fb0509c..875c8b436a8 100644
--- a/build_posix/configure.ac.in
+++ b/build_posix/configure.ac.in
@@ -31,7 +31,7 @@ LT_INIT([pic-only])
AC_SUBST([LIBTOOL_DEPS])
AC_PROG_CC(cc gcc)
-# AC_PROG_CXX(c++ g++)
+AC_PROG_CXX(c++ g++)
if test "$GCC" = "yes"; then
# The Solaris gcc compiler gets the additional -pthreads flag.
diff --git a/build_win/wiredtiger_config.h b/build_win/wiredtiger_config.h
index d1ed307cb85..33dbc9b724d 100644
--- a/build_win/wiredtiger_config.h
+++ b/build_win/wiredtiger_config.h
@@ -52,9 +52,6 @@
/* Define to 1 if you have the <inttypes.h> header file. */
#define HAVE_INTTYPES_H 1
-/* Define to 1 if you have the `bz2' library (-lbz2). */
-/* #undef HAVE_LIBBZ2 */
-
/* Define to 1 if you have the `dl' library (-ldl). */
/* #undef HAVE_LIBDL */
diff --git a/dist/api_data.py b/dist/api_data.py
index e4055112d5e..c386c0b345d 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -136,8 +136,8 @@ file_config = format_meta + [
configure a compressor for file blocks. Permitted values are \c "none"
or custom compression engine name created with
WT_CONNECTION::add_compressor. If WiredTiger has builtin support for
- \c "bzip2", \c "snappy", \c "lz4" or \c "zlib" compression, these names
- are also available. See @ref compression for more information'''),
+ \c "snappy", \c "lz4" or \c "zlib" compression, these names are also
+ available. See @ref compression for more information'''),
Config('cache_resident', 'false', r'''
do not ever evict the object's pages from cache. Not compatible with
LSM tables; see @ref tuning_cache_resident for more information''',
@@ -422,9 +422,8 @@ connection_runtime_config = [
configure a compressor for log records. Permitted values are
\c "none" or custom compression engine name created with
WT_CONNECTION::add_compressor. If WiredTiger has builtin support
- for \c "bzip2", \c "snappy", \c "lz4" or \c "zlib" compression,
- these names are also available. See @ref compression for more
- information'''),
+ for \c "snappy", \c "lz4" or \c "zlib" compression, these names
+ are also available. See @ref compression for more information'''),
Config('enabled', 'false', r'''
enable logging subsystem''',
type='boolean'),
diff --git a/dist/extlist b/dist/extlist
index 874d21289d2..a5515642d48 100644
--- a/dist/extlist
+++ b/dist/extlist
@@ -2,7 +2,6 @@
# List of extension source files for WiredTiger library.
ext/collators/reverse/reverse_collator.c
-ext/compressors/bzip2/bzip2_compress.c
ext/compressors/nop/nop_compress.c
ext/compressors/snappy/snappy_compress.c
ext/compressors/zlib/zlib_compress.c
diff --git a/dist/s_define.list b/dist/s_define.list
index 8b0d9a0bdcd..e3f0dc7f181 100644
--- a/dist/s_define.list
+++ b/dist/s_define.list
@@ -52,6 +52,9 @@ WT_STAT_WRITE
WT_TIMEDIFF_US
WT_TRET_ERROR_OK
WT_WITH_LOCK
+WT_WITH_LOCK_WAIT
__F
__WIREDTIGER_EXT_H_
__WIREDTIGER_H_
+__WT_INTERNAL_H
+__wt_bswap16
diff --git a/dist/s_string.ok b/dist/s_string.ok
index b2d5a453441..19fa27cd719 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -396,6 +396,7 @@ bool
boolean
br
breakpoint
+bswap
bt
btcur
btmem
@@ -409,6 +410,7 @@ builtin
builtins
bytelock
bytestring
+byteswap
byvalue
bzCompressEnd
bzCompressInit
diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c
index f7ee857a7c7..418c99ad6a3 100644
--- a/examples/c/ex_all.c
+++ b/examples/c/ex_all.c
@@ -591,13 +591,6 @@ session_ops(WT_SESSION *session)
* the code snippets, use #ifdef's to avoid running it.
*/
#ifdef MIGHT_NOT_RUN
- /*! [Create a bzip2 compressed table] */
- ret = session->create(session,
- "table:mytable",
- "block_compressor=bzip2,key_format=S,value_format=S");
- /*! [Create a bzip2 compressed table] */
- ret = session->drop(session, "table:mytable", NULL);
-
/*! [Create a lz4 compressed table] */
ret = session->create(session,
"table:mytable",
@@ -1084,14 +1077,6 @@ main(void)
* be installed, causing the open to fail. The documentation requires
* the code snippets, use #ifdef's to avoid running it.
*/
- /*! [Configure bzip2 extension] */
- ret = wiredtiger_open(home, NULL,
- "create,"
- "extensions=[/usr/local/lib/libwiredtiger_bzip2.so]", &conn);
- /*! [Configure bzip2 extension] */
- if (ret == 0)
- (void)conn->close(conn, NULL);
-
/*! [Configure lz4 extension] */
ret = wiredtiger_open(home, NULL,
"create,"
diff --git a/examples/c/ex_encrypt.c b/examples/c/ex_encrypt.c
index 425ee6b7287..c53a61c92ea 100644
--- a/examples/c/ex_encrypt.c
+++ b/examples/c/ex_encrypt.c
@@ -389,16 +389,15 @@ simple_walk_log(WT_SESSION *session)
{
WT_CURSOR *cursor;
WT_ITEM logrec_key, logrec_value;
- WT_LSN lsn;
uint64_t txnid;
- uint32_t fileid, opcount, optype, rectype;
+ uint32_t fileid, log_file, log_offset, opcount, optype, rectype;
int found, ret;
ret = session->open_cursor(session, "log:", NULL, NULL, &cursor);
found = 0;
while ((ret = cursor->next(cursor)) == 0) {
- ret = cursor->get_key(cursor, &lsn.file, &lsn.offset, &opcount);
+ ret = cursor->get_key(cursor, &log_file, &log_offset, &opcount);
ret = cursor->get_value(cursor, &txnid,
&rectype, &optype, &fileid, &logrec_key, &logrec_value);
diff --git a/examples/c/ex_log.c b/examples/c/ex_log.c
index cc6a3c46b93..78bd7e683cf 100644
--- a/examples/c/ex_log.c
+++ b/examples/c/ex_log.c
@@ -108,15 +108,15 @@ compare_tables(WT_SESSION *session, WT_SESSION *sess_copy)
/*! [log cursor walk] */
static void
-print_record(WT_LSN *lsn, uint32_t opcount,
+print_record(uint32_t log_file, uint32_t log_offset, uint32_t opcount,
uint32_t rectype, uint32_t optype, uint64_t txnid, uint32_t fileid,
WT_ITEM *key, WT_ITEM *value)
{
printf(
- "LSN [%" PRIu32 "][%" PRIu64 "].%" PRIu32
+ "LSN [%" PRIu32 "][%" PRIu32 "].%" PRIu32
": record type %" PRIu32 " optype %" PRIu32
" txnid %" PRIu64 " fileid %" PRIu32,
- lsn->file, (uint64_t)lsn->offset, opcount,
+ log_file, log_offset, opcount,
rectype, optype, txnid, fileid);
printf(" key size %zu value size %zu\n", key->size, value->size);
if (rectype == WT_LOGREC_MESSAGE)
@@ -131,10 +131,9 @@ static int
simple_walk_log(WT_SESSION *session, int count_min)
{
WT_CURSOR *cursor;
- WT_LSN lsn;
WT_ITEM logrec_key, logrec_value;
uint64_t txnid;
- uint32_t fileid, opcount, optype, rectype;
+ uint32_t fileid, log_file, log_offset, opcount, optype, rectype;
int count, ret;
/*! [log cursor open] */
@@ -145,14 +144,14 @@ simple_walk_log(WT_SESSION *session, int count_min)
while ((ret = cursor->next(cursor)) == 0) {
count++;
/*! [log cursor get_key] */
- ret = cursor->get_key(cursor, &lsn.file, &lsn.offset, &opcount);
+ ret = cursor->get_key(cursor, &log_file, &log_offset, &opcount);
/*! [log cursor get_key] */
/*! [log cursor get_value] */
ret = cursor->get_value(cursor, &txnid,
&rectype, &optype, &fileid, &logrec_key, &logrec_value);
/*! [log cursor get_value] */
- print_record(&lsn, opcount,
+ print_record(log_file, log_offset, opcount,
rectype, optype, txnid, fileid, &logrec_key, &logrec_value);
}
if (ret == WT_NOTFOUND)
@@ -173,11 +172,11 @@ walk_log(WT_SESSION *session)
{
WT_CONNECTION *wt_conn2;
WT_CURSOR *cursor, *cursor2;
- WT_LSN lsn, lsnsave;
WT_ITEM logrec_key, logrec_value;
WT_SESSION *session2;
uint64_t txnid;
uint32_t fileid, opcount, optype, rectype;
+ uint32_t log_file, log_offset, save_file, save_offset;
int first, i, in_txn, ret;
ret = setup_copy(&wt_conn2, &session2);
@@ -186,21 +185,23 @@ walk_log(WT_SESSION *session)
i = 0;
in_txn = 0;
txnid = 0;
- memset(&lsnsave, 0, sizeof(lsnsave));
+ save_file = save_offset = 0;
while ((ret = cursor->next(cursor)) == 0) {
- ret = cursor->get_key(cursor, &lsn.file, &lsn.offset, &opcount);
+ ret = cursor->get_key(cursor, &log_file, &log_offset, &opcount);
/*
* Save one of the LSNs we get back to search for it
* later. Pick a later one because we want to walk from
* that LSN to the end (where the multi-step transaction
* was performed). Just choose the record that is MAX_KEYS.
*/
- if (++i == MAX_KEYS)
- lsnsave = lsn;
+ if (++i == MAX_KEYS) {
+ save_file = log_file;
+ save_offset = log_offset;
+ }
ret = cursor->get_value(cursor, &txnid, &rectype,
&optype, &fileid, &logrec_key, &logrec_value);
- print_record(&lsn, opcount,
+ print_record(log_file, log_offset, opcount,
rectype, optype, txnid, fileid, &logrec_key, &logrec_value);
/*
@@ -245,7 +246,7 @@ walk_log(WT_SESSION *session)
ret = cursor->reset(cursor);
/*! [log cursor set_key] */
- cursor->set_key(cursor, lsnsave.file, lsnsave.offset, 0);
+ cursor->set_key(cursor, save_file, save_offset, 0);
/*! [log cursor set_key] */
/*! [log cursor search] */
ret = cursor->search(cursor);
@@ -256,11 +257,11 @@ walk_log(WT_SESSION *session)
*/
first = 1;
while ((ret = cursor->get_key(cursor,
- &lsn.file, &lsn.offset, &opcount)) == 0) {
+ &log_file, &log_offset, &opcount)) == 0) {
if (first) {
first = 0;
- if (lsnsave.file != lsn.file ||
- lsnsave.offset != lsn.offset) {
+ if (save_file != log_file ||
+ save_offset != log_offset) {
fprintf(stderr,
"search returned the wrong LSN\n");
exit (1);
@@ -269,7 +270,7 @@ walk_log(WT_SESSION *session)
ret = cursor->get_value(cursor, &txnid, &rectype,
&optype, &fileid, &logrec_key, &logrec_value);
- print_record(&lsn, opcount,
+ print_record(log_file, log_offset, opcount,
rectype, optype, txnid, fileid, &logrec_key, &logrec_value);
ret = cursor->next(cursor);
diff --git a/examples/java/com/wiredtiger/examples/ex_all.java b/examples/java/com/wiredtiger/examples/ex_all.java
index 153f12d3e27..09db8e0fd56 100644
--- a/examples/java/com/wiredtiger/examples/ex_all.java
+++ b/examples/java/com/wiredtiger/examples/ex_all.java
@@ -516,12 +516,6 @@ session_ops(Session session)
* the code snippets, use if (false) to avoid running it.
*/
if (false) { // MIGHT_NOT_RUN
- /*! [Create a bzip2 compressed table] */
- ret = session.create("table:mytable",
- "block_compressor=bzip2,key_format=S,value_format=S");
- /*! [Create a bzip2 compressed table] */
- ret = session.drop("table:mytable", null);
-
/*! [Create a lz4 compressed table] */
ret = session.create("table:mytable",
"block_compressor=lz4,key_format=S,value_format=S");
@@ -899,13 +893,6 @@ allExample()
* be installed, causing the open to fail. The documentation requires
* the code snippets, use if (false) to avoid running it.
*/
- /*! [Configure bzip2 extension] */
- conn = wiredtiger.open(home,
- "create," +
- "extensions=[/usr/local/lib/libwiredtiger_bzip2.so]");
- /*! [Configure bzip2 extension] */
- conn.close(null);
-
/*! [Configure lz4 extension] */
conn = wiredtiger.open(home,
"create," +
diff --git a/ext/compressors/bzip2/Makefile.am b/ext/compressors/bzip2/Makefile.am
deleted file mode 100644
index 0aedc2efd80..00000000000
--- a/ext/compressors/bzip2/Makefile.am
+++ /dev/null
@@ -1,6 +0,0 @@
-AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include
-
-lib_LTLIBRARIES = libwiredtiger_bzip2.la
-libwiredtiger_bzip2_la_SOURCES = bzip2_compress.c
-libwiredtiger_bzip2_la_LDFLAGS = -avoid-version -module
-libwiredtiger_bzip2_la_LIBADD = -lbz2
diff --git a/ext/compressors/bzip2/bzip2_compress.c b/ext/compressors/bzip2/bzip2_compress.c
deleted file mode 100644
index 845107c609f..00000000000
--- a/ext/compressors/bzip2/bzip2_compress.c
+++ /dev/null
@@ -1,415 +0,0 @@
-/*-
- * Public Domain 2014-2016 MongoDB, Inc.
- * Public Domain 2008-2014 WiredTiger, Inc.
- *
- * This is free and unencumbered software released into the public domain.
- *
- * Anyone is free to copy, modify, publish, use, compile, sell, or
- * distribute this software, either in source code form or as a compiled
- * binary, for any purpose, commercial or non-commercial, and by any
- * means.
- *
- * In jurisdictions that recognize copyright laws, the author or authors
- * of this software dedicate any and all copyright interest in the
- * software to the public domain. We make this dedication for the benefit
- * of the public at large and to the detriment of our heirs and
- * successors. We intend this dedication to be an overt act of
- * relinquishment in perpetuity of all present and future rights to this
- * software under copyright law.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-#include <bzlib.h>
-#include <errno.h>
-#include <inttypes.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <wiredtiger.h>
-#include <wiredtiger_ext.h>
-
-/* Local compressor structure. */
-typedef struct {
- WT_COMPRESSOR compressor; /* Must come first */
-
- WT_EXTENSION_API *wt_api; /* Extension API */
-
- int bz_verbosity; /* Configuration */
- int bz_blocksize100k;
- int bz_workfactor;
- int bz_small;
-} BZIP_COMPRESSOR;
-
-/*
- * Bzip gives us a cookie to pass to the underlying allocation functions; we
- * we need two handles, package them up.
- */
-typedef struct {
- WT_COMPRESSOR *compressor;
- WT_SESSION *session;
-} BZIP_OPAQUE;
-
-/*
- * bzip2_error --
- * Output an error message, and return a standard error code.
- */
-static int
-bzip2_error(
- WT_COMPRESSOR *compressor, WT_SESSION *session, const char *call, int bzret)
-{
- WT_EXTENSION_API *wt_api;
- const char *msg;
-
- wt_api = ((BZIP_COMPRESSOR *)compressor)->wt_api;
-
- switch (bzret) {
- case BZ_MEM_ERROR:
- msg = "BZ_MEM_ERROR";
- break;
- case BZ_OUTBUFF_FULL:
- msg = "BZ_OUTBUFF_FULL";
- break;
- case BZ_SEQUENCE_ERROR:
- msg = "BZ_SEQUENCE_ERROR";
- break;
- case BZ_PARAM_ERROR:
- msg = "BZ_PARAM_ERROR";
- break;
- case BZ_DATA_ERROR:
- msg = "BZ_DATA_ERROR";
- break;
- case BZ_DATA_ERROR_MAGIC:
- msg = "BZ_DATA_ERROR_MAGIC";
- break;
- case BZ_IO_ERROR:
- msg = "BZ_IO_ERROR";
- break;
- case BZ_UNEXPECTED_EOF:
- msg = "BZ_UNEXPECTED_EOF";
- break;
- case BZ_CONFIG_ERROR:
- msg = "BZ_CONFIG_ERROR";
- break;
- default:
- msg = "unknown error";
- break;
- }
-
- (void)wt_api->err_printf(wt_api, session,
- "bzip2 error: %s: %s: %d", call, msg, bzret);
- return (WT_ERROR);
-}
-
-/*
- * bzalloc --
- * Allocate scratch buffers.
- */
-static void *
-bzalloc(void *cookie, int number, int size)
-{
- BZIP_OPAQUE *opaque;
- WT_EXTENSION_API *wt_api;
-
- opaque = cookie;
- wt_api = ((BZIP_COMPRESSOR *)opaque->compressor)->wt_api;
- return (wt_api->scr_alloc(
- wt_api, opaque->session, (size_t)(number * size)));
-}
-
-/*
- * bzfree --
- * Free scratch buffers.
- */
-static void
-bzfree(void *cookie, void *p)
-{
- BZIP_OPAQUE *opaque;
- WT_EXTENSION_API *wt_api;
-
- opaque = cookie;
- wt_api = ((BZIP_COMPRESSOR *)opaque->compressor)->wt_api;
- wt_api->scr_free(wt_api, opaque->session, p);
-}
-
-/*
- * bzip2_compress --
- * WiredTiger bzip2 compression.
- */
-static int
-bzip2_compress(WT_COMPRESSOR *compressor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp, int *compression_failed)
-{
- BZIP_COMPRESSOR *bzip_compressor;
- BZIP_OPAQUE opaque;
- bz_stream bz;
- int ret;
-
- bzip_compressor = (BZIP_COMPRESSOR *)compressor;
-
- memset(&bz, 0, sizeof(bz));
- bz.bzalloc = bzalloc;
- bz.bzfree = bzfree;
- opaque.compressor = compressor;
- opaque.session = session;
- bz.opaque = &opaque;
-
- if ((ret = BZ2_bzCompressInit(&bz,
- bzip_compressor->bz_blocksize100k,
- bzip_compressor->bz_verbosity,
- bzip_compressor->bz_workfactor)) != BZ_OK)
- return (bzip2_error(
- compressor, session, "BZ2_bzCompressInit", ret));
-
- bz.next_in = (char *)src;
- bz.avail_in = (uint32_t)src_len;
- bz.next_out = (char *)dst;
- bz.avail_out = (uint32_t)dst_len;
- if ((ret = BZ2_bzCompress(&bz, BZ_FINISH)) == BZ_STREAM_END) {
- *compression_failed = 0;
- *result_lenp = dst_len - bz.avail_out;
- } else
- *compression_failed = 1;
-
- if ((ret = BZ2_bzCompressEnd(&bz)) != BZ_OK)
- return (
- bzip2_error(compressor, session, "BZ2_bzCompressEnd", ret));
-
- return (0);
-}
-
-/*
- * __bzip2_compress_raw_random --
- * Return a 32-bit pseudo-random number.
- *
- * This is an implementation of George Marsaglia's multiply-with-carry pseudo-
- * random number generator. Computationally fast, with reasonable randomness
- * properties.
- */
-static uint32_t
-__bzip2_compress_raw_random(void)
-{
- static uint32_t m_w = 521288629;
- static uint32_t m_z = 362436069;
-
- m_z = 36969 * (m_z & 65535) + (m_z >> 16);
- m_w = 18000 * (m_w & 65535) + (m_w >> 16);
- return (m_z << 16) + (m_w & 65535);
-}
-
-/*
- * bzip2_compress_raw --
- * Test function for the test/format utility.
- */
-static int
-bzip2_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session,
- size_t page_max, int split_pct, size_t extra,
- uint8_t *src, uint32_t *offsets, uint32_t slots,
- uint8_t *dst, size_t dst_len, int final,
- size_t *result_lenp, uint32_t *result_slotsp)
-{
- uint32_t take, twenty_pct;
- int compression_failed, ret;
-
- (void)page_max; /* Unused parameters */
- (void)split_pct;
- (void)extra;
- (void)final;
-
- /*
- * This function is used by the test/format utility to test the
- * WT_COMPRESSOR::compress_raw functionality.
- *
- * I'm trying to mimic how a real application is likely to behave: if
- * it's a small number of slots, we're not going to take them because
- * they aren't worth compressing. In all likelihood, that's going to
- * be because the btree is wrapping up a page, but that's OK, that is
- * going to happen a lot. In addition, add a 2% chance of not taking
- * anything at all just because we don't want to take it. Otherwise,
- * select between 80 and 100% of the slots and compress them, stepping
- * down by 5 slots at a time until something works.
- */
- take = slots;
- if (take < 10 || __bzip2_compress_raw_random() % 100 < 2)
- take = 0;
- else {
- twenty_pct = (slots / 10) * 2;
- if (twenty_pct < slots)
- take -= __bzip2_compress_raw_random() % twenty_pct;
-
- for (;;) {
- if ((ret = bzip2_compress(compressor, session,
- src, offsets[take],
- dst, dst_len,
- result_lenp, &compression_failed)) != 0)
- return (ret);
- if (!compression_failed)
- break;
- if (take < 10) {
- take = 0;
- break;
- }
- take -= 5;
- }
- }
-
- *result_slotsp = take;
- if (take == 0)
- *result_lenp = 0;
-
-#if 0
- fprintf(stderr,
- "bzip2_compress_raw (%s): page_max %" PRIuMAX
- ", split_pct %u, extra %" PRIuMAX
- ", slots %" PRIu32 ", take %" PRIu32 ": %" PRIu32 " -> %"
- PRIuMAX "\n",
- final ? "final" : "not final",
- (uintmax_t)page_max, split_pct, (uintmax_t)extra,
- slots, take, offsets[take], (uintmax_t)*result_lenp);
-#endif
- return (take == 0 ? EAGAIN : 0);
-}
-
-/*
- * bzip2_decompress --
- * WiredTiger bzip2 decompression.
- */
-static int
-bzip2_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
- uint8_t *src, size_t src_len,
- uint8_t *dst, size_t dst_len,
- size_t *result_lenp)
-{
- BZIP_COMPRESSOR *bzip_compressor;
- BZIP_OPAQUE opaque;
- bz_stream bz;
- int ret, tret;
-
- bzip_compressor = (BZIP_COMPRESSOR *)compressor;
-
- memset(&bz, 0, sizeof(bz));
- bz.bzalloc = bzalloc;
- bz.bzfree = bzfree;
- opaque.compressor = compressor;
- opaque.session = session;
- bz.opaque = &opaque;
-
- if ((ret = BZ2_bzDecompressInit(&bz,
- bzip_compressor->bz_small, bzip_compressor->bz_verbosity)) != BZ_OK)
- return (bzip2_error(
- compressor, session, "BZ2_bzDecompressInit", ret));
-
- bz.next_in = (char *)src;
- bz.avail_in = (uint32_t)src_len;
- bz.next_out = (char *)dst;
- bz.avail_out = (uint32_t)dst_len;
- if ((ret = BZ2_bzDecompress(&bz)) == BZ_STREAM_END) {
- *result_lenp = dst_len - bz.avail_out;
- ret = 0;
- } else {
- /*
- * If BZ2_bzDecompress returns 0, it expects there to be more
- * data available. There isn't, so treat this as an error.
- */
- if (ret == 0)
- ret = BZ_DATA_ERROR;
- (void)bzip2_error(compressor, session, "BZ2_bzDecompress", ret);
- }
-
- if ((tret = BZ2_bzDecompressEnd(&bz)) != BZ_OK)
- return (bzip2_error(
- compressor, session, "BZ2_bzDecompressEnd", tret));
-
- return (ret == 0 ?
- 0 : bzip2_error(compressor, session, "BZ2_bzDecompressEnd", ret));
-}
-
-/*
- * bzip2_terminate --
- * WiredTiger bzip2 compression termination.
- */
-static int
-bzip2_terminate(WT_COMPRESSOR *compressor, WT_SESSION *session)
-{
- (void)session; /* Unused parameters */
-
- free(compressor);
- return (0);
-}
-
-/*
- * bzip2_add_compressor --
- * Add a bzip2 compressor.
- */
-static int
-bzip2_add_compressor(WT_CONNECTION *connection, int raw, const char *name)
-{
- BZIP_COMPRESSOR *bzip_compressor;
-
- /*
- * There are two almost identical bzip2 compressors: one supporting raw
- * compression (used by test/format to test raw compression), the other
- * without raw compression, that might be useful for real applications.
- */
- if ((bzip_compressor = calloc(1, sizeof(BZIP_COMPRESSOR))) == NULL)
- return (errno);
-
- bzip_compressor->compressor.compress = bzip2_compress;
- bzip_compressor->
- compressor.compress_raw = raw ? bzip2_compress_raw : NULL;
- bzip_compressor->compressor.decompress = bzip2_decompress;
- bzip_compressor->compressor.pre_size = NULL;
- bzip_compressor->compressor.terminate = bzip2_terminate;
-
- bzip_compressor->wt_api = connection->get_extension_api(connection);
-
- /* between 0-4: set the amount of verbosity to stderr */
- bzip_compressor->bz_verbosity = 0;
-
- /*
- * between 1-9: set the block size to 100k x this number (compression
- * only)
- */
- bzip_compressor->bz_blocksize100k = 1;
-
- /*
- * between 0-250: workFactor: see bzip2 manual. 0 is a reasonable
- * default (compression only)
- */
- bzip_compressor->bz_workfactor = 0;
-
- /*
- * if nonzero, decompress using less memory, but slower (decompression
- * only)
- */
- bzip_compressor->bz_small = 0;
-
- return (connection->add_compressor( /* Load the compressor */
- connection, name, (WT_COMPRESSOR *)bzip_compressor, NULL));
-}
-
-/*
- * wiredtiger_extension_init --
- * WiredTiger bzip2 compression extension.
- */
-int
-wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config)
-{
- int ret;
-
- (void)config; /* Unused parameters */
-
- if ((ret = bzip2_add_compressor(connection, 0, "bzip2")) != 0)
- return (ret);
- if ((ret = bzip2_add_compressor(connection, 1, "bzip2-raw-test")) != 0)
- return (ret);
- return (0);
-}
diff --git a/ext/compressors/lz4/lz4_compress.c b/ext/compressors/lz4/lz4_compress.c
index d070dc3fb79..062307b721a 100644
--- a/ext/compressors/lz4/lz4_compress.c
+++ b/ext/compressors/lz4/lz4_compress.c
@@ -26,23 +26,13 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
+#include <wt_internal.h>
+
#include <lz4.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
-#include <wiredtiger.h>
-#include <wiredtiger_ext.h>
-
-/*
- * We need to include the configuration file to detect whether this extension
- * is being built into the WiredTiger library.
- */
-#include "wiredtiger_config.h"
-#ifdef _MSC_VER
-#define inline __inline
-#endif
-
/* Local compressor structure. */
typedef struct {
WT_COMPRESSOR compressor; /* Must come first */
@@ -73,6 +63,24 @@ typedef struct {
} LZ4_PREFIX;
/*
+ * lz4_prefix_swap --
+ * The additional information is written in little-endian format, handle
+ * the conversion.
+ */
+static inline void
+lz4_prefix_swap(LZ4_PREFIX *prefix)
+{
+#ifdef WORDS_BIGENDIAN
+ prefix->compressed_len = __wt_bswap32(prefix->compressed_len);
+ prefix->uncompressed_len = __wt_bswap32(prefix->uncompressed_len);
+ prefix->useful_len = __wt_bswap32(prefix->useful_len);
+ prefix->unused = __wt_bswap32(prefix->unused);
+#else
+ WT_UNUSED(prefix);
+#endif
+}
+
+/*
* lz4_error --
* Output an error message, and return a standard error code.
*/
@@ -119,6 +127,7 @@ lz4_compress(WT_COMPRESSOR *compressor, WT_SESSION *session,
prefix.uncompressed_len = (uint32_t)src_len;
prefix.useful_len = (uint32_t)src_len;
prefix.unused = 0;
+ lz4_prefix_swap(&prefix);
memcpy(dst, &prefix, sizeof(LZ4_PREFIX));
*result_lenp = (size_t)lz4_len + sizeof(LZ4_PREFIX);
@@ -154,6 +163,7 @@ lz4_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
* decompressed bytes to return from the start of the source buffer.
*/
memcpy(&prefix, src, sizeof(LZ4_PREFIX));
+ lz4_prefix_swap(&prefix);
/*
* Decompress, starting after the prefix bytes. Use safe decompression:
@@ -268,6 +278,7 @@ lz4_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session,
prefix.uncompressed_len = (uint32_t)sourceSize;
prefix.useful_len = offsets[slot];
prefix.unused = 0;
+ lz4_prefix_swap(&prefix);
memcpy(dst, &prefix, sizeof(LZ4_PREFIX));
*result_slotsp = slot;
diff --git a/ext/compressors/snappy/snappy_compress.c b/ext/compressors/snappy/snappy_compress.c
index b5a347fce81..fcefb8bb575 100644
--- a/ext/compressors/snappy/snappy_compress.c
+++ b/ext/compressors/snappy/snappy_compress.c
@@ -26,20 +26,13 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
+#include <wt_internal.h>
+
#include <snappy-c.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
-#include <wiredtiger.h>
-#include <wiredtiger_ext.h>
-
-/*
- * We need to include the configuration file to detect whether this extension
- * is being built into the WiredTiger library.
- */
-#include "wiredtiger_config.h"
-
/* Local compressor structure. */
typedef struct {
WT_COMPRESSOR compressor; /* Must come first */
@@ -103,16 +96,22 @@ wt_snappy_compress(WT_COMPRESSOR *compressor, WT_SESSION *session,
snret = snappy_compress((char *)src, src_len, snapbuf, &snaplen);
if (snret == SNAPPY_OK) {
- /*
- * On decompression, snappy requires the exact compressed byte
- * count (the current value of snaplen). WiredTiger does not
- * preserve that value, so save snaplen at the beginning of the
- * destination buffer.
- */
if (snaplen + sizeof(size_t) < src_len) {
- *(size_t *)dst = snaplen;
*result_lenp = snaplen + sizeof(size_t);
*compression_failed = 0;
+
+ /*
+ * On decompression, snappy requires an exact compressed
+ * byte count (the current value of snaplen). WiredTiger
+ * does not preserve that value, so save snaplen at the
+ * beginning of the destination buffer.
+ *
+ * Store the value in little-endian format.
+ */
+#ifdef WORDS_BIGENDIAN
+ snaplen = __wt_bswap64(snaplen);
+#endif
+ *(size_t *)dst = snaplen;
} else
/* The compressor failed to produce a smaller result. */
*compression_failed = 1;
@@ -137,8 +136,14 @@ wt_snappy_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
wt_api = ((SNAPPY_COMPRESSOR *)compressor)->wt_api;
- /* retrieve the saved length */
+ /*
+ * Retrieve the saved length, handling little- to big-endian conversion
+ * as necessary.
+ */
snaplen = *(size_t *)src;
+#ifdef WORDS_BIGENDIAN
+ snaplen = __wt_bswap64(snaplen);
+#endif
if (snaplen + sizeof(size_t) > src_len) {
(void)wt_api->err_printf(wt_api,
session,
diff --git a/src/block/block_map.c b/src/block/block_map.c
index 3d04a492269..b60623a37d8 100644
--- a/src/block/block_map.c
+++ b/src/block/block_map.c
@@ -20,6 +20,19 @@ __wt_block_map(
*(void **)mapp = NULL;
*maplenp = 0;
+#ifdef WORDS_BIGENDIAN
+ /*
+ * The underlying objects are little-endian, mapping objects isn't
+ * currently supported on big-endian systems.
+ */
+ WT_UNUSED(session);
+ WT_UNUSED(block);
+ WT_UNUSED(mappingcookie);
+#else
+ /* Map support is configurable. */
+ if (!S2C(session)->mmap)
+ return (0);
+
/*
* Turn off mapping when verifying the file, because we can't perform
* checksum validation of mapped segments, and verify has to checksum
@@ -48,6 +61,7 @@ __wt_block_map(
* Ignore errors, we'll read the file through the cache if map fails.
*/
(void)__wt_mmap(session, block->fh, mapp, maplenp, mappingcookie);
+#endif
return (0);
}
diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c
index 6e2dc775362..dceaae8bb99 100644
--- a/src/block/block_mgr.c
+++ b/src/block/block_mgr.c
@@ -69,18 +69,6 @@ __bm_checkpoint(WT_BM *bm,
}
/*
- * __bm_sync --
- * Flush a file to disk.
- */
-static int
-__bm_sync(WT_BM *bm, WT_SESSION_IMPL *session, bool async)
-{
- return (async ?
- __wt_fsync_async(session, bm->block->fh) :
- __wt_fsync(session, bm->block->fh));
-}
-
-/*
* __bm_checkpoint_load --
* Load a checkpoint.
*/
@@ -89,10 +77,6 @@ __bm_checkpoint_load(WT_BM *bm, WT_SESSION_IMPL *session,
const uint8_t *addr, size_t addr_size,
uint8_t *root_addr, size_t *root_addr_sizep, bool checkpoint)
{
- WT_CONNECTION_IMPL *conn;
-
- conn = S2C(session);
-
/* If not opening a checkpoint, we're opening the live system. */
bm->is_live = !checkpoint;
WT_RET(__wt_block_checkpoint_load(session, bm->block,
@@ -103,9 +87,8 @@ __bm_checkpoint_load(WT_BM *bm, WT_SESSION_IMPL *session,
* Read-only objects are optionally mapped into memory instead
* of being read into cache buffers.
*/
- if (conn->mmap)
- WT_RET(__wt_block_map(session, bm->block,
- &bm->map, &bm->maplen, &bm->mappingcookie));
+ WT_RET(__wt_block_map(session,
+ bm->block, &bm->map, &bm->maplen, &bm->mappingcookie));
/*
* If this handle is for a checkpoint, that is, read-only, there
@@ -168,13 +151,13 @@ __bm_close(WT_BM *bm, WT_SESSION_IMPL *session)
}
/*
- * __bm_compact_start --
- * Start a block manager compaction.
+ * __bm_compact_end --
+ * End a block manager compaction.
*/
static int
-__bm_compact_start(WT_BM *bm, WT_SESSION_IMPL *session)
+__bm_compact_end(WT_BM *bm, WT_SESSION_IMPL *session)
{
- return (__wt_block_compact_start(session, bm->block));
+ return (__wt_block_compact_end(session, bm->block));
}
/*
@@ -200,13 +183,13 @@ __bm_compact_skip(WT_BM *bm, WT_SESSION_IMPL *session, bool *skipp)
}
/*
- * __bm_compact_end --
- * End a block manager compaction.
+ * __bm_compact_start --
+ * Start a block manager compaction.
*/
static int
-__bm_compact_end(WT_BM *bm, WT_SESSION_IMPL *session)
+__bm_compact_start(WT_BM *bm, WT_SESSION_IMPL *session)
{
- return (__wt_block_compact_end(session, bm->block));
+ return (__wt_block_compact_start(session, bm->block));
}
/*
@@ -233,36 +216,25 @@ __bm_is_mapped(WT_BM *bm, WT_SESSION_IMPL *session)
}
/*
- * __bm_stat --
- * Block-manager statistics.
- */
-static int
-__bm_stat(WT_BM *bm, WT_SESSION_IMPL *session, WT_DSRC_STATS *stats)
-{
- __wt_block_stat(session, bm->block, stats);
- return (0);
-}
-
-/*
- * __bm_write --
- * Write a buffer into a block, returning the block's address cookie.
+ * __bm_salvage_end --
+ * End a block manager salvage.
*/
static int
-__bm_write(WT_BM *bm, WT_SESSION_IMPL *session,
- WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum)
+__bm_salvage_end(WT_BM *bm, WT_SESSION_IMPL *session)
{
- return (__wt_block_write(
- session, bm->block, buf, addr, addr_sizep, data_cksum));
+ return (__wt_block_salvage_end(session, bm->block));
}
/*
- * __bm_write_size --
- * Return the buffer size required to write a block.
+ * __bm_salvage_next --
+ * Return the next block from the file.
*/
static int
-__bm_write_size(WT_BM *bm, WT_SESSION_IMPL *session, size_t *sizep)
+__bm_salvage_next(WT_BM *bm,
+ WT_SESSION_IMPL *session, uint8_t *addr, size_t *addr_sizep, bool *eofp)
{
- return (__wt_block_write_size(session, bm->block, sizep));
+ return (__wt_block_salvage_next(
+ session, bm->block, addr, addr_sizep, eofp));
}
/*
@@ -288,25 +260,47 @@ __bm_salvage_valid(WT_BM *bm,
}
/*
- * __bm_salvage_next --
- * Return the next block from the file.
+ * __bm_stat --
+ * Block-manager statistics.
*/
static int
-__bm_salvage_next(WT_BM *bm,
- WT_SESSION_IMPL *session, uint8_t *addr, size_t *addr_sizep, bool *eofp)
+__bm_stat(WT_BM *bm, WT_SESSION_IMPL *session, WT_DSRC_STATS *stats)
{
- return (__wt_block_salvage_next(
- session, bm->block, addr, addr_sizep, eofp));
+ __wt_block_stat(session, bm->block, stats);
+ return (0);
}
/*
- * __bm_salvage_end --
- * End a block manager salvage.
+ * __bm_sync --
+ * Flush a file to disk.
*/
static int
-__bm_salvage_end(WT_BM *bm, WT_SESSION_IMPL *session)
+__bm_sync(WT_BM *bm, WT_SESSION_IMPL *session, bool async)
{
- return (__wt_block_salvage_end(session, bm->block));
+ return (async ?
+ __wt_fsync_async(session, bm->block->fh) :
+ __wt_fsync(session, bm->block->fh));
+}
+
+/*
+ * __bm_verify_addr --
+ * Verify an address.
+ */
+static int
+__bm_verify_addr(WT_BM *bm,
+ WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
+{
+ return (__wt_block_verify_addr(session, bm->block, addr, addr_size));
+}
+
+/*
+ * __bm_verify_end --
+ * End a block manager verify.
+ */
+static int
+__bm_verify_end(WT_BM *bm, WT_SESSION_IMPL *session)
+{
+ return (__wt_block_verify_end(session, bm->block));
}
/*
@@ -321,24 +315,25 @@ __bm_verify_start(WT_BM *bm,
}
/*
- * __bm_verify_addr --
- * Verify an address.
+ * __bm_write --
+ * Write a buffer into a block, returning the block's address cookie.
*/
static int
-__bm_verify_addr(WT_BM *bm,
- WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
+__bm_write(WT_BM *bm, WT_SESSION_IMPL *session,
+ WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum)
{
- return (__wt_block_verify_addr(session, bm->block, addr, addr_size));
+ return (__wt_block_write(
+ session, bm->block, buf, addr, addr_sizep, data_cksum));
}
/*
- * __bm_verify_end --
- * End a block manager verify.
+ * __bm_write_size --
+ * Return the buffer size required to write a block.
*/
static int
-__bm_verify_end(WT_BM *bm, WT_SESSION_IMPL *session)
+__bm_write_size(WT_BM *bm, WT_SESSION_IMPL *session, size_t *sizep)
{
- return (__wt_block_verify_end(session, bm->block));
+ return (__wt_block_write_size(session, bm->block, sizep));
}
/*
diff --git a/src/block/block_open.c b/src/block/block_open.c
index dd0f3f0716a..d9b2f908737 100644
--- a/src/block/block_open.c
+++ b/src/block/block_open.c
@@ -296,15 +296,21 @@ __wt_desc_init(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize)
WT_RET(__wt_scr_alloc(session, allocsize, &buf));
memset(buf->mem, 0, allocsize);
+ /*
+ * Checksum a little-endian version of the header, and write everything
+ * in little-endian format. The checksum is (potentially) returned in a
+ * big-endian format, swap it into place in a separate step.
+ */
desc = buf->mem;
desc->magic = WT_BLOCK_MAGIC;
desc->majorv = WT_BLOCK_MAJOR_VERSION;
desc->minorv = WT_BLOCK_MINOR_VERSION;
-
- /* Update the checksum. */
desc->cksum = 0;
+ __wt_block_desc_byteswap(desc);
desc->cksum = __wt_cksum(desc, allocsize);
-
+#ifdef WORDS_BIGENDIAN
+ desc->cksum = __wt_bswap32(desc->cksum);
+#endif
ret = __wt_write(session, fh, (wt_off_t)0, (size_t)allocsize, desc);
__wt_scr_free(session, &buf);
@@ -321,7 +327,7 @@ __desc_read(WT_SESSION_IMPL *session, WT_BLOCK *block)
WT_BLOCK_DESC *desc;
WT_DECL_ITEM(buf);
WT_DECL_RET;
- uint32_t cksum;
+ uint32_t cksum_calculate, cksum_tmp;
/* Use a scratch buffer to get correct alignment for direct I/O. */
WT_RET(__wt_scr_alloc(session, block->allocsize, &buf));
@@ -330,14 +336,19 @@ __desc_read(WT_SESSION_IMPL *session, WT_BLOCK *block)
WT_ERR(__wt_read(session,
block->fh, (wt_off_t)0, (size_t)block->allocsize, buf->mem));
+ /*
+ * Handle little- and big-endian objects. Objects are written in little-
+ * endian format: save the header checksum, and calculate the checksum
+ * for the header in its little-endian form. Then, restore the header's
+ * checksum, and byte-swap the whole thing as necessary, leaving us with
+ * a calculated checksum that should match the checksum in the header.
+ */
desc = buf->mem;
- WT_ERR(__wt_verbose(session, WT_VERB_BLOCK,
- "%s: magic %" PRIu32
- ", major/minor: %" PRIu32 "/%" PRIu32
- ", checksum %#" PRIx32,
- block->name, desc->magic,
- desc->majorv, desc->minorv,
- desc->cksum));
+ cksum_tmp = desc->cksum;
+ desc->cksum = 0;
+ cksum_calculate = __wt_cksum(desc, block->allocsize);
+ desc->cksum = cksum_tmp;
+ __wt_block_desc_byteswap(desc);
/*
* We fail the open if the checksum fails, or the magic number is wrong
@@ -348,10 +359,7 @@ __desc_read(WT_SESSION_IMPL *session, WT_BLOCK *block)
* may have entered the wrong file name, and is now frantically pounding
* their interrupt key.
*/
- cksum = desc->cksum;
- desc->cksum = 0;
- if (desc->magic != WT_BLOCK_MAGIC ||
- cksum != __wt_cksum(desc, block->allocsize))
+ if (desc->magic != WT_BLOCK_MAGIC || desc->cksum != cksum_calculate)
WT_ERR_MSG(session, WT_ERROR,
"%s does not appear to be a WiredTiger file", block->name);
@@ -365,6 +373,14 @@ __desc_read(WT_SESSION_IMPL *session, WT_BLOCK *block)
WT_BLOCK_MAJOR_VERSION, WT_BLOCK_MINOR_VERSION,
desc->majorv, desc->minorv);
+ WT_ERR(__wt_verbose(session, WT_VERB_BLOCK,
+ "%s: magic %" PRIu32
+ ", major/minor: %" PRIu32 "/%" PRIu32
+ ", checksum %#" PRIx32,
+ block->name, desc->magic,
+ desc->majorv, desc->minorv,
+ desc->cksum));
+
err: __wt_scr_free(session, &buf);
return (ret);
}
diff --git a/src/block/block_read.c b/src/block/block_read.c
index 0e5911ecf2a..6e74d7a7793 100644
--- a/src/block/block_read.c
+++ b/src/block/block_read.c
@@ -139,6 +139,7 @@ __wt_block_read_off_blind(
WT_RET(__wt_read(
session, block->fh, offset, (size_t)block->allocsize, buf->mem));
blk = WT_BLOCK_HEADER_REF(buf->mem);
+ __wt_block_header_byteswap(blk);
/*
* Copy out the size and checksum (we're about to re-use the buffer),
@@ -163,7 +164,7 @@ int
__wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
WT_ITEM *buf, wt_off_t offset, uint32_t size, uint32_t cksum)
{
- WT_BLOCK_HEADER *blk;
+ WT_BLOCK_HEADER *blk, swap;
size_t bufsize;
uint32_t page_cksum;
@@ -193,14 +194,26 @@ __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
WT_RET(__wt_read(session, block->fh, offset, size, buf->mem));
buf->size = size;
+ /*
+ * We incrementally read through the structure before doing a checksum,
+ * do little- to big-endian handling early on, and then select from the
+ * original or swapped structure as needed.
+ */
blk = WT_BLOCK_HEADER_REF(buf->mem);
- if (blk->cksum == cksum) {
+ __wt_block_header_byteswap_copy(blk, &swap);
+ if (swap.cksum == cksum) {
blk->cksum = 0;
page_cksum = __wt_cksum(buf->mem,
- F_ISSET(blk, WT_BLOCK_DATA_CKSUM) ?
+ F_ISSET(&swap, WT_BLOCK_DATA_CKSUM) ?
size : WT_BLOCK_COMPRESS_SKIP);
- if (page_cksum == cksum)
+ if (page_cksum == cksum) {
+ /*
+ * Swap the page-header as needed; this doesn't belong
+ * here, but it's the best place to catch all callers.
+ */
+ __wt_page_header_byteswap(buf->mem);
return (0);
+ }
if (!F_ISSET(session, WT_SESSION_QUIET_CORRUPT_FILE))
__wt_errx(session,
@@ -216,7 +229,7 @@ __wt_block_read_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
"offset %" PRIuMAX ": block header checksum "
"of %" PRIu32 " doesn't match expected checksum "
"of %" PRIu32,
- size, (uintmax_t)offset, blk->cksum, cksum);
+ size, (uintmax_t)offset, swap.cksum, cksum);
/* Panic if a checksum fails during an ordinary read. */
return (block->verify ||
diff --git a/src/block/block_slvg.c b/src/block/block_slvg.c
index ef22c727db4..a8cccd53023 100644
--- a/src/block/block_slvg.c
+++ b/src/block/block_slvg.c
@@ -126,6 +126,7 @@ __wt_block_salvage_next(WT_SESSION_IMPL *session,
WT_ERR(__wt_read(
session, fh, offset, (size_t)allocsize, tmp->mem));
blk = WT_BLOCK_HEADER_REF(tmp->mem);
+ __wt_block_header_byteswap(blk);
size = blk->disk_size;
cksum = blk->cksum;
diff --git a/src/block/block_write.c b/src/block/block_write.c
index 23f4d7650b9..4c6ac198fe4 100644
--- a/src/block/block_write.c
+++ b/src/block/block_write.c
@@ -203,11 +203,18 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
WT_FH *fh;
size_t align_size;
wt_off_t offset;
+ uint32_t cksum;
bool local_locked;
blk = WT_BLOCK_HEADER_REF(buf->mem);
fh = block->fh;
+ /*
+ * Swap the page-header as needed; this doesn't belong here, but it's
+ * the best place to catch all callers.
+ */
+ __wt_page_header_byteswap(buf->mem);
+
/* Buffers should be aligned for writing. */
if (!F_ISSET(buf, WT_ITEM_ALIGNED)) {
WT_ASSERT(session, F_ISSET(buf, WT_ITEM_ALIGNED));
@@ -255,13 +262,21 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
* because they're not compressed, both to give salvage a quick test
* of whether a block is useful and to give us a test so we don't lose
* the first WT_BLOCK_COMPRESS_SKIP bytes without noticing.
+ *
+ * Checksum a little-endian version of the header, and write everything
+ * in little-endian format. The checksum is (potentially) returned in a
+ * big-endian format, swap it into place in a separate step.
*/
blk->flags = 0;
if (data_cksum)
F_SET(blk, WT_BLOCK_DATA_CKSUM);
blk->cksum = 0;
- blk->cksum = __wt_cksum(
+ __wt_block_header_byteswap(blk);
+ blk->cksum = cksum = __wt_cksum(
buf->mem, data_cksum ? align_size : WT_BLOCK_COMPRESS_SKIP);
+#ifdef WORDS_BIGENDIAN
+ blk->cksum = __wt_bswap32(blk->cksum);
+#endif
/* Pre-allocate some number of extension structures. */
WT_RET(__wt_block_ext_prealloc(session, 5));
@@ -325,11 +340,11 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
WT_RET(__wt_verbose(session, WT_VERB_WRITE,
"off %" PRIuMAX ", size %" PRIuMAX ", cksum %" PRIu32,
- (uintmax_t)offset, (uintmax_t)align_size, blk->cksum));
+ (uintmax_t)offset, (uintmax_t)align_size, cksum));
*offsetp = offset;
*sizep = WT_STORE_SIZE(align_size);
- *cksump = blk->cksum;
+ *cksump = cksum;
- return (ret);
+ return (0);
}
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index 7f0f37d95d6..2db3ca7d984 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -329,7 +329,7 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
* always inherit from the connection.
*/
WT_RET(__wt_config_gets(session, cfg, "encryption.name", &cval));
- if (WT_IS_METADATA(btree->dhandle) || cval.len == 0)
+ if (WT_IS_METADATA(session, btree->dhandle) || cval.len == 0)
btree->kencryptor = conn->kencryptor;
else if (WT_STRING_MATCH("none", cval.str, cval.len))
btree->kencryptor = NULL;
@@ -420,7 +420,7 @@ __wt_btree_tree_open(
* Failure to open metadata means that the database is unavailable.
* Try to provide a helpful failure message.
*/
- if (ret != 0 && WT_IS_METADATA(session->dhandle)) {
+ if (ret != 0 && WT_IS_METADATA(session, session->dhandle)) {
__wt_errx(session,
"WiredTiger has failed to open its metadata");
__wt_errx(session, "This may be due to the database"
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index ac83a21ac6f..bd38451d5d1 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -875,16 +875,24 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
/* The split is complete and correct, ignore benign errors. */
complete = WT_ERR_IGNORE;
- WT_ERR(__wt_verbose(session, WT_VERB_SPLIT,
- "%p: %s %s" "split into parent %p, %" PRIu32 " -> %" PRIu32
- " (%s%" PRIu32 ")",
- ref->page, ref->page == NULL ?
- "unknown page type" : __wt_page_type_string(ref->page->type),
- ref->page == NULL ? "reverse " : "", parent,
- parent_entries, result_entries,
- ref->page == NULL ? "-" : "+",
- ref->page == NULL ?
- parent_entries - result_entries : result_entries - parent_entries));
+ /*
+ * !!!
+ * Swapping in the new page index released the page for eviction, we can
+ * no longer look inside the page.
+ */
+
+ if (ref->page == NULL)
+ WT_ERR(__wt_verbose(session, WT_VERB_SPLIT,
+ "%p: reverse split into parent %p, %" PRIu32 " -> %" PRIu32
+ " (-%" PRIu32 ")",
+ ref->page, parent, parent_entries, result_entries,
+ parent_entries - result_entries));
+ else
+ WT_ERR(__wt_verbose(session, WT_VERB_SPLIT,
+ "%p: split into parent %p, %" PRIu32 " -> %" PRIu32
+ " (+%" PRIu32 ")",
+ ref->page, parent, parent_entries, result_entries,
+ result_entries - parent_entries));
/*
* The new page index is in place, free the WT_REF we were splitting and
@@ -935,8 +943,10 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
parent_decr += sizeof(WT_REF);
}
- /* We freed the reference that was split in the loop above. */
- ref = NULL;
+ /*
+ * !!!
+ * The original WT_REF has now been freed, we can no longer look at it.
+ */
/*
* We can't free the previous page index, there may be threads using it.
diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c
index 49a59b89552..55b11d7b2d1 100644
--- a/src/btree/bt_walk.c
+++ b/src/btree/bt_walk.c
@@ -89,11 +89,11 @@ __ref_is_leaf(WT_REF *ref)
}
/*
- * __page_ascend --
+ * __ref_ascend --
* Ascend the tree one level.
*/
-static void
-__page_ascend(WT_SESSION_IMPL *session,
+static inline void
+__ref_ascend(WT_SESSION_IMPL *session,
WT_REF **refp, WT_PAGE_INDEX **pindexp, uint32_t *slotp)
{
WT_REF *parent_ref, *ref;
@@ -163,23 +163,20 @@ __page_ascend(WT_SESSION_IMPL *session,
}
/*
- * __page_descend --
- * Descend the tree one level.
+ * __ref_descend_prev --
+ * Descend the tree one level, during a previous-cursor walk.
*/
-static void
-__page_descend(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_PAGE_INDEX **pindexp, uint32_t *slotp, bool prev)
+static inline void
+__ref_descend_prev(
+ WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp)
{
WT_PAGE_INDEX *pindex;
/*
- * Ref is a child page into which we're descending, and on which we
- * have a hazard pointer.
+ * We're passed a child page into which we're descending, and on which
+ * we have a hazard pointer.
*/
for (;; __wt_yield()) {
- WT_INTL_INDEX_GET(session, page, pindex);
- *slotp = prev ? pindex->entries - 1 : 0;
-
/*
* There's a split race when a cursor moving backwards through
* the tree descends the tree. If we're splitting an internal
@@ -233,21 +230,41 @@ __page_descend(WT_SESSION_IMPL *session,
* being split and part of its namespace moved. We have the
* correct page and we don't have to move, all we have to do is
* wait until the split page's page index is updated.
- *
- * No test is necessary for a next-cursor movement because we
- * do right-hand splits on internal pages and the initial part
- * of the page's namespace won't change as part of a split.
- * Instead of testing the direction boolean, do the test the
- * previous cursor movement requires in all cases, even though
- * it will always succeed for a next-cursor movement.
*/
- if (pindex->index[*slotp]->home == page)
+ WT_INTL_INDEX_GET(session, ref->page, pindex);
+ if (pindex->index[pindex->entries - 1]->home == ref->page)
break;
}
*pindexp = pindex;
}
/*
+ * __ref_initial_descent_prev --
+ * Descend the tree one level, when setting up the initial cursor position
+ * for a previous-cursor walk.
+ */
+static inline bool
+__ref_initial_descent_prev(
+ WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp)
+{
+ WT_PAGE_INDEX *pindex;
+
+ /*
+ * We're passed a child page into which we're descending, and on which
+ * we have a hazard pointer.
+ *
+ * Acquire a page index for the child page and then confirm we haven't
+ * raced with a parent split.
+ */
+ WT_INTL_INDEX_GET(session, ref->page, pindex);
+ if (__wt_split_descent_race(session, ref, *pindexp))
+ return (false);
+
+ *pindexp = pindex;
+ return (true);
+}
+
+/*
* __tree_walk_internal --
* Move to the next/previous page in the tree.
*/
@@ -259,11 +276,12 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
WT_DECL_RET;
WT_PAGE_INDEX *pindex;
WT_REF *couple, *couple_orig, *ref;
- bool empty_internal, prev, skip;
+ bool empty_internal, initial_descent, prev, skip;
uint32_t slot;
btree = S2BT(session);
- empty_internal = false;
+ pindex = NULL;
+ empty_internal = initial_descent = false;
/*
* Tree walks are special: they look inside page structures that splits
@@ -323,22 +341,30 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
couple = couple_orig = ref = *refp;
*refp = NULL;
- /* If no page is active, begin a walk from the start of the tree. */
+ /* If no page is active, begin a walk from the start/end of the tree. */
if (ref == NULL) {
- ref = &btree->root;
+restart: /*
+ * We can reach here with a NULL or root reference; the release
+ * function handles them internally, don't complicate this code
+ * by calling them out.
+ */
+ WT_ERR(__wt_page_release(session, couple, flags));
+
+ couple = couple_orig = ref = &btree->root;
if (ref->page == NULL)
goto done;
+
+ initial_descent = true;
goto descend;
}
/*
- * If the active page was the root, we've reached the walk's end.
- * Release any hazard-pointer we're holding.
+ * If the active page was the root, we've reached the walk's end; we
+ * only get here if we've returned the root to our caller, so we're
+ * holding no hazard pointers.
*/
- if (__wt_ref_is_root(ref)) {
- WT_ERR(__wt_page_release(session, couple, flags));
+ if (__wt_ref_is_root(ref))
goto done;
- }
/* Figure out the current slot in the WT_REF array. */
__ref_index_slot(session, ref, &pindex, &slot);
@@ -352,7 +378,7 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
while ((prev && slot == 0) ||
(!prev && slot == pindex->entries - 1)) {
/* Ascend to the parent. */
- __page_ascend(session, &ref, &pindex, &slot);
+ __ref_ascend(session, &ref, &pindex, &slot);
/*
* If we got all the way through an internal page and
@@ -521,16 +547,21 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
ret = 0;
/*
+ * If a cursor is setting up at the end of the
+ * tree, we can't use our parent page's index,
+ * because it may have already split; restart
+ * the walk.
+ */
+ if (prev && initial_descent)
+ goto restart;
+
+ /*
* If a new walk that never coupled from the
* root to a new saved position in the tree,
* restart the walk.
*/
- if (couple == &btree->root) {
- ref = &btree->root;
- if (ref->page == NULL)
- goto done;
- goto descend;
- }
+ if (couple == &btree->root)
+ goto restart;
/*
* If restarting from some original position,
@@ -561,10 +592,56 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
descend: couple = ref;
empty_internal = true;
- __page_descend(
- session, ref->page, &pindex, &slot, prev);
+ /*
+ * There's a split race when a cursor is setting
+ * up at the end of the tree or moving backwards
+ * through the tree and descending a level. When
+ * splitting an internal page into its parent,
+ * we move the WT_REF structures and update the
+ * parent's page index before updating the split
+ * page's page index, and it's not an atomic
+ * update. A thread can read the parent page's
+ * replacement page index, then read the split
+ * page's original index, or the parent page's
+ * original and the split page's replacement.
+ *
+ * This isn't a problem for a cursor setting up
+ * at the start of the tree or moving forwards
+ * through the tree because we do right-hand
+ * splits on internal pages and the initial part
+ * of the split page's namespace won't change as
+ * part of a split. A thread reading the parent
+ * page's and split page's indexes will move to
+ * the same slot no matter what order of indexes
+ * are read.
+ *
+ * Handle a cursor setting up at the end of the
+ * tree or moving backwards through the tree.
+ */
+ if (!prev) {
+ WT_INTL_INDEX_GET(
+ session, ref->page, pindex);
+ slot = 0;
+ } else if (initial_descent) {
+ if (!__ref_initial_descent_prev(
+ session, ref, &pindex))
+ goto restart;
+ slot = pindex->entries - 1;
+ } else {
+ __ref_descend_prev(
+ session, ref, &pindex);
+ slot = pindex->entries - 1;
+ }
} else {
/*
+ * At the lowest tree level (considering a leaf
+ * page), turn off the initial-descent state.
+ * Descent race tests are different when moving
+ * through the tree vs. the initial descent.
+ */
+ initial_descent = false;
+
+ /*
* Optionally skip leaf pages, the second half.
* We didn't have an on-page cell to figure out
* if it was a leaf page, we had to acquire the
@@ -605,7 +682,7 @@ __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags)
/*
* __wt_tree_walk_count --
* Move to the next/previous page in the tree, tracking how many
- * references were visited to get there.
+ * references were visited to get there.
*/
int
__wt_tree_walk_count(WT_SESSION_IMPL *session,
diff --git a/src/btree/col_srch.c b/src/btree/col_srch.c
index cb5a227495f..3aa31044b82 100644
--- a/src/btree/col_srch.c
+++ b/src/btree/col_srch.c
@@ -137,12 +137,12 @@ restart_page: page = current->page;
* If on the last slot (the key is larger than any key
* on the page), check for an internal page split race.
*/
- if (parent_pindex != NULL &&
- __wt_split_intl_race(
- session, current->home, parent_pindex)) {
+ if (__wt_split_descent_race(
+ session, current, parent_pindex)) {
WT_RET(__wt_page_release(session, current, 0));
goto restart_root;
}
+
goto descend;
}
diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c
index c06274cdb17..28c55a4ccd0 100644
--- a/src/btree/row_srch.c
+++ b/src/btree/row_srch.c
@@ -287,9 +287,26 @@ restart_page: page = current->page;
WT_INTL_INDEX_GET(session, page, pindex);
- /* Fast-path appends. */
+ /*
+ * Fast-path appends.
+ *
+ * The 0th key on an internal page is a problem for a couple of
+ * reasons. First, we have to force the 0th key to sort less
+ * than any application key, so internal pages don't have to be
+ * updated if the application stores a new, "smallest" key in
+ * the tree. Second, reconciliation is aware of this and will
+ * store a byte of garbage in the 0th key, so the comparison of
+ * an application key and a 0th key is meaningless (but doing
+ * the comparison could still incorrectly modify our tracking
+ * of the leading bytes in each key that we can skip during the
+ * comparison). For these reasons, special-case the 0th key, and
+ * never pass it to a collator.
+ */
if (append_check) {
descent = pindex->index[pindex->entries - 1];
+
+ if (pindex->entries == 1)
+ goto append;
__wt_ref_key(page, descent, &item->data, &item->size);
WT_ERR(__wt_compare(
session, collator, srch_key, item, &cmp));
@@ -307,16 +324,8 @@ restart_page: page = current->page;
* collation order), because doing the tests and error handling
* inside the loop costs about 5%.
*
- * The 0th key on an internal page is a problem for a couple of
- * reasons. First, we have to force the 0th key to sort less
- * than any application key, so internal pages don't have to be
- * updated if the application stores a new, "smallest" key in
- * the tree. Second, reconciliation is aware of this and will
- * store a byte of garbage in the 0th key, so the comparison of
- * an application key and a 0th key is meaningless (but doing
- * the comparison could still incorrectly modify our tracking
- * of the leading bytes in each key that we can skip during the
- * comparison). For these reasons, skip the 0th key.
+ * Reference the comment above about the 0th key: we continue to
+ * special-case it.
*/
base = 1;
limit = pindex->entries - 1;
@@ -409,9 +418,8 @@ restart_page: page = current->page;
* page), check for an internal page split race.
*/
if (pindex->entries == base) {
-append: if (parent_pindex != NULL &&
- __wt_split_intl_race(
- session, current->home, parent_pindex)) {
+append: if (__wt_split_descent_race(
+ session, current, parent_pindex)) {
if ((ret = __wt_page_release(
session, current, 0)) != 0)
return (ret);
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index 2f62950a36e..27977de63b2 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -402,7 +402,7 @@ __wt_encryptor_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval,
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
- WT_ENCRYPTOR *encryptor;
+ WT_ENCRYPTOR *custom, *encryptor;
WT_KEYED_ENCRYPTOR *kenc;
WT_NAMED_ENCRYPTOR *nenc;
uint64_t bucket, hash;
@@ -440,12 +440,13 @@ __wt_encryptor_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval,
WT_ERR(__wt_strndup(session, keyid->str, keyid->len, &kenc->keyid));
encryptor = nenc->encryptor;
if (encryptor->customize != NULL) {
+ custom = NULL;
WT_ERR(encryptor->customize(encryptor, &session->iface,
- cfg_arg, &encryptor));
- if (encryptor == NULL)
- encryptor = nenc->encryptor;
- else
+ cfg_arg, &custom));
+ if (custom != NULL) {
kenc->owned = 1;
+ encryptor = custom;
+ }
}
WT_ERR(encryptor->sizing(encryptor, &session->iface,
&kenc->size_const));
@@ -2065,6 +2066,8 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
* DATABASE HOME, IT'S WHAT WE USE TO DECIDE IF WE'RE CREATING OR NOT.
*/
WT_ERR(__wt_turtle_init(session));
+
+ __wt_metadata_init(session);
WT_ERR(__wt_metadata_cursor(session, NULL));
/* Start the worker threads and run recovery. */
diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c
index dedafc2b102..60136a71b99 100644
--- a/src/conn/conn_dhandle.c
+++ b/src/conn/conn_dhandle.c
@@ -421,7 +421,7 @@ __wt_conn_btree_apply(WT_SESSION_IMPL *session,
(apply_checkpoints ||
dhandle->checkpoint == NULL) &&
WT_PREFIX_MATCH(dhandle->name, "file:") &&
- !WT_IS_METADATA(dhandle))
+ !WT_IS_METADATA(session, dhandle))
WT_RET(__conn_btree_apply_internal(
session, dhandle, func, cfg));
}
@@ -644,7 +644,7 @@ __wt_conn_dhandle_discard_single(
F_SET(S2C(session)->cache, WT_CACHE_CLEAR_WALKS);
/* Try to remove the handle, protected by the data handle lock. */
- WT_WITH_HANDLE_LIST_LOCK(session, tret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
tret = __conn_dhandle_remove(session, final));
WT_TRET(tret);
@@ -686,7 +686,7 @@ __wt_conn_dhandle_discard(WT_SESSION_IMPL *session)
*/
restart:
TAILQ_FOREACH(dhandle, &conn->dhqh, q) {
- if (WT_IS_METADATA(dhandle))
+ if (WT_IS_METADATA(session, dhandle))
continue;
WT_WITH_DHANDLE(session, dhandle,
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c
index ed226393fb0..60f46288072 100644
--- a/src/conn/conn_log.c
+++ b/src/conn/conn_log.c
@@ -180,9 +180,10 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
* disk and the checkpoint LSN.
*/
if (backup_file != 0)
- min_lognum = WT_MIN(log->ckpt_lsn.file, backup_file);
+ min_lognum = WT_MIN(log->ckpt_lsn.l.file, backup_file);
else
- min_lognum = WT_MIN(log->ckpt_lsn.file, log->sync_lsn.file);
+ min_lognum = WT_MIN(
+ log->ckpt_lsn.l.file, log->sync_lsn.l.file);
WT_RET(__wt_verbose(session, WT_VERB_LOG,
"log_archive: archive to log number %" PRIu32, min_lognum));
@@ -218,8 +219,7 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
* Indicate what is our new earliest LSN. It is the start
* of the log file containing the last checkpoint.
*/
- log->first_lsn.file = min_lognum;
- log->first_lsn.offset = 0;
+ WT_SET_LSN(&log->first_lsn, min_lognum, 0);
if (0)
err: __wt_err(session, ret, "log archive server error");
@@ -317,7 +317,7 @@ __wt_log_truncate_files(
backup_file = 0;
if (cursor != NULL)
backup_file = WT_CURSOR_BACKUP_ID(cursor);
- WT_ASSERT(session, backup_file <= log->alloc_lsn.file);
+ WT_ASSERT(session, backup_file <= log->alloc_lsn.l.file);
WT_RET(__wt_verbose(session, WT_VERB_LOG,
"log_truncate_files: Archive once up to %" PRIu32,
backup_file));
@@ -367,7 +367,7 @@ __log_file_server(void *arg)
* could see mismatched settings. If we do, yield
* until it is set. This should rarely happen.
*/
- while (log->log_close_lsn.file < filenum)
+ while (log->log_close_lsn.l.file < filenum)
__wt_yield();
if (__wt_log_cmp(
@@ -398,10 +398,10 @@ __log_file_server(void *arg)
* actual data and has minimal pre-allocated
* zeroed space.
*/
- WT_ERR(__wt_ftruncate(
- session, close_fh, close_end_lsn.offset));
- close_end_lsn.file++;
- close_end_lsn.offset = 0;
+ WT_ERR(__wt_ftruncate(session,
+ close_fh, close_end_lsn.l.offset));
+ WT_SET_LSN(&close_end_lsn,
+ close_end_lsn.l.file + 1, 0);
__wt_spin_lock(session, &log->log_sync_lock);
locked = true;
WT_ERR(__wt_close(session, &close_fh));
@@ -440,9 +440,9 @@ __log_file_server(void *arg)
* this worker thread process that older file
* immediately.
*/
- if ((log->sync_lsn.file <
- log->bg_sync_lsn.file) ||
- (log->sync_lsn.file < min_lsn.file))
+ if ((log->sync_lsn.l.file <
+ log->bg_sync_lsn.l.file) ||
+ (log->sync_lsn.l.file < min_lsn.l.file))
continue;
WT_ERR(__wt_fsync(session, log->log_fh));
__wt_spin_lock(session, &log->log_sync_lock);
@@ -454,7 +454,8 @@ __log_file_server(void *arg)
if (__wt_log_cmp(
&log->sync_lsn, &min_lsn) <= 0) {
WT_ASSERT(session,
- min_lsn.file == log->sync_lsn.file);
+ min_lsn.l.file ==
+ log->sync_lsn.l.file);
log->sync_lsn = min_lsn;
WT_ERR(__wt_cond_signal(
session, log->log_sync_cond));
@@ -500,9 +501,9 @@ typedef struct {
* Return comparison of a written slot pair by LSN.
*/
#define WT_WRLSN_ENTRY_CMP_LT(entry1, entry2) \
- ((entry1).lsn.file < (entry2).lsn.file || \
- ((entry1).lsn.file == (entry2).lsn.file && \
- (entry1).lsn.offset < (entry2).lsn.offset))
+ ((entry1).lsn.l.file < (entry2).lsn.l.file || \
+ ((entry1).lsn.l.file == (entry2).lsn.l.file && \
+ (entry1).lsn.l.offset < (entry2).lsn.l.offset))
/*
* __wt_log_wrlsn --
@@ -539,7 +540,7 @@ restart:
save_i = i;
slot = &log->slot_pool[i++];
WT_ASSERT(session, slot->slot_state != 0 ||
- slot->slot_release_lsn.file >= log->write_lsn.file);
+ slot->slot_release_lsn.l.file >= log->write_lsn.l.file);
if (slot->slot_state != WT_LOG_SLOT_WRITTEN)
continue;
written[written_i].slot_index = save_i;
@@ -629,9 +630,9 @@ restart:
* the checkpoint LSN is close to the end of
* the record.
*/
- if (slot->slot_start_lsn.offset !=
+ if (slot->slot_start_lsn.l.offset !=
slot->slot_last_offset)
- slot->slot_start_lsn.offset =
+ slot->slot_start_lsn.l.offset =
slot->slot_last_offset;
log->write_start_lsn = slot->slot_start_lsn;
log->write_lsn = slot->slot_end_lsn;
diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c
index 9edc6091b10..08ad105c725 100644
--- a/src/conn/conn_stat.c
+++ b/src/conn/conn_stat.c
@@ -340,7 +340,7 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp)
* any that match the list of object sources.
*/
if (conn->stat_sources != NULL) {
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_conn_btree_apply(
session, false, NULL, __statlog_apply, NULL));
WT_RET(ret);
diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c
index a15aabdd6fe..7628076e605 100644
--- a/src/conn/conn_sweep.c
+++ b/src/conn/conn_sweep.c
@@ -26,7 +26,7 @@ __sweep_mark(WT_SESSION_IMPL *session, time_t now)
conn = S2C(session);
TAILQ_FOREACH(dhandle, &conn->dhqh, q) {
- if (WT_IS_METADATA(dhandle))
+ if (WT_IS_METADATA(session, dhandle))
continue;
/*
@@ -124,7 +124,7 @@ __sweep_expire(WT_SESSION_IMPL *session, time_t now)
if (conn->open_btree_count < conn->sweep_handles_min)
break;
- if (WT_IS_METADATA(dhandle) ||
+ if (WT_IS_METADATA(session, dhandle) ||
!F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
dhandle->session_inuse != 0 ||
dhandle->timeofdeath == 0 ||
@@ -230,12 +230,12 @@ __sweep_remove_handles(WT_SESSION_IMPL *session)
dhandle != NULL;
dhandle = dhandle_next) {
dhandle_next = TAILQ_NEXT(dhandle, q);
- if (WT_IS_METADATA(dhandle))
+ if (WT_IS_METADATA(session, dhandle))
continue;
if (!WT_DHANDLE_CAN_DISCARD(dhandle))
continue;
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __sweep_remove_one(session, dhandle));
if (ret == 0)
WT_STAT_FAST_CONN_INCR(session, dh_sweep_remove);
diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c
index 6d5d68000ee..d7d74da48d4 100644
--- a/src/cursor/cur_backup.c
+++ b/src/cursor/cur_backup.c
@@ -377,7 +377,7 @@ __backup_all(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
WT_ERR(__wt_metadata_cursor_release(session, &cursor));
/* Build a list of the file objects that need to be copied. */
- WT_WITH_HANDLE_LIST_LOCK(session, ret, ret =
+ WT_WITH_HANDLE_LIST_LOCK(session, ret =
__wt_meta_btree_apply(session, __backup_list_all_append, NULL));
err: WT_TRET(__wt_metadata_cursor_release(session, &cursor));
diff --git a/src/cursor/cur_log.c b/src/cursor/cur_log.c
index 35a2d00e6ec..3fcd8a86066 100644
--- a/src/cursor/cur_log.c
+++ b/src/cursor/cur_log.c
@@ -187,13 +187,13 @@ __curlog_kv(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
if (FLD_ISSET(cursor->flags, WT_CURSTD_RAW)) {
memset(&item, 0, sizeof(item));
WT_RET(wiredtiger_struct_size((WT_SESSION *)session,
- &item.size, WT_LOGC_KEY_FORMAT, cl->cur_lsn->file,
- cl->cur_lsn->offset, key_count));
+ &item.size, WT_LOGC_KEY_FORMAT, cl->cur_lsn->l.file,
+ cl->cur_lsn->l.offset, key_count));
WT_RET(__wt_realloc(session, NULL, item.size, &cl->packed_key));
item.data = cl->packed_key;
WT_RET(wiredtiger_struct_pack((WT_SESSION *)session,
cl->packed_key, item.size, WT_LOGC_KEY_FORMAT,
- cl->cur_lsn->file, cl->cur_lsn->offset, key_count));
+ cl->cur_lsn->l.file, cl->cur_lsn->l.offset, key_count));
__wt_cursor_set_key(cursor, &item);
WT_RET(wiredtiger_struct_size((WT_SESSION *)session,
@@ -208,8 +208,8 @@ __curlog_kv(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
cl->opvalue));
__wt_cursor_set_value(cursor, &item);
} else {
- __wt_cursor_set_key(cursor, cl->cur_lsn->file,
- cl->cur_lsn->offset, key_count);
+ __wt_cursor_set_key(cursor, cl->cur_lsn->l.file,
+ cl->cur_lsn->l.offset, key_count);
__wt_cursor_set_value(cursor, cl->txnid, cl->rectype, optype,
fileid, cl->opkey, cl->opvalue);
}
@@ -264,7 +264,7 @@ __curlog_search(WT_CURSOR *cursor)
WT_DECL_RET;
WT_LSN key;
WT_SESSION_IMPL *session;
- uint32_t counter;
+ uint32_t counter, key_file, key_offset;
cl = (WT_CURSOR_LOG *)cursor;
@@ -274,7 +274,8 @@ __curlog_search(WT_CURSOR *cursor)
* !!! We are ignoring the counter and only searching based on the LSN.
*/
WT_ERR(__wt_cursor_get_key((WT_CURSOR *)cl,
- &key.file, &key.offset, &counter));
+ &key_file, &key_offset, &counter));
+ WT_SET_LSN(&key, key_file, key_offset);
ret = __wt_log_scan(session, &key, WT_LOGSCAN_ONE,
__curlog_logrec, cl);
if (ret == ENOENT)
diff --git a/src/docs/build-posix.dox b/src/docs/build-posix.dox
index f61b199bff2..4889bf931c9 100644
--- a/src/docs/build-posix.dox
+++ b/src/docs/build-posix.dox
@@ -116,10 +116,6 @@ The WiredTiger software supports some additional configuration options:
Configure WiredTiger to sleep and wait for a debugger to attach on failure.
<b>DO NOT</b> configure this option in production environments.
-@par \c --enable-bzip2
-Configure WiredTiger for <a href="http://www.bzip.org/">bzip2</a>
-compression; see @ref compression for more information.
-
@par \c --enable-diagnostic
Configure WiredTiger to perform various run-time diagnostic tests.
<b>DO NOT</b> configure this option in production environments.
diff --git a/src/docs/compression.dox b/src/docs/compression.dox
index 56715e20752..0be96835760 100644
--- a/src/docs/compression.dox
+++ b/src/docs/compression.dox
@@ -1,36 +1,7 @@
/*! @m_page{{c,java},compression,Compressors}
This section explains how to configure WiredTiger's builtin support for
-the bzip2, lz4, snappy and zlib compression engines.
-
-@section compression_bzip2 Using bzip2 compression
-
-To use the builtin support for
-<a href="http://www.bzip.org/">Julian Seward's bzip2</a>
-compression, first check that bzip2 is installed in include and library
-directories searched by the compiler. Once bzip2 is installed, you can
-enable bzip2 using the \c --enable-bzip2 option to configure.
-
-If bzip2 is installed in a location not normally searched by the
-compiler toolchain, you'll need to modify the \c CPPFLAGS and \c LDFLAGS
-to indicate these locations. For example, with the bzip2 includes and
-libraries installed in \c /usr/local/include and \c /usr/local/lib, you
-would run configure with the following additional arguments:
-
-@code
---enable-bzip2 CPPFLAGS="-I/usr/local/include" LDFLAGS="-L/usr/local/include"
-@endcode
-
-When opening the WiredTiger database, load the bzip2 shared library as
-an extension. For example, with the WiredTiger library installed in
-\c /usr/local/lib, you would use the following extension:
-
-@snippet ex_all.c Configure bzip2 extension
-
-Finally, when creating the WiredTiger object, set \c block_compressor
-to \c bzip2:
-
-@snippet ex_all.c Create a bzip2 compressed table
+the lz4, snappy and zlib compression engines.
@section compression_lz4 Using LZ4 compression
diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox
index e0239919f0b..e4d85003a1e 100644
--- a/src/docs/upgrading.dox
+++ b/src/docs/upgrading.dox
@@ -27,6 +27,13 @@ reclaimed when the call returns. The performance of this API may differ
from earlier releases.
</dd>
+<dt>Bzip2 compression support</dt>
+<dd>
+Support for the bzip2 compression/decompression engine has been removed
+from the WiredTiger release; remaining compression engines include LZ4,
+snappy and zlib.
+</dd>
+
</dl><hr>
@section version_270 Upgrading to Version 2.7.0
diff --git a/src/docs/wtperf.dox b/src/docs/wtperf.dox
index 64e25978dd8..1f0d1533ac4 100644
--- a/src/docs/wtperf.dox
+++ b/src/docs/wtperf.dox
@@ -160,7 +160,7 @@ connection configuration string
post-populate compact for LSM merging activity
@par compression (string, default=none)
compression extension. Allowed configuration values are: 'none',
-'bzip', 'lz4', 'snappy', 'zlib'
+'lz4', 'snappy', 'zlib'
@par create (boolean, default=true)
do population phase; false to use existing database
@par database_count (unsigned int, default=1)
diff --git a/src/include/block.h b/src/include/block.h
index 27a140b73a4..10efd35086c 100644
--- a/src/include/block.h
+++ b/src/include/block.h
@@ -289,6 +289,23 @@ struct __wt_block_desc {
#define WT_BLOCK_DESC_SIZE 16
/*
+ * __wt_block_desc_byteswap --
+ * Handle big- and little-endian transformation of a description block.
+ */
+static inline void
+__wt_block_desc_byteswap(WT_BLOCK_DESC *desc)
+{
+#ifdef WORDS_BIGENDIAN
+ desc->magic = __wt_bswap32(desc->magic);
+ desc->majorv = __wt_bswap16(desc->majorv);
+ desc->minorv = __wt_bswap16(desc->minorv);
+ desc->cksum = __wt_bswap32(desc->cksum);
+#else
+ WT_UNUSED(desc);
+#endif
+}
+
+/*
* WT_BLOCK_HEADER --
* Blocks have a common header, a WT_PAGE_HEADER structure followed by a
* block-manager specific structure: WT_BLOCK_HEADER is WiredTiger's default.
@@ -331,6 +348,35 @@ struct __wt_block_header {
#define WT_BLOCK_HEADER_SIZE 12
/*
+ * __wt_block_header_byteswap_copy --
+ * Handle big- and little-endian transformation of a header block,
+ * copying from a source to a target.
+ */
+static inline void
+__wt_block_header_byteswap_copy(WT_BLOCK_HEADER *from, WT_BLOCK_HEADER *to)
+{
+ *to = *from;
+#ifdef WORDS_BIGENDIAN
+ to->disk_size = __wt_bswap32(from->disk_size);
+ to->cksum = __wt_bswap32(from->cksum);
+#endif
+}
+
+/*
+ * __wt_block_header_byteswap --
+ * Handle big- and little-endian transformation of a header block.
+ */
+static inline void
+__wt_block_header_byteswap(WT_BLOCK_HEADER *blk)
+{
+#ifdef WORDS_BIGENDIAN
+ __wt_block_header_byteswap_copy(blk, blk);
+#else
+ WT_UNUSED(blk);
+#endif
+}
+
+/*
* WT_BLOCK_HEADER_BYTE
* WT_BLOCK_HEADER_BYTE_SIZE --
* The first usable data byte on the block (past the combined headers).
diff --git a/src/include/btmem.h b/src/include/btmem.h
index cfbd87f0cae..ee495c52fc8 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -62,6 +62,23 @@ struct __wt_page_header {
#define WT_PAGE_HEADER_SIZE 28
/*
+ * __wt_page_header_byteswap --
+ * Handle big- and little-endian transformation of a page header.
+ */
+static inline void
+__wt_page_header_byteswap(WT_PAGE_HEADER *dsk)
+{
+#ifdef WORDS_BIGENDIAN
+ dsk->recno = __wt_bswap64(dsk->recno);
+ dsk->write_gen = __wt_bswap64(dsk->write_gen);
+ dsk->mem_size = __wt_bswap32(dsk->mem_size);
+ dsk->u.entries = __wt_bswap32(dsk->u.entries);
+#else
+ WT_UNUSED(dsk);
+#endif
+}
+
+/*
* The block-manager specific information immediately follows the WT_PAGE_HEADER
* structure.
*/
diff --git a/src/include/btree.i b/src/include/btree.i
index 94111397abd..b4b4d7f25a2 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -1294,19 +1294,19 @@ __wt_page_swap_func(
bool acquired;
/*
- * In rare cases when walking the tree, we try to swap to the same
- * page. Fast-path that to avoid thinking about error handling.
- */
- if (held == want)
- return (0);
-
- /*
* This function is here to simplify the error handling during hazard
* pointer coupling so we never leave a hazard pointer dangling. The
* assumption is we're holding a hazard pointer on "held", and want to
* acquire a hazard pointer on "want", releasing the hazard pointer on
* "held" when we're done.
+ *
+ * When walking the tree, we sometimes swap to the same page. Fast-path
+ * that to avoid thinking about error handling.
*/
+ if (held == want)
+ return (0);
+
+ /* Get the wanted page. */
ret = __wt_page_in_func(session, want, flags
#ifdef HAVE_DIAGNOSTIC
, file, line
@@ -1446,15 +1446,19 @@ __wt_btree_lsm_over_size(WT_SESSION_IMPL *session, uint64_t maxsize)
}
/*
- * __wt_split_intl_race --
+ * __wt_split_descent_race --
* Return if we raced with an internal page split when descending the tree.
*/
static inline bool
-__wt_split_intl_race(
- WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE_INDEX *saved_pindex)
+__wt_split_descent_race(
+ WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX *saved_pindex)
{
WT_PAGE_INDEX *pindex;
+ /* No test when starting the descent (there's no home to check). */
+ if (__wt_ref_is_root(ref))
+ return (false);
+
/*
* A place to hang this comment...
*
@@ -1509,6 +1513,6 @@ __wt_split_intl_race(
* content the split page retains after the split, and we ignore this
* race.
*/
- WT_INTL_INDEX_GET(session, parent, pindex);
+ WT_INTL_INDEX_GET(session, ref->home, pindex);
return (pindex != saved_pindex);
}
diff --git a/src/include/connection.h b/src/include/connection.h
index 5d61f9456b3..88797e83ad6 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -364,6 +364,7 @@ struct __wt_connection_impl {
uint32_t txn_logsync; /* Log sync configuration */
WT_SESSION_IMPL *meta_ckpt_session;/* Metadata checkpoint session */
+ uint64_t meta_uri_hash; /* Metadata file name hash */
WT_SESSION_IMPL *sweep_session; /* Handle sweep session */
wt_thread_t sweep_tid; /* Handle sweep thread */
diff --git a/src/include/extern.h b/src/include/extern.h
index b71f4b12486..1999ff6b732 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -455,6 +455,7 @@ extern int __wt_ext_metadata_search(WT_EXTENSION_API *wt_api, WT_SESSION *wt_ses
extern int __wt_ext_metadata_update(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value);
extern int __wt_metadata_get_ckptlist( WT_SESSION *session, const char *name, WT_CKPT **ckptbasep);
extern void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase);
+extern void __wt_metadata_init(WT_SESSION_IMPL *session);
extern int __wt_metadata_cursor_open( WT_SESSION_IMPL *session, const char *config, WT_CURSOR **cursorp);
extern int __wt_metadata_cursor(WT_SESSION_IMPL *session, WT_CURSOR **cursorp);
extern int __wt_metadata_cursor_release(WT_SESSION_IMPL *session, WT_CURSOR **cursorp);
diff --git a/src/include/log.h b/src/include/log.h
index 577f6a888a3..0e676d47b66 100644
--- a/src/include/log.h
+++ b/src/include/log.h
@@ -6,6 +6,23 @@
* See the file LICENSE for redistribution information.
*/
+/*
+ * WT_LSN --
+ * A log sequence number, representing a position in the transaction log.
+ */
+union __wt_lsn {
+ struct {
+#ifdef WORDS_BIGENDIAN
+ uint32_t file;
+ uint32_t offset;
+#else
+ uint32_t offset;
+ uint32_t file;
+#endif
+ } l;
+ uint64_t file_offset;
+};
+
#define WT_LOG_FILENAME "WiredTigerLog" /* Log file name */
#define WT_LOG_PREPNAME "WiredTigerPreplog" /* Log pre-allocated name */
#define WT_LOG_TMPNAME "WiredTigerTmplog" /* Log temporary name */
@@ -13,32 +30,33 @@
/* Logging subsystem declarations. */
#define WT_LOG_ALIGN 128
-#define WT_INIT_LSN(l) do { \
- (l)->file = 1; \
- (l)->offset = 0; \
-} while (0)
+/*
+ * Atomically set the two components of the LSN (a single 64-bit store).
+ */
+#define WT_SET_LSN(l, f, o) (l)->file_offset = (((uint64_t)(f) << 32) + (o))
-#define WT_MAX_LSN(l) do { \
- (l)->file = UINT32_MAX; \
- (l)->offset = INT64_MAX; \
-} while (0)
+#define WT_INIT_LSN(l) WT_SET_LSN((l), 1, 0)
-#define WT_ZERO_LSN(l) do { \
- (l)->file = 0; \
- (l)->offset = 0; \
-} while (0)
+#define WT_MAX_LSN(l) WT_SET_LSN((l), UINT32_MAX, INT32_MAX)
-#define WT_IS_INIT_LSN(l) \
- ((l)->file == 1 && (l)->offset == 0)
-#define WT_IS_MAX_LSN(l) \
- ((l)->file == UINT32_MAX && (l)->offset == INT64_MAX)
+#define WT_ZERO_LSN(l) WT_SET_LSN((l), 0, 0)
+
+/*
+ * The initial LSN is (1,0). We only need to shift the 1 for comparison.
+ */
+#define WT_IS_INIT_LSN(l) ((l)->file_offset == ((uint64_t)1 << 32))
+/*
+ * XXX Original tested INT64_MAX; the offset is now 32 bits, so use INT32_MAX.
+ */
+#define WT_IS_MAX_LSN(lsn) \
+ ((lsn)->l.file == UINT32_MAX && (lsn)->l.offset == INT32_MAX)
/*
* Both of the macros below need to change if the content of __wt_lsn
* ever changes. The value is the following:
* txnid, record type, operation type, file id, operation key, operation value
*/
-#define WT_LOGC_KEY_FORMAT WT_UNCHECKED_STRING(IqI)
+#define WT_LOGC_KEY_FORMAT WT_UNCHECKED_STRING(III)
#define WT_LOGC_VALUE_FORMAT WT_UNCHECKED_STRING(qIIIuu)
#define WT_LOG_SKIP_HEADER(data) \
@@ -253,6 +271,24 @@ struct __wt_log_record {
};
/*
+ * __wt_log_record_byteswap --
+ * Handle big- and little-endian transformation of the log record
+ * header block.
+ */
+static inline void
+__wt_log_record_byteswap(WT_LOG_RECORD *record)
+{
+#ifdef WORDS_BIGENDIAN
+ record->len = __wt_bswap32(record->len);
+ record->checksum = __wt_bswap32(record->checksum);
+ record->flags = __wt_bswap16(record->flags);
+ record->mem_len = __wt_bswap32(record->mem_len);
+#else
+ WT_UNUSED(record);
+#endif
+}
+
+/*
* WT_LOG_DESC --
* The log file's description.
*/
@@ -267,6 +303,24 @@ struct __wt_log_desc {
};
/*
+ * __wt_log_desc_byteswap --
+ * Handle big- and little-endian transformation of the log file
+ * description block.
+ */
+static inline void
+__wt_log_desc_byteswap(WT_LOG_DESC *desc)
+{
+#ifdef WORDS_BIGENDIAN
+ desc->log_magic = __wt_bswap32(desc->log_magic);
+ desc->majorv = __wt_bswap16(desc->majorv);
+ desc->minorv = __wt_bswap16(desc->minorv);
+ desc->log_size = __wt_bswap64(desc->log_size);
+#else
+ WT_UNUSED(desc);
+#endif
+}
+
+/*
* Flags for __wt_txn_op_printlog.
*/
#define WT_TXN_PRINTLOG_HEX 0x0001 /* Add hex output */
diff --git a/src/include/log.i b/src/include/log.i
index fcdbc72c388..9e6c36291f7 100644
--- a/src/include/log.i
+++ b/src/include/log.i
@@ -16,25 +16,14 @@ static inline int __wt_log_cmp(WT_LSN *lsn1, WT_LSN *lsn2);
static inline int
__wt_log_cmp(WT_LSN *lsn1, WT_LSN *lsn2)
{
- WT_LSN l1, l2;
+ uint64_t l1, l2;
/*
* Read LSNs into local variables so that we only read each field
* once and all comparisons are on the same values.
*/
- l1 = *(volatile WT_LSN *)lsn1;
- l2 = *(volatile WT_LSN *)lsn2;
+ l1 = ((volatile WT_LSN *)lsn1)->file_offset;
+ l2 = ((volatile WT_LSN *)lsn2)->file_offset;
- /*
- * If the file numbers are different we don't need to compare the
- * offset.
- */
- if (l1.file != l2.file)
- return (l1.file < l2.file ? -1 : 1);
- /*
- * If the file numbers are the same, compare the offset.
- */
- if (l1.offset != l2.offset)
- return (l1.offset < l2.offset ? -1 : 1);
- return (0);
+ return (l1 < l2 ? -1 : (l1 > l2 ? 1 : 0));
}
diff --git a/src/include/meta.h b/src/include/meta.h
index e29ec4202dc..d61022c0c44 100644
--- a/src/include/meta.h
+++ b/src/include/meta.h
@@ -30,9 +30,8 @@
* against the metafile URI. The validity is checked on connection open
* when diagnostic is enabled.
*/
-#define WT_METAFILE_NAME_HASH 1045034099109282882LLU /* Metadata file hash */
-#define WT_IS_METADATA(dh) \
- ((dh)->name_hash == WT_METAFILE_NAME_HASH && \
+#define WT_IS_METADATA(session, dh) \
+ ((dh)->name_hash == S2C(session)->meta_uri_hash && \
strcmp((dh)->name, WT_METAFILE_URI) == 0)
#define WT_METAFILE_ID 0 /* Metadata file ID */
diff --git a/src/include/misc.h b/src/include/misc.h
index 78997661851..5dadb1b1484 100644
--- a/src/include/misc.h
+++ b/src/include/misc.h
@@ -48,6 +48,9 @@
#define WT_ALIGN(n, v) \
((((uintmax_t)(n)) + ((v) - 1)) & ~(((uintmax_t)(v)) - 1))
+#define WT_ALIGN_NEAREST(n, v) \
+ ((((uintmax_t)(n)) + ((v) / 2)) & ~(((uintmax_t)(v)) - 1))
+
/* Min, max. */
#define WT_MIN(a, b) ((a) < (b) ? (a) : (b))
#define WT_MAX(a, b) ((a) < (b) ? (b) : (a))
diff --git a/src/include/schema.h b/src/include/schema.h
index 88a3a39f8b3..a51030870c1 100644
--- a/src/include/schema.h
+++ b/src/include/schema.h
@@ -79,27 +79,37 @@ struct __wt_table {
#define WT_COLGROUPS(t) WT_MAX((t)->ncolgroups, 1)
/*
+ * WT_WITH_LOCK_WAIT --
+ * Wait for a lock, perform an operation, drop the lock.
+ */
+#define WT_WITH_LOCK_WAIT(session, lock, flag, op) do { \
+ if (F_ISSET(session, (flag))) { \
+ op; \
+ } else { \
+ __wt_spin_lock(session, (lock)); \
+ F_SET(session, (flag)); \
+ op; \
+ F_CLR(session, (flag)); \
+ __wt_spin_unlock(session, (lock)); \
+ } \
+} while (0)
+
+/*
* WT_WITH_LOCK --
* Acquire a lock, perform an operation, drop the lock.
*/
#define WT_WITH_LOCK(session, ret, lock, flag, op) do { \
ret = 0; \
- if (F_ISSET(session, (flag))) { \
- op; \
- } else if (F_ISSET(session, WT_SESSION_LOCK_NO_WAIT)) { \
+ if (!F_ISSET(session, (flag)) && \
+ F_ISSET(session, WT_SESSION_LOCK_NO_WAIT)) { \
if ((ret = __wt_spin_trylock(session, (lock))) == 0) { \
F_SET(session, (flag)); \
op; \
F_CLR(session, (flag)); \
__wt_spin_unlock(session, (lock)); \
} \
- } else { \
- __wt_spin_lock(session, (lock)); \
- F_SET(session, (flag)); \
- op; \
- F_CLR(session, (flag)); \
- __wt_spin_unlock(session, (lock)); \
- } \
+ } else \
+ WT_WITH_LOCK_WAIT(session, lock, flag, op); \
} while (0)
/*
@@ -113,10 +123,15 @@ struct __wt_table {
/*
* WT_WITH_HANDLE_LIST_LOCK --
* Acquire the data handle list lock, perform an operation, drop the lock.
+ *
+ * Note: always waits because some operations need the handle list lock to
+ * discard handles, and we only expect it to be held across short
+ * operations.
*/
-#define WT_WITH_HANDLE_LIST_LOCK(session, ret, op) \
- WT_WITH_LOCK(session, ret, \
+#define WT_WITH_HANDLE_LIST_LOCK(session, op) \
+ WT_WITH_LOCK_WAIT(session, \
&S2C(session)->dhandle_lock, WT_SESSION_LOCKED_HANDLE_LIST, op)
+
/*
* WT_WITH_SCHEMA_LOCK --
* Acquire the schema lock, perform an operation, drop the lock.
diff --git a/src/include/swap.h b/src/include/swap.h
new file mode 100644
index 00000000000..2040ca88a77
--- /dev/null
+++ b/src/include/swap.h
@@ -0,0 +1,92 @@
+/*-
+ * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1300)
+#include <stdlib.h>
+#define __wt_bswap16(v) _byteswap_ushort(v)
+#define __wt_bswap32(v) _byteswap_ulong(v)
+#define __wt_bswap64(v) _byteswap_uint64(v)
+#elif defined(__clang__) && \
+ defined(__clang_major__) && defined(__clang_minor__) && \
+ (__clang_major__ >= 3) && (__clang_minor__ >= 1)
+#if __has_builtin(__builtin_bswap16)
+#define __wt_bswap16(v) __builtin_bswap16(v)
+#endif
+#if __has_builtin(__builtin_bswap32)
+#define __wt_bswap32(v) __builtin_bswap32(v)
+#endif
+#if __has_builtin(__builtin_bswap64)
+#define __wt_bswap64(v) __builtin_bswap64(v)
+#endif
+#elif defined(__GNUC__) && (__GNUC__ >= 4)
+#if __GNUC__ >= 4 && defined(__GNUC_MINOR__) && __GNUC_MINOR__ >= 3
+#define __wt_bswap32(v) __builtin_bswap32(v)
+#define __wt_bswap64(v) __builtin_bswap64(v)
+#endif
+#if __GNUC__ >= 4 && defined(__GNUC_MINOR__) && __GNUC_MINOR__ >= 8
+#define __wt_bswap16(v) __builtin_bswap16(v)
+#endif
+#elif defined(__sun)
+#include <sys/byteorder.h>
+#define __wt_bswap16(v) BSWAP_16(v)
+#define __wt_bswap32(v) BSWAP_32(v)
+#define __wt_bswap64(v) BSWAP_64(v)
+#endif
+
+#if !defined(__wt_bswap64)
+/*
+ * __wt_bswap64 --
+ * 64-bit unsigned little-endian to/from big-endian value.
+ */
+static inline uint64_t
+__wt_bswap64(uint64_t v)
+{
+ return (
+ ((v << 56) & 0xff00000000000000UL) |
+ ((v << 40) & 0x00ff000000000000UL) |
+ ((v << 24) & 0x0000ff0000000000UL) |
+ ((v << 8) & 0x000000ff00000000UL) |
+ ((v >> 8) & 0x00000000ff000000UL) |
+ ((v >> 24) & 0x0000000000ff0000UL) |
+ ((v >> 40) & 0x000000000000ff00UL) |
+ ((v >> 56) & 0x00000000000000ffUL)
+ );
+}
+#endif
+
+#if !defined(__wt_bswap32)
+/*
+ * __wt_bswap32 --
+ * 32-bit unsigned little-endian to/from big-endian value.
+ */
+static inline uint32_t
+__wt_bswap32(uint32_t v)
+{
+ return (
+ ((v << 24) & 0xff000000) |
+ ((v << 8) & 0x00ff0000) |
+ ((v >> 8) & 0x0000ff00) |
+ ((v >> 24) & 0x000000ff)
+ );
+}
+#endif
+
+#if !defined(__wt_bswap16)
+/*
+ * __wt_bswap16 --
+ * 16-bit unsigned little-endian to/from big-endian value.
+ */
+static inline uint16_t
+__wt_bswap16(uint16_t v)
+{
+ return (
+ ((v << 8) & 0xff00) |
+ ((v >> 8) & 0x00ff)
+ );
+}
+#endif
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 676f95d9b05..767c176b53f 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -72,7 +72,6 @@ struct __wt_event_handler; typedef struct __wt_event_handler WT_EVENT_HANDLER;
struct __wt_extension_api; typedef struct __wt_extension_api WT_EXTENSION_API;
struct __wt_extractor; typedef struct __wt_extractor WT_EXTRACTOR;
struct __wt_item; typedef struct __wt_item WT_ITEM;
-struct __wt_lsn; typedef struct __wt_lsn WT_LSN;
struct __wt_session; typedef struct __wt_session WT_SESSION;
#if defined(SWIGJAVA)
@@ -126,22 +125,6 @@ struct __wt_item {
#endif
};
-/*
- * We rely on this structure being aligned at 64 bits by the compiler,
- * if we were paranoid we could add an unused field to ensure the padding
- * is correct.
- *
- * NOTE: If you change the contents of this structure you must also update
- * the macros in log.h.
- */
-/*!
- * A log sequence number, representing a position in the transaction log.
- */
-struct __wt_lsn {
- uint32_t file; /*!< Log file number */
- wt_off_t offset; /*!< Log file offset */
-};
-
/*!
* The maximum packed size of a 64-bit integer. The ::wiredtiger_struct_pack
* function will pack single long integers into at most this many bytes.
@@ -1006,9 +989,9 @@ struct __wt_session {
* @config{block_compressor, configure a compressor for file blocks.
* Permitted values are \c "none" or custom compression engine name
* created with WT_CONNECTION::add_compressor. If WiredTiger has
- * builtin support for \c "bzip2"\, \c "snappy"\, \c "lz4" or \c "zlib"
- * compression\, these names are also available. See @ref compression
- * for more information., a string; default \c none.}
+ * builtin support for \c "snappy"\, \c "lz4" or \c "zlib" compression\,
+ * these names are also available. See @ref compression for more
+ * information., a string; default \c none.}
* @config{cache_resident, do not ever evict the object's pages from
* cache. Not compatible with LSM tables; see @ref
* tuning_cache_resident for more information., a boolean flag; default
@@ -1839,9 +1822,9 @@ struct __wt_connection {
* @config{&nbsp;&nbsp;&nbsp;&nbsp;compressor, configure a compressor
* for log records. Permitted values are \c "none" or custom
* compression engine name created with WT_CONNECTION::add_compressor.
- * If WiredTiger has builtin support for \c "bzip2"\, \c "snappy"\, \c
- * "lz4" or \c "zlib" compression\, these names are also available. See
- * @ref compression for more information., a string; default \c none.}
+ * If WiredTiger has builtin support for \c "snappy"\, \c "lz4" or \c
+ * "zlib" compression\, these names are also available. See @ref
+ * compression for more information., a string; default \c none.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable logging subsystem., a
* boolean flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;file_max, the maximum size of log
@@ -2308,11 +2291,11 @@ struct __wt_connection {
* @config{&nbsp;&nbsp;&nbsp;&nbsp;compressor, configure a compressor for log
* records. Permitted values are \c "none" or custom compression engine name
* created with WT_CONNECTION::add_compressor. If WiredTiger has builtin
- * support for \c "bzip2"\, \c "snappy"\, \c "lz4" or \c "zlib" compression\,
- * these names are also available. See @ref compression for more information.,
- * a string; default \c none.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable
- * logging subsystem., a boolean flag; default \c false.}
+ * support for \c "snappy"\, \c "lz4" or \c "zlib" compression\, these names are
+ * also available. See @ref compression for more information., a string;
+ * default \c none.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable logging
+ * subsystem., a boolean flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;file_max, the maximum size of log files., an
* integer between 100KB and 2GB; default \c 100MB.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the path to a directory into which the
diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h
index 54b5dfd19f4..4533c8cbca0 100644
--- a/src/include/wt_internal.h
+++ b/src/include/wt_internal.h
@@ -6,6 +6,9 @@
* See the file LICENSE for redistribution information.
*/
+#ifndef __WT_INTERNAL_H
+#define __WT_INTERNAL_H
+
#if defined(__cplusplus)
extern "C" {
#endif
@@ -284,6 +287,8 @@ struct __wt_txn_state;
typedef struct __wt_txn_state WT_TXN_STATE;
struct __wt_update;
typedef struct __wt_update WT_UPDATE;
+union __wt_lsn;
+ typedef union __wt_lsn WT_LSN;
union __wt_rand_state;
typedef union __wt_rand_state WT_RAND_STATE;
/*
@@ -302,6 +307,7 @@ union __wt_rand_state;
#include "msvc.h"
#endif
#include "hardware.h"
+#include "swap.h"
#include "queue.h"
@@ -324,8 +330,8 @@ union __wt_rand_state;
#include "btmem.h"
#include "btree.h"
#include "cache.h"
-#include "config.h"
#include "compact.h"
+#include "config.h"
#include "cursor.h"
#include "dlh.h"
#include "error.h"
@@ -365,3 +371,4 @@ union __wt_rand_state;
#if defined(__cplusplus)
}
#endif
+#endif /* !__WT_INTERNAL_H */
diff --git a/src/log/log.c b/src/log/log.c
index 3bf04d025d8..ce2d7191491 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -105,7 +105,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
* LSN has moved into a later log file and there should be a
* log file ready to close.
*/
- while (log->sync_lsn.file < min_lsn->file) {
+ while (log->sync_lsn.l.file < min_lsn->l.file) {
WT_ERR(__wt_cond_signal(session,
S2C(session)->log_file_cond));
WT_ERR(__wt_cond_wait(session, log->log_sync_cond, 10000));
@@ -116,10 +116,11 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
* Sync the directory if the log file entry hasn't been written
* into the directory.
*/
- if (log->sync_dir_lsn.file < min_lsn->file) {
+ if (log->sync_dir_lsn.l.file < min_lsn->l.file) {
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
- "log_force_sync: sync directory %s to LSN %d/%lu",
- log->log_dir_fh->name, min_lsn->file, min_lsn->offset));
+ "log_force_sync: sync directory %s to LSN %u/%u",
+ log->log_dir_fh->name,
+ min_lsn->l.file, min_lsn->l.offset));
WT_ERR(__wt_directory_sync_fh(session, log->log_dir_fh));
log->sync_dir_lsn = *min_lsn;
WT_STAT_FAST_CONN_INCR(session, log_sync_dir);
@@ -130,7 +131,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
if (__wt_log_cmp(&log->sync_lsn, min_lsn) < 0) {
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
- "log_force_sync: sync %s to LSN %d/%lu",
- log->log_fh->name, min_lsn->file, min_lsn->offset));
+ "log_force_sync: sync %s to LSN %u/%u",
+ log->log_fh->name, min_lsn->l.file, min_lsn->l.offset));
WT_ERR(__wt_fsync(session, log->log_fh));
log->sync_lsn = *min_lsn;
WT_STAT_FAST_CONN_INCR(session, log_sync);
@@ -174,7 +175,7 @@ __wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, bool *recp)
* we can skip recovery.
*/
WT_RET(__wt_curlog_open(session, "log:", NULL, &c));
- c->set_key(c, ckp_lsn->file, ckp_lsn->offset, 0);
+ c->set_key(c, ckp_lsn->l.file, ckp_lsn->l.offset, 0);
if ((ret = c->search(c)) == 0) {
while ((ret = c->next(c)) == 0) {
/*
@@ -278,7 +279,7 @@ __wt_log_get_all_files(WT_SESSION_IMPL *session,
/* Filter out any files that are below the checkpoint LSN. */
for (max = 0, i = 0; i < count; ) {
WT_ERR(__wt_log_extract_lognum(session, files[i], &id));
- if (active_only && id < log->ckpt_lsn.file) {
+ if (active_only && id < log->ckpt_lsn.l.file) {
__wt_free(session, files[i]);
files[i] = files[count - 1];
files[--count] = NULL;
@@ -459,8 +460,8 @@ __log_size_fit(WT_SESSION_IMPL *session, WT_LSN *lsn, uint64_t recsize)
conn = S2C(session);
log = conn->log;
- return (lsn->offset == WT_LOG_FIRST_RECORD ||
- lsn->offset + (wt_off_t)recsize < conn->log_file_max);
+ return (lsn->l.offset == WT_LOG_FIRST_RECORD ||
+ lsn->l.offset + (wt_off_t)recsize < conn->log_file_max);
}
/*
@@ -536,9 +537,7 @@ __log_fill(WT_SESSION_IMPL *session,
WT_MYSLOT *myslot, bool force, WT_ITEM *record, WT_LSN *lsnp)
{
WT_DECL_RET;
- WT_LOG_RECORD *logrec;
- logrec = (WT_LOG_RECORD *)record->mem;
/*
* Call __wt_write or copy into the buffer. For now the offset is the
* real byte offset. If the offset becomes a unit of WT_LOG_ALIGN this
@@ -547,19 +546,19 @@ __log_fill(WT_SESSION_IMPL *session,
*/
if (!force && !F_ISSET(myslot, WT_MYSLOT_UNBUFFERED))
memcpy((char *)myslot->slot->slot_buf.mem + myslot->offset,
- logrec, logrec->len);
+ record->mem, record->size);
else
/*
* If this is a force or unbuffered write, write it now.
*/
WT_ERR(__wt_write(session, myslot->slot->slot_fh,
myslot->offset + myslot->slot->slot_start_offset,
- (size_t)logrec->len, (void *)logrec));
+ record->size, record->mem));
- WT_STAT_FAST_CONN_INCRV(session, log_bytes_written, logrec->len);
+ WT_STAT_FAST_CONN_INCRV(session, log_bytes_written, record->size);
if (lsnp != NULL) {
*lsnp = myslot->slot->slot_start_lsn;
- lsnp->offset += (wt_off_t)myslot->offset;
+ lsnp->l.offset += (uint32_t)myslot->offset;
}
err:
if (ret != 0 && myslot->slot->slot_error == 0)
@@ -596,19 +595,31 @@ __log_file_header(
WT_ASSERT(session, sizeof(WT_LOG_DESC) < log->allocsize);
WT_RET(__wt_scr_alloc(session, log->allocsize, &buf));
memset(buf->mem, 0, log->allocsize);
+ buf->size = log->allocsize;
+
logrec = (WT_LOG_RECORD *)buf->mem;
desc = (WT_LOG_DESC *)logrec->record;
desc->log_magic = WT_LOG_MAGIC;
desc->majorv = WT_LOG_MAJOR_VERSION;
desc->minorv = WT_LOG_MINOR_VERSION;
desc->log_size = (uint64_t)conn->log_file_max;
+ __wt_log_desc_byteswap(desc);
/*
* Now that the record is set up, initialize the record header.
+ *
+ * Checksum a little-endian version of the header, and write everything
+ * in little-endian format. The checksum is (potentially) returned in a
+ * big-endian format, swap it into place in a separate step.
*/
logrec->len = log->allocsize;
logrec->checksum = 0;
+ __wt_log_record_byteswap(logrec);
logrec->checksum = __wt_cksum(logrec, log->allocsize);
+#ifdef WORDS_BIGENDIAN
+ logrec->checksum = __wt_bswap32(logrec->checksum);
+#endif
+
WT_CLEAR(tmp);
memset(&myslot, 0, sizeof(myslot));
myslot.slot = &tmp;
@@ -624,7 +635,7 @@ __log_file_header(
tmp.slot_fh = fh;
} else {
WT_ASSERT(session, fh == NULL);
- WT_ERR(__wt_log_acquire(session, logrec->len, &tmp));
+ WT_ERR(__wt_log_acquire(session, log->allocsize, &tmp));
}
WT_ERR(__log_fill(session, &myslot, true, buf, NULL));
/*
@@ -673,7 +684,9 @@ __log_openfile(WT_SESSION_IMPL *session,
memset(buf->mem, 0, allocsize);
WT_ERR(__wt_read(session, *fh, 0, allocsize, buf->mem));
logrec = (WT_LOG_RECORD *)buf->mem;
+ __wt_log_record_byteswap(logrec);
desc = (WT_LOG_DESC *)logrec->record;
+ __wt_log_desc_byteswap(desc);
if (desc->log_magic != WT_LOG_MAGIC)
WT_PANIC_RET(session, WT_ERROR,
"log file %s corrupted: Bad magic number %" PRIu32,
@@ -829,8 +842,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created)
* We need to setup the LSNs. Set the end LSN and alloc LSN to
* the end of the header.
*/
- log->alloc_lsn.file = log->fileid;
- log->alloc_lsn.offset = WT_LOG_FIRST_RECORD;
+ WT_SET_LSN(&log->alloc_lsn, log->fileid, WT_LOG_FIRST_RECORD);
end_lsn = log->alloc_lsn;
/*
@@ -890,7 +902,7 @@ __wt_log_acquire(WT_SESSION_IMPL *session, uint64_t recsize, WT_LOGSLOT *slot)
* Pre-allocate on the first real write into the log file, if it
* was just created (i.e. not pre-allocated).
*/
- if (log->alloc_lsn.offset == WT_LOG_FIRST_RECORD && created_log)
+ if (log->alloc_lsn.l.offset == WT_LOG_FIRST_RECORD && created_log)
WT_RET(__log_prealloc(session, log->log_fh));
/*
* Initialize the slot for activation.
@@ -931,8 +943,9 @@ __log_truncate(WT_SESSION_IMPL *session,
/*
* Truncate the log file to the given LSN.
*/
- WT_ERR(__log_openfile(session, false, &log_fh, file_prefix, lsn->file));
- WT_ERR(__wt_ftruncate(session, log_fh, lsn->offset));
+ WT_ERR(__log_openfile(session,
+ false, &log_fh, file_prefix, lsn->l.file));
+ WT_ERR(__wt_ftruncate(session, log_fh, lsn->l.offset));
WT_ERR(__wt_fsync(session, log_fh));
WT_ERR(__wt_close(session, &log_fh));
@@ -946,7 +959,8 @@ __log_truncate(WT_SESSION_IMPL *session,
WT_LOG_FILENAME, &logfiles, &logcount));
for (i = 0; i < logcount; i++) {
WT_ERR(__wt_log_extract_lognum(session, logfiles[i], &lognum));
- if (lognum > lsn->file && lognum < log->trunc_lsn.file) {
+ if (lognum > lsn->l.file &&
+ lognum < log->trunc_lsn.l.file) {
WT_ERR(__log_openfile(session,
false, &log_fh, file_prefix, lognum));
/*
@@ -1111,10 +1125,8 @@ __wt_log_open(WT_SESSION_IMPL *session)
if (firstlog == UINT32_MAX) {
WT_ASSERT(session, logcount == 0);
WT_INIT_LSN(&log->first_lsn);
- } else {
- log->first_lsn.file = firstlog;
- log->first_lsn.offset = 0;
- }
+ } else
+ WT_SET_LSN(&log->first_lsn, firstlog, 0);
/*
* Start logging at the beginning of the next log file, no matter
@@ -1346,7 +1358,7 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
* sync operations. The most recent one will set the LSN to the
* beginning of our file.
*/
- if (log->sync_lsn.file < slot->slot_end_lsn.file ||
+ if (log->sync_lsn.l.file < slot->slot_end_lsn.l.file ||
__wt_spin_trylock(session, &log->log_sync_lock) != 0) {
WT_ERR(__wt_cond_wait(
session, log->log_sync_cond, 10000));
@@ -1366,12 +1378,12 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
* now if needed.
*/
if (F_ISSET(slot, WT_SLOT_SYNC_DIR) &&
- (log->sync_dir_lsn.file < sync_lsn.file)) {
+ (log->sync_dir_lsn.l.file < sync_lsn.l.file)) {
WT_ASSERT(session, log->log_dir_fh != NULL);
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
- "log_release: sync directory %s to LSN %d/%lu",
+ "log_release: sync directory %s to LSN %u/%u",
log->log_dir_fh->name,
- sync_lsn.file, sync_lsn.offset));
+ sync_lsn.l.file, sync_lsn.l.offset));
WT_ERR(__wt_directory_sync_fh(
session, log->log_dir_fh));
log->sync_dir_lsn = sync_lsn;
@@ -1384,8 +1396,9 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
if (F_ISSET(slot, WT_SLOT_SYNC) &&
__wt_log_cmp(&log->sync_lsn, &slot->slot_end_lsn) < 0) {
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
- "log_release: sync log %s to LSN %d/%lu",
- log->log_fh->name, sync_lsn.file, sync_lsn.offset));
+ "log_release: sync log %s to LSN %u/%u",
+ log->log_fh->name,
+ sync_lsn.l.file, sync_lsn.l.offset));
WT_STAT_FAST_CONN_INCR(session, log_sync);
WT_ERR(__wt_fsync(session, log->log_fh));
log->sync_lsn = sync_lsn;
@@ -1426,10 +1439,11 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
WT_LOG_RECORD *logrec;
WT_LSN end_lsn, next_lsn, rd_lsn, start_lsn;
wt_off_t log_size;
- uint32_t allocsize, cksum, firstlog, lastlog, lognum, rdup_len, reclen;
+ uint32_t allocsize, firstlog, lastlog, lognum, rdup_len, reclen;
+ uint32_t cksum_calculate, cksum_tmp;
u_int i, logcount;
int firstrecord;
- bool eol;
+ bool eol, partial_record;
char **logfiles;
conn = S2C(session);
@@ -1449,8 +1463,8 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
if (LF_ISSET(WT_LOGSCAN_RECOVER))
WT_RET(__wt_verbose(session, WT_VERB_LOG,
- "__wt_log_scan truncating to %u/%" PRIuMAX,
- log->trunc_lsn.file, (uintmax_t)log->trunc_lsn.offset));
+ "__wt_log_scan truncating to %u/%u",
+ log->trunc_lsn.l.file, log->trunc_lsn.l.offset));
if (log != NULL) {
allocsize = log->allocsize;
@@ -1468,8 +1482,8 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
"choose either a start LSN or a start flag");
/* Offsets must be on allocation boundaries. */
- if (lsnp->offset % allocsize != 0 ||
- lsnp->file > log->fileid)
+ if (lsnp->l.offset % allocsize != 0 ||
+ lsnp->l.file > log->fileid)
return (WT_NOTFOUND);
/*
@@ -1509,14 +1523,13 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
lastlog = WT_MAX(lastlog, lognum);
firstlog = WT_MIN(firstlog, lognum);
}
- start_lsn.file = firstlog;
- end_lsn.file = lastlog;
- start_lsn.offset = end_lsn.offset = 0;
+ WT_SET_LSN(&start_lsn, firstlog, 0);
+ WT_SET_LSN(&end_lsn, lastlog, 0);
__wt_log_files_free(session, logfiles, logcount);
logfiles = NULL;
}
WT_ERR(__log_openfile(
- session, false, &log_fh, WT_LOG_FILENAME, start_lsn.file));
+ session, false, &log_fh, WT_LOG_FILENAME, start_lsn.l.file));
WT_ERR(__wt_filesize(session, log_fh, &log_size));
rd_lsn = start_lsn;
@@ -1524,8 +1537,17 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
WT_ERR(__wt_scr_alloc(session, 0, &decryptitem));
WT_ERR(__wt_scr_alloc(session, 0, &uncitem));
for (;;) {
- if (rd_lsn.offset + allocsize > log_size) {
+ if (rd_lsn.l.offset + allocsize > log_size) {
advance:
+ if (rd_lsn.l.offset == log_size)
+ partial_record = false;
+ else
+ /*
+ * See if there is anything non-zero at the
+ * end of this log file.
+ */
+ WT_ERR(__log_has_hole(session, log_fh,
+ rd_lsn.l.offset, &partial_record));
/*
* If we read the last record, go to the next file.
*/
@@ -1538,16 +1560,24 @@ advance:
if (LF_ISSET(WT_LOGSCAN_RECOVER))
WT_ERR(__log_truncate(session,
&rd_lsn, WT_LOG_FILENAME, 1));
- rd_lsn.file++;
- rd_lsn.offset = 0;
+ /*
+ * If we had a partial record, we'll want to break
+ * now after closing and truncating. Although for now
+ * log_truncate does not modify the LSN passed in,
+ * this code does not assume it is unmodified after that
+ * call which is why it uses the boolean set earlier.
+ */
+ if (partial_record)
+ break;
+ WT_SET_LSN(&rd_lsn, rd_lsn.l.file + 1, 0);
/*
* Avoid an error message when we reach end of log
* by checking here.
*/
- if (rd_lsn.file > end_lsn.file)
+ if (rd_lsn.l.file > end_lsn.l.file)
break;
WT_ERR(__log_openfile(session,
- false, &log_fh, WT_LOG_FILENAME, rd_lsn.file));
+ false, &log_fh, WT_LOG_FILENAME, rd_lsn.l.file));
WT_ERR(__wt_filesize(session, log_fh, &log_size));
eol = false;
continue;
@@ -1557,14 +1587,16 @@ advance:
*/
WT_ASSERT(session, buf->memsize >= allocsize);
WT_ERR(__wt_read(session,
- log_fh, rd_lsn.offset, (size_t)allocsize, buf->mem));
+ log_fh, rd_lsn.l.offset, (size_t)allocsize, buf->mem));
/*
- * First 4 bytes is the real record length. See if we
- * need to read more than the allocation size. We expect
- * that we rarely will have to read more. Most log records
- * will be fairly small.
+ * See if we need to read more than the allocation size. We
+ * expect that we rarely will have to read more. Most log
+ * records will be fairly small.
*/
- reclen = *(uint32_t *)buf->mem;
+ reclen = ((WT_LOG_RECORD *)buf->mem)->len;
+#ifdef WORDS_BIGENDIAN
+ reclen = __wt_bswap32(reclen);
+#endif
/*
* Log files are pre-allocated. We need to detect the
* difference between a hole in the file (where this location
@@ -1578,7 +1610,7 @@ advance:
*/
if (reclen == 0) {
WT_ERR(__log_has_hole(
- session, log_fh, rd_lsn.offset, &eol));
+ session, log_fh, rd_lsn.l.offset, &eol));
if (eol)
/* Found a hole. This LSN is the end. */
break;
@@ -1590,28 +1622,40 @@ advance:
if (reclen > allocsize) {
/*
* The log file end could be the middle of this
- * log record.
+ * log record. If we have a partially written record
+ * then this is considered the end of the log.
*/
- if (rd_lsn.offset + rdup_len > log_size)
- goto advance;
+ if (rd_lsn.l.offset + rdup_len > log_size) {
+ eol = true;
+ break;
+ }
/*
* We need to round up and read in the full padded
* record, especially for direct I/O.
*/
WT_ERR(__wt_buf_grow(session, buf, rdup_len));
- WT_ERR(__wt_read(session,
- log_fh, rd_lsn.offset, (size_t)rdup_len, buf->mem));
+ WT_ERR(__wt_read(session, log_fh,
+ rd_lsn.l.offset, (size_t)rdup_len, buf->mem));
WT_STAT_FAST_CONN_INCR(session, log_scan_rereads);
}
/*
* We read in the record, verify checksum.
+ *
+ * Handle little- and big-endian objects. Objects are written
+ * in little-endian format: save the header checksum, and
+ * calculate the checksum for the header in its little-endian
+ * form. Then, restore the header's checksum, and byte-swap
+ * the whole thing as necessary, leaving us with a calculated
+ * checksum that should match the checksum in the header.
*/
buf->size = reclen;
logrec = (WT_LOG_RECORD *)buf->mem;
- cksum = logrec->checksum;
+ cksum_tmp = logrec->checksum;
logrec->checksum = 0;
- logrec->checksum = __wt_cksum(logrec, logrec->len);
- if (logrec->checksum != cksum) {
+ cksum_calculate = __wt_cksum(logrec, reclen);
+ logrec->checksum = cksum_tmp;
+ __wt_log_record_byteswap(logrec);
+ if (logrec->checksum != cksum_calculate) {
/*
* A checksum mismatch means we have reached the end of
* the useful part of the log. This should be found on
@@ -1636,8 +1680,8 @@ advance:
*/
WT_STAT_FAST_CONN_INCR(session, log_scan_records);
next_lsn = rd_lsn;
- next_lsn.offset += (wt_off_t)rdup_len;
- if (rd_lsn.offset != 0) {
+ next_lsn.l.offset += rdup_len;
+ if (rd_lsn.l.offset != 0) {
/*
* We need to manage the different buffers here.
* Buf is the buffer this function uses to read from
@@ -1890,10 +1934,19 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp,
rdup_len - record->size);
record->size = rdup_len;
}
+ /*
+ * Checksum a little-endian version of the header, and write everything
+ * in little-endian format. The checksum is (potentially) returned in a
+ * big-endian format, swap it into place in a separate step.
+ */
logrec = (WT_LOG_RECORD *)record->mem;
logrec->len = (uint32_t)record->size;
logrec->checksum = 0;
+ __wt_log_record_byteswap(logrec);
logrec->checksum = __wt_cksum(logrec, record->size);
+#ifdef WORDS_BIGENDIAN
+ logrec->checksum = __wt_bswap32(logrec->checksum);
+#endif
WT_STAT_FAST_CONN_INCR(session, log_writes);
@@ -2061,7 +2114,8 @@ __wt_log_flush(WT_SESSION_IMPL *session, uint32_t flags)
WT_RET(__wt_log_flush_lsn(session, &lsn, false));
WT_RET(__wt_verbose(session, WT_VERB_LOG,
- "log_flush: flags %d LSN %d/%lu", flags, lsn.file, lsn.offset));
+ "log_flush: flags %u LSN %u/%u",
+ flags, lsn.l.file, lsn.l.offset));
/*
* If the user wants write-no-sync, there is nothing more to do.
* If the user wants background sync, set the LSN and we're done.
diff --git a/src/log/log_slot.c b/src/log/log_slot.c
index 760e8888de6..2844516e78f 100644
--- a/src/log/log_slot.c
+++ b/src/log/log_slot.c
@@ -31,8 +31,8 @@ __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
* are reset when the slot is freed. See log_slot_free.
*/
slot->slot_start_lsn = slot->slot_end_lsn = log->alloc_lsn;
- slot->slot_start_offset = log->alloc_lsn.offset;
- slot->slot_last_offset = log->alloc_lsn.offset;
+ slot->slot_start_offset = log->alloc_lsn.l.offset;
+ slot->slot_last_offset = log->alloc_lsn.l.offset;
slot->slot_fh = log->log_fh;
slot->slot_error = 0;
slot->slot_unbuffered = 0;
@@ -96,14 +96,15 @@ retry:
slot->slot_end_lsn = slot->slot_start_lsn;
end_offset =
WT_LOG_SLOT_JOINED_BUFFERED(old_state) + slot->slot_unbuffered;
- slot->slot_end_lsn.offset += (wt_off_t)end_offset;
+ slot->slot_end_lsn.l.offset += end_offset;
WT_STAT_FAST_CONN_INCRV(session,
log_slot_consolidated, end_offset);
/*
* XXX Would like to change so one piece of code advances the LSN.
*/
log->alloc_lsn = slot->slot_end_lsn;
- WT_ASSERT(session, log->alloc_lsn.file >= log->write_lsn.file);
+ WT_ASSERT(session,
+ log->alloc_lsn.l.file >= log->write_lsn.l.file);
return (0);
}
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
index 1bb9a7238fe..f76b2bfd9ac 100644
--- a/src/lsm/lsm_cursor.c
+++ b/src/lsm/lsm_cursor.c
@@ -1543,7 +1543,7 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
bulk = cval.val != 0;
/* Get the LSM tree. */
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_lsm_tree_get(session, uri, bulk, &lsm_tree));
/*
* Check whether the exclusive open for a bulk load succeeded, and
diff --git a/src/lsm/lsm_stat.c b/src/lsm/lsm_stat.c
index c147cf5774a..76e2ca6185e 100644
--- a/src/lsm/lsm_stat.c
+++ b/src/lsm/lsm_stat.c
@@ -33,7 +33,7 @@ __curstat_lsm_init(
"checkpoint=" WT_CHECKPOINT, NULL, NULL };
locked = false;
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree));
WT_RET(ret);
WT_ERR(__wt_scr_alloc(session, 0, &uribuf));
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index ff6e66fd1a1..ab18e41a2f5 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -315,7 +315,7 @@ __wt_lsm_tree_create(WT_SESSION_IMPL *session,
char *tmpconfig;
/* If the tree is open, it already exists. */
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_lsm_tree_get(session, uri, false, &lsm_tree));
if (ret == 0) {
__wt_lsm_tree_release(session, lsm_tree);
@@ -447,7 +447,7 @@ __wt_lsm_tree_create(WT_SESSION_IMPL *session,
* tracking macros handle cleaning up on failure.
*/
if (ret == 0)
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __lsm_tree_open(session, uri, true, &lsm_tree));
if (ret == 0)
__wt_lsm_tree_release(session, lsm_tree);
@@ -961,7 +961,7 @@ __wt_lsm_tree_drop(
locked = false;
/* Get the LSM tree. */
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_lsm_tree_get(session, name, true, &lsm_tree));
WT_RET(ret);
@@ -997,7 +997,7 @@ __wt_lsm_tree_drop(
err: if (locked)
WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree));
- WT_WITH_HANDLE_LIST_LOCK(session, tret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
tret = __lsm_tree_discard(session, lsm_tree, false));
WT_TRET(tret);
return (ret);
@@ -1023,7 +1023,7 @@ __wt_lsm_tree_rename(WT_SESSION_IMPL *session,
locked = false;
/* Get the LSM tree. */
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_lsm_tree_get(session, olduri, true, &lsm_tree));
WT_RET(ret);
@@ -1073,7 +1073,7 @@ err: if (locked)
* Discard this LSM tree structure. The first operation on the renamed
* tree will create a new one.
*/
- WT_WITH_HANDLE_LIST_LOCK(session, tret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
tret = __lsm_tree_discard(session, lsm_tree, false));
WT_TRET(tret);
return (ret);
@@ -1098,7 +1098,7 @@ __wt_lsm_tree_truncate(
locked = false;
/* Get the LSM tree. */
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_lsm_tree_get(session, name, true, &lsm_tree));
WT_RET(ret);
@@ -1137,7 +1137,7 @@ err: if (locked)
* the last good version of the metadata will be used, resulting
* in a valid (not truncated) tree.
*/
- WT_WITH_HANDLE_LIST_LOCK(session, tret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
tret = __lsm_tree_discard(session, lsm_tree, false));
WT_TRET(tret);
}
@@ -1237,7 +1237,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
/* Tell __wt_schema_worker not to look inside the LSM tree. */
*skipp = true;
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_lsm_tree_get(session, name, false, &lsm_tree));
WT_RET(ret);
@@ -1435,7 +1435,7 @@ __wt_lsm_tree_worker(WT_SESSION_IMPL *session,
locked = false;
exclusive = FLD_ISSET(open_flags, WT_DHANDLE_EXCLUSIVE);
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_lsm_tree_get(session, uri, exclusive, &lsm_tree));
WT_RET(ret);
diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c
index 4faa25967ad..d5d81df6785 100644
--- a/src/lsm/lsm_work_unit.c
+++ b/src/lsm/lsm_work_unit.c
@@ -272,7 +272,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
!F_ISSET(chunk, WT_LSM_CHUNK_STABLE) &&
!chunk->evicted) {
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __lsm_discard_handle(session, chunk->uri, NULL));
if (ret == 0)
chunk->evicted = 1;
@@ -506,7 +506,7 @@ __lsm_drop_file(WT_SESSION_IMPL *session, const char *uri)
*
* This will fail with EBUSY if the file is still in use.
*/
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __lsm_discard_handle(session, uri, WT_CHECKPOINT));
WT_RET(ret);
diff --git a/src/meta/meta_ckpt.c b/src/meta/meta_ckpt.c
index f7da8525639..df4cd2cb4d6 100644
--- a/src/meta/meta_ckpt.c
+++ b/src/meta/meta_ckpt.c
@@ -451,7 +451,7 @@ __wt_meta_ckptlist_set(WT_SESSION_IMPL *session,
if (ckptlsn != NULL)
WT_ERR(__wt_buf_catfmt(session, buf,
",checkpoint_lsn=(%" PRIu32 ",%" PRIuMAX ")",
- ckptlsn->file, (uintmax_t)ckptlsn->offset));
+ ckptlsn->l.file, (uintmax_t)ckptlsn->l.offset));
WT_ERR(__ckpt_set(session, fname, buf->mem));
err: __wt_scr_free(session, &buf);
diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c
index 9938cb07a5c..61cc009c983 100644
--- a/src/meta/meta_table.c
+++ b/src/meta/meta_table.c
@@ -9,6 +9,18 @@
#include "wt_internal.h"
/*
+ * __wt_metadata_init --
+ * Metadata initialization: store the metadata URI's hash in the connection.
+ */
+void
+__wt_metadata_init(WT_SESSION_IMPL *session)
+{
+ /* We cache the metadata file's URI hash for fast detection. */
+ S2C(session)->meta_uri_hash =
+ __wt_hash_city64(WT_METAFILE_URI, strlen(WT_METAFILE_URI));
+}
+
+/*
* __metadata_turtle --
* Return if a key's value should be taken from the turtle file.
*/
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index 332449027a9..c25d7b5e493 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -1889,15 +1889,18 @@ __wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize)
* we don't waste space when we write).
*/
a = maxpagesize; /* Don't overflow. */
- split_size = (uint32_t)
- WT_ALIGN((a * (u_int)btree->split_pct) / 100, btree->allocsize);
+ split_size = (uint32_t)WT_ALIGN_NEAREST(
+ (a * (u_int)btree->split_pct) / 100, btree->allocsize);
/*
- * If the result of that calculation is the same as the allocation unit
- * (that happens if the maximum size is the same size as an allocation
- * unit, use a percentage of the maximum page size).
+ * Respect the configured split percentage if the calculated split
+ * size is either zero or a full page. The user has either configured
+ * an allocation size that matches the page size, or a split
+ * percentage that is close to zero or one hundred. Rounding is going
+ * to provide a worse outcome than having a split point that doesn't
+ * fall on an allocation size boundary in those cases.
*/
- if (split_size == btree->allocsize)
+ if (split_size == 0 || split_size == maxpagesize)
split_size = (uint32_t)((a * (u_int)btree->split_pct) / 100);
return (split_size);
@@ -3286,6 +3289,17 @@ supd_check_complete:
}
}
+ bnd->entries = r->entries;
+ /* Output a verbose message if we create a page without many entries */
+ if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && r->entries < 6)
+ WT_ERR(__wt_verbose(session, WT_VERB_SPLIT,
+ "Reconciliation creating a page with %" PRIu32
+ " entries, memory footprint %" PRIu64
+ ", page count %" PRIu32 ", %s, split state: %d\n",
+ r->entries, r->page->memory_footprint, r->bnd_next,
+ F_ISSET(r, WT_EVICTING) ? "evict" : "checkpoint",
+ r->bnd_state));
+
WT_ERR(__wt_bt_write(session,
buf, addr, &addr_size, false, bnd->already_compressed));
WT_ERR(__wt_strndup(session, addr, addr_size, &bnd->addr.addr));
diff --git a/src/schema/schema_drop.c b/src/schema/schema_drop.c
index 6ac76930c9a..ead8cc45c62 100644
--- a/src/schema/schema_drop.c
+++ b/src/schema/schema_drop.c
@@ -29,7 +29,7 @@ __drop_file(
return (EINVAL);
/* Close all btree handles associated with this file. */
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_conn_dhandle_close_all(session, uri, force));
WT_RET(ret);
diff --git a/src/schema/schema_rename.c b/src/schema/schema_rename.c
index 4ec126394dd..21402ed9332 100644
--- a/src/schema/schema_rename.c
+++ b/src/schema/schema_rename.c
@@ -30,7 +30,7 @@ __rename_file(
return (EINVAL);
/* Close any btree handles in the file. */
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_conn_dhandle_close_all(session, uri, false));
WT_ERR(ret);
diff --git a/src/schema/schema_worker.c b/src/schema/schema_worker.c
index a2fe5244c4d..b5ee3bb7f7d 100644
--- a/src/schema/schema_worker.c
+++ b/src/schema/schema_worker.c
@@ -49,7 +49,7 @@ __wt_schema_worker(WT_SESSION_IMPL *session,
* any open file handles, including checkpoints.
*/
if (FLD_ISSET(open_flags, WT_DHANDLE_EXCLUSIVE)) {
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_conn_dhandle_close_all(
session, uri, false));
WT_ERR(ret);
@@ -63,7 +63,7 @@ __wt_schema_worker(WT_SESSION_IMPL *session,
} else if (ret == EBUSY) {
WT_ASSERT(session, !FLD_ISSET(
open_flags, WT_DHANDLE_EXCLUSIVE));
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_conn_btree_apply_single_ckpt(
session, uri, file_func, cfg));
}
diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c
index 1ac758c0cee..1ee3342442c 100644
--- a/src/session/session_dhandle.c
+++ b/src/session/session_dhandle.c
@@ -72,7 +72,8 @@ __session_find_dhandle(WT_SESSION_IMPL *session,
bucket = __wt_hash_city64(uri, strlen(uri)) % WT_HASH_ARRAY_SIZE;
retry: TAILQ_FOREACH(dhandle_cache, &session->dhhash[bucket], hashq) {
dhandle = dhandle_cache->dhandle;
- if (WT_DHANDLE_INACTIVE(dhandle) && !WT_IS_METADATA(dhandle)) {
+ if (WT_DHANDLE_INACTIVE(dhandle) &&
+ !WT_IS_METADATA(session, dhandle)) {
__session_discard_dhandle(session, dhandle_cache);
/* We deleted our entry, retry from the start. */
goto retry;
@@ -407,7 +408,7 @@ __session_dhandle_sweep(WT_SESSION_IMPL *session)
difftime(now, dhandle->timeofdeath) >
conn->sweep_idle_time))) {
WT_STAT_FAST_CONN_INCR(session, dh_session_handles);
- WT_ASSERT(session, !WT_IS_METADATA(dhandle));
+ WT_ASSERT(session, !WT_IS_METADATA(session, dhandle));
__session_discard_dhandle(session, dhandle_cache);
}
dhandle_cache = dhandle_cache_next;
@@ -453,7 +454,7 @@ __session_get_dhandle(
* We didn't find a match in the session cache, search the shared
* handle list and cache the handle we find.
*/
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __session_find_shared_dhandle(session, uri, checkpoint));
if (ret == 0)
ret = __session_add_dhandle(session, NULL);
@@ -510,7 +511,7 @@ __wt_session_get_btree(WT_SESSION_IMPL *session,
WT_RET(__wt_writeunlock(session, dhandle->rwlock));
WT_WITH_SCHEMA_LOCK(session, ret,
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_session_get_btree(
session, uri, checkpoint, cfg, flags)));
diff --git a/src/support/crypto.c b/src/support/crypto.c
index 1049621fb44..ab94ec2c829 100644
--- a/src/support/crypto.c
+++ b/src/support/crypto.c
@@ -21,8 +21,12 @@ __wt_decrypt(WT_SESSION_IMPL *session,
uint32_t encrypt_len;
uint8_t *dst, *src;
- encrypt_len = WT_STORE_SIZE(*((uint32_t *)
- ((uint8_t *)in->data + skip)));
+ encrypt_len =
+ WT_STORE_SIZE(*((uint32_t *)((uint8_t *)in->data + skip)));
+#ifdef WORDS_BIGENDIAN
+ encrypt_len = __wt_bswap32(encrypt_len);
+#endif
+
if (encrypt_len > in->size)
WT_RET_MSG(session, WT_ERROR,
"corrupted encrypted item: padded size less than "
@@ -104,6 +108,9 @@ __wt_encrypt(WT_SESSION_IMPL *session,
* decryption side.
*/
*unpadded_lenp = WT_STORE_SIZE(result_len);
+#ifdef WORDS_BIGENDIAN
+ *unpadded_lenp = __wt_bswap32(*unpadded_lenp);
+#endif
/*
* Copy in the skipped header bytes, set the final data size.
*/
diff --git a/src/support/global.c b/src/support/global.c
index 0234455b6ce..e0d5bafeaa8 100644
--- a/src/support/global.c
+++ b/src/support/global.c
@@ -12,6 +12,35 @@ WT_PROCESS __wt_process; /* Per-process structure */
static int __wt_pthread_once_failed; /* If initialization failed */
/*
+ * __wt_endian_check --
+ * Check the build matches the machine.
+ */
+static int
+__wt_endian_check(void)
+{
+ uint64_t v;
+ bool big;
+ const char *e;
+
+ /*
+  * Store 1 in a multi-byte integer and look at its first byte in
+  * memory: that byte is zero only when the most-significant byte is
+  * stored first, that is, on a big-endian machine.
+  */
+ v = 1;
+ big = *((uint8_t *)&v) == 0;
+
+#ifdef WORDS_BIGENDIAN
+ /* Big-endian build: fine only if the machine is big-endian too. */
+ if (big)
+ return (0);
+ e = "big-endian";
+#else
+ /* Little-endian build: fine only if the machine is little-endian. */
+ if (!big)
+ return (0);
+ e = "little-endian";
+#endif
+ /* Mismatch: name the byte order of the build and fail with EINVAL. */
+ fprintf(stderr,
+ "This is a %s build of the WiredTiger data engine, incompatible "
+ "with this system\n", e);
+ return (EINVAL);
+}
+
+/*
* __wt_global_once --
* Global initialization, run once.
*/
@@ -31,10 +60,6 @@ __wt_global_once(void)
TAILQ_INIT(&__wt_process.connqh);
#ifdef HAVE_DIAGNOSTIC
- /* Verify the pre-computed metadata hash. */
- WT_ASSERT(NULL, WT_METAFILE_NAME_HASH ==
- __wt_hash_city64(WT_METAFILE_URI, strlen(WT_METAFILE_URI)));
-
/* Load debugging code the compiler might optimize out. */
(void)__wt_breakpoint();
#endif
@@ -50,6 +75,9 @@ __wt_library_init(void)
static bool first = true;
WT_DECL_RET;
+ /* Check the build matches the machine. */
+ WT_RET(__wt_endian_check());
+
/*
* Do per-process initialization once, before anything else, but only
* once. I don't know how heavy-weight the function (pthread_once, in
diff --git a/src/support/hash_city.c b/src/support/hash_city.c
index 5780cd7b459..7a700aa809c 100644
--- a/src/support/hash_city.c
+++ b/src/support/hash_city.c
@@ -57,7 +57,6 @@
* compromising on hash quality.
*/
-#include <string.h>
#include "wt_internal.h"
/*
@@ -86,33 +85,60 @@ static uint32_t UNALIGNED_LOAD32(const char *p) {
return (result);
}
-#if !defined(WORDS_BIGENDIAN)
+#ifdef _MSC_VER
-#define uint32_in_expected_order(x) (x)
-#define uint64_in_expected_order(x) (x)
+#include <stdlib.h>
+#define bswap_32(x) _byteswap_ulong(x)
+#define bswap_64(x) _byteswap_uint64(x)
-#else
+#elif defined(__APPLE__)
-#ifdef __APPLE__
-/* Mac OS X / Darwin features */
+/* Mac OS X / Darwin features */
#include <libkern/OSByteOrder.h>
#define bswap_32(x) OSSwapInt32(x)
#define bswap_64(x) OSSwapInt64(x)
-#elif defined(__sun)
+#elif defined(__sun) || defined(sun)
#include <sys/byteorder.h>
-#define bswap_32 BSWAP_32
-#define bswap_64 BSWAP_64
+#define bswap_32(x) BSWAP_32(x)
+#define bswap_64(x) BSWAP_64(x)
+
+#elif defined(__FreeBSD__)
+
+#include <sys/endian.h>
+#define bswap_32(x) bswap32(x)
+#define bswap_64(x) bswap64(x)
+
+#elif defined(__OpenBSD__)
+
+#include <sys/types.h>
+#define bswap_32(x) swap32(x)
+#define bswap_64(x) swap64(x)
+
+#elif defined(__NetBSD__)
+
+#include <sys/types.h>
+#include <machine/bswap.h>
+#if defined(__BSWAP_RENAME) && !defined(__bswap_32)
+#define bswap_32(x) bswap32(x)
+#define bswap_64(x) bswap64(x)
+#endif
#else
-#include <byteswap.h>
+
+#define bswap_32(x) __wt_bswap32(x)
+#define bswap_64(x) __wt_bswap64(x)
+
#endif
+#ifdef WORDS_BIGENDIAN
#define uint32_in_expected_order(x) (bswap_32(x))
#define uint64_in_expected_order(x) (bswap_64(x))
-
-#endif /* WORDS_BIGENDIAN */
+#else
+#define uint32_in_expected_order(x) (x)
+#define uint64_in_expected_order(x) (x)
+#endif
static uint64_t Fetch64(const char *p) {
return uint64_in_expected_order(UNALIGNED_LOAD64(p));
diff --git a/src/support/hash_fnv.c b/src/support/hash_fnv.c
index 35e7e5f3a73..83dd2574099 100644
--- a/src/support/hash_fnv.c
+++ b/src/support/hash_fnv.c
@@ -83,7 +83,6 @@
* Share and Enjoy! :-)
*/
-#include <stdlib.h>
#include "wt_internal.h"
/*
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index 7d4d4d5c27c..6a2c1eef826 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -184,7 +184,7 @@ __checkpoint_apply(WT_SESSION_IMPL *session, const char *cfg[],
session->ckpt_handle[i].dhandle,
ret = (*op)(session, cfg));
else
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __wt_conn_btree_apply_single(session,
session->ckpt_handle[i].name, NULL, op, cfg));
WT_RET(ret);
@@ -387,7 +387,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
*/
WT_WITH_SCHEMA_LOCK(session, ret,
WT_WITH_TABLE_LOCK(session, ret,
- WT_WITH_HANDLE_LIST_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
ret = __checkpoint_apply_all(
session, cfg, __wt_checkpoint_list, NULL))));
WT_ERR(ret);
@@ -812,7 +812,7 @@ __checkpoint_worker(WT_SESSION_IMPL *session,
* - On connection close when we know there can't be any races.
*/
WT_ASSERT(session, !need_tracking ||
- WT_IS_METADATA(dhandle) || WT_META_TRACKING(session));
+ WT_IS_METADATA(session, dhandle) || WT_META_TRACKING(session));
/*
* Set the checkpoint LSN to the maximum LSN so that if logging is
@@ -1135,7 +1135,7 @@ fake: /*
* recovery and open a checkpoint that isn't yet durable.
*/
if (F_ISSET(conn, WT_CONN_CKPT_SYNC) &&
- (WT_IS_METADATA(dhandle) ||
+ (WT_IS_METADATA(session, dhandle) ||
!F_ISSET(&session->txn, WT_TXN_RUNNING)))
WT_ERR(__wt_checkpoint_sync(session, NULL));
diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c
index 4c4a7fb3132..37a6e0b3711 100644
--- a/src/txn/txn_log.c
+++ b/src/txn/txn_log.c
@@ -266,14 +266,16 @@ __wt_txn_checkpoint_logread(
WT_LSN *ckpt_lsn)
{
WT_ITEM ckpt_snapshot;
+ uint32_t ckpt_file, ckpt_offset;
u_int ckpt_nsnapshot;
- const char *fmt = WT_UNCHECKED_STRING(IQIU);
+ const char *fmt = WT_UNCHECKED_STRING(IIIU);
WT_RET(__wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
- &ckpt_lsn->file, &ckpt_lsn->offset,
+ &ckpt_file, &ckpt_offset,
&ckpt_nsnapshot, &ckpt_snapshot));
WT_UNUSED(ckpt_nsnapshot);
WT_UNUSED(ckpt_snapshot);
+ WT_SET_LSN(ckpt_lsn, ckpt_file, ckpt_offset);
*pp = end;
return (0);
}
@@ -294,7 +296,7 @@ __wt_txn_checkpoint_log(
uint8_t *end, *p;
size_t recsize;
uint32_t i, rectype = WT_LOGREC_CHECKPOINT;
- const char *fmt = WT_UNCHECKED_STRING(IIQIU);
+ const char *fmt = WT_UNCHECKED_STRING(IIIIU);
txn = &session->txn;
ckpt_lsn = &txn->ckpt_lsn;
@@ -350,13 +352,13 @@ __wt_txn_checkpoint_log(
/* Write the checkpoint log record. */
WT_ERR(__wt_struct_size(session, &recsize, fmt,
- rectype, ckpt_lsn->file, ckpt_lsn->offset,
+ rectype, ckpt_lsn->l.file, ckpt_lsn->l.offset,
txn->ckpt_nsnapshot, ckpt_snapshot));
WT_ERR(__wt_logrec_alloc(session, recsize, &logrec));
WT_ERR(__wt_struct_pack(session,
(uint8_t *)logrec->data + logrec->size, recsize, fmt,
- rectype, ckpt_lsn->file, ckpt_lsn->offset,
+ rectype, ckpt_lsn->l.file, ckpt_lsn->l.offset,
txn->ckpt_nsnapshot, ckpt_snapshot));
logrec->size += (uint32_t)recsize;
WT_ERR(__wt_log_write(session, logrec, lsnp,
@@ -465,12 +467,11 @@ __txn_printlog(WT_SESSION_IMPL *session,
{
FILE *out;
WT_LOG_RECORD *logrec;
- WT_LSN ckpt_lsn;
WT_TXN_PRINTLOG_ARGS *args;
const uint8_t *end, *p;
const char *msg;
uint64_t txnid;
- uint32_t fileid, rectype;
+ uint32_t fileid, lsnfile, lsnoffset, rectype;
int32_t start;
bool compressed;
@@ -490,8 +491,8 @@ __txn_printlog(WT_SESSION_IMPL *session,
WT_RET(__wt_fprintf(out, ",\n"));
WT_RET(__wt_fprintf(out,
- " { \"lsn\" : [%" PRIu32 ",%" PRId64 "],\n",
- lsnp->file, lsnp->offset));
+ " { \"lsn\" : [%" PRIu32 ",%" PRIu32 "],\n",
+ lsnp->l.file, lsnp->l.offset));
WT_RET(__wt_fprintf(out,
" \"hdr_flags\" : \"%s\",\n", compressed ? "compressed" : ""));
WT_RET(__wt_fprintf(out,
@@ -503,11 +504,11 @@ __txn_printlog(WT_SESSION_IMPL *session,
switch (rectype) {
case WT_LOGREC_CHECKPOINT:
WT_RET(__wt_struct_unpack(session, p, WT_PTRDIFF(end, p),
- WT_UNCHECKED_STRING(IQ), &ckpt_lsn.file, &ckpt_lsn.offset));
+ WT_UNCHECKED_STRING(II), &lsnfile, &lsnoffset));
WT_RET(__wt_fprintf(out, " \"type\" : \"checkpoint\",\n"));
WT_RET(__wt_fprintf(out,
- " \"ckpt_lsn\" : [%" PRIu32 ",%" PRId64 "]\n",
- ckpt_lsn.file, ckpt_lsn.offset));
+ " \"ckpt_lsn\" : [%" PRIu32 ",%" PRIu32 "]\n",
+ lsnfile, lsnoffset));
break;
case WT_LOGREC_COMMIT:
diff --git a/src/txn/txn_recover.c b/src/txn/txn_recover.c
index 8051d059d7e..e6bd8a8d755 100644
--- a/src/txn/txn_recover.c
+++ b/src/txn/txn_recover.c
@@ -91,9 +91,9 @@ __recovery_cursor(WT_SESSION_IMPL *session, WT_RECOVERY *r,
WT_ERR(__recovery_cursor( \
(session), (r), (lsnp), (fileid), false, (cp))); \
WT_ERR(__wt_verbose((session), WT_VERB_RECOVERY, \
- "%s op %d to file %d at LSN %u/%" PRIuMAX, \
+ "%s op %d to file %d at LSN %u/%u", \
(cursor == NULL) ? "Skipping" : "Applying", \
- optype, fileid, lsnp->file, (uintmax_t)lsnp->offset)); \
+ optype, fileid, lsnp->l.file, lsnp->l.offset)); \
if (cursor == NULL) \
break
@@ -303,8 +303,7 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
{
WT_CONFIG_ITEM cval;
WT_LSN lsn;
- intmax_t offset;
- uint32_t fileid;
+ uint32_t fileid, lsnfile, lsnoffset;
WT_RET(__wt_config_getones(r->session, config, "id", &cval));
fileid = (uint32_t)cval.val;
@@ -326,8 +325,8 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
if (cval.type != WT_CONFIG_ITEM_STRUCT)
WT_INIT_LSN(&lsn);
else if (sscanf(cval.str,
- "(%" SCNu32 ",%" SCNdMAX ")", &lsn.file, &offset) == 2)
- lsn.offset = offset;
+ "(%" SCNu32 ",%" SCNu32 ")", &lsnfile, &lsnoffset) == 2)
+ WT_SET_LSN(&lsn, lsnfile, lsnoffset);
else
WT_RET_MSG(r->session, EINVAL,
"Failed to parse checkpoint LSN '%.*s'",
@@ -335,8 +334,8 @@ __recovery_setup_file(WT_RECOVERY *r, const char *uri, const char *config)
r->files[fileid].ckpt_lsn = lsn;
WT_RET(__wt_verbose(r->session, WT_VERB_RECOVERY,
- "Recovering %s with id %u @ (%" PRIu32 ", %" PRIu64 ")",
- uri, fileid, lsn.file, lsn.offset));
+ "Recovering %s with id %u @ (%" PRIu32 ", %" PRIu32 ")",
+ uri, fileid, lsn.l.file, lsn.l.offset));
return (0);
@@ -485,8 +484,8 @@ __wt_txn_recover(WT_SESSION_IMPL *session)
*/
r.metadata_only = false;
WT_ERR(__wt_verbose(session, WT_VERB_RECOVERY,
- "Main recovery loop: starting at %u/%" PRIuMAX,
- r.ckpt_lsn.file, (uintmax_t)r.ckpt_lsn.offset));
+ "Main recovery loop: starting at %u/%u",
+ r.ckpt_lsn.l.file, r.ckpt_lsn.l.offset));
WT_ERR(__wt_log_needs_recovery(session, &r.ckpt_lsn, &needs_rec));
/*
* Check if the database was shut down cleanly. If not
diff --git a/test/format/config.c b/test/format/config.c
index 866e210e556..d431546f254 100644
--- a/test/format/config.c
+++ b/test/format/config.c
@@ -244,45 +244,58 @@ config_compression(const char *conf_name)
const char *cstr;
char confbuf[128];
+ /* Return if already specified. */
+ if (config_is_perm(conf_name))
+ return;
+
/*
- * Compression: choose something if compression wasn't specified,
- * otherwise confirm the appropriate shared library is available.
- * We used to verify that the libraries existed but that's no longer
- * robust, since it's possible to build compression libraries into
- * the WiredTiger library.
+ * Don't configure a compression engine for logging if logging isn't
+ * configured (it won't break, but it's confusing).
*/
- if (!config_is_perm(conf_name)) {
- cstr = "none";
- switch (mmrand(NULL, 1, 20)) {
- case 1: case 2: case 3: case 4: /* 20% no compression */
- break;
- case 5: /* 5% bzip */
- cstr = "bzip";
- break;
- case 6: /* 5% bzip-raw */
- cstr = "bzip-raw";
- break;
- case 7: case 8: case 9: case 10: /* 20% lz4 */
- cstr = "lz4";
- break;
- case 11: /* 5% lz4-no-raw */
- cstr = "lz4-noraw";
- break;
- case 12: case 13: case 14: case 15: /* 20% snappy */
- cstr = "snappy";
- break;
- case 16: case 17: case 18: case 19: /* 20% zlib */
- cstr = "zlib";
- break;
- case 20: /* 5% zlib-no-raw */
- cstr = "zlib-noraw";
- break;
- }
-
- (void)snprintf(confbuf, sizeof(confbuf), "%s=%s", conf_name,
- cstr);
+ cstr = "none";
+ if (strcmp(conf_name, "logging_compression") == 0 && g.c_logging == 0) {
+ (void)snprintf(
+ confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr);
config_single(confbuf, 0);
+ return;
}
+
+ /*
+ * Select a compression type from the list of built-in engines.
+ *
+ * Listed percentages are only correct if all of the possible engines
+ * are compiled in.
+ */
+ switch (mmrand(NULL, 1, 20)) {
+#ifdef HAVE_BUILTIN_EXTENSION_LZ4
+ case 1: case 2: case 3: case 4: /* 20% lz4 */
+ cstr = "lz4";
+ break;
+ case 5: /* 5% lz4-no-raw */
+ cstr = "lz4-noraw";
+ break;
+#endif
+#ifdef HAVE_BUILTIN_EXTENSION_SNAPPY
+ case 6: case 7: case 8: case 9: /* 30% snappy */
+ case 10: case 11:
+ cstr = "snappy";
+ break;
+#endif
+#ifdef HAVE_BUILTIN_EXTENSION_ZLIB
+ case 12: case 13: case 14: case 15: /* 20% zlib */
+ cstr = "zlib";
+ break;
+ case 16: /* 5% zlib-no-raw */
+ cstr = "zlib-noraw";
+ break;
+#endif
+ case 17: case 18: case 19: case 20: /* 20% no compression */
+ default:
+ break;
+ }
+
+ (void)snprintf(confbuf, sizeof(confbuf), "%s=%s", conf_name, cstr);
+ config_single(confbuf, 0);
}
/*
@@ -641,10 +654,6 @@ config_map_compression(const char *s, u_int *vp)
{
if (strcmp(s, "none") == 0)
*vp = COMPRESS_NONE;
- else if (strcmp(s, "bzip") == 0)
- *vp = COMPRESS_BZIP;
- else if (strcmp(s, "bzip-raw") == 0)
- *vp = COMPRESS_BZIP_RAW;
else if (strcmp(s, "lz4") == 0)
*vp = COMPRESS_LZ4;
else if (strcmp(s, "lz4-noraw") == 0)
diff --git a/test/format/config.h b/test/format/config.h
index a5190469c7f..d8b11b005d4 100644
--- a/test/format/config.h
+++ b/test/format/config.h
@@ -58,8 +58,7 @@ typedef struct {
} CONFIG;
#define COMPRESSION_LIST \
- "(none | bzip | bzip-raw | lz4 | lz4-noraw | lzo | none | " \
- "snappy | zlib | zlib-noraw)"
+ "(none | lz4 | lz4-noraw | snappy | zlib | zlib-noraw)"
static CONFIG c[] = {
{ "abort",
diff --git a/test/format/format.h b/test/format/format.h
index bf4d1c0a277..41c9de3dd30 100644
--- a/test/format/format.h
+++ b/test/format/format.h
@@ -64,8 +64,6 @@
#define EXTPATH "../../ext/" /* Extensions path */
-#define BZIP_PATH \
- EXTPATH "compressors/bzip2/.libs/libwiredtiger_bzip2.so"
#define LZ4_PATH \
EXTPATH "compressors/lz4/.libs/libwiredtiger_lz4.so"
#define SNAPPY_PATH \
@@ -241,14 +239,12 @@ typedef struct {
u_int c_checksum_flag; /* Checksum flag value */
#define COMPRESS_NONE 1
-#define COMPRESS_BZIP 2
-#define COMPRESS_BZIP_RAW 3
-#define COMPRESS_LZ4 4
-#define COMPRESS_LZ4_NO_RAW 5
-#define COMPRESS_LZO 6
-#define COMPRESS_SNAPPY 7
-#define COMPRESS_ZLIB 8
-#define COMPRESS_ZLIB_NO_RAW 9
+#define COMPRESS_LZ4 2
+#define COMPRESS_LZ4_NO_RAW 3
+#define COMPRESS_LZO 4
+#define COMPRESS_SNAPPY 5
+#define COMPRESS_ZLIB 6
+#define COMPRESS_ZLIB_NO_RAW 7
u_int c_compression_flag; /* Compression flag value */
u_int c_logging_compression_flag; /* Log compression flag value */
diff --git a/test/format/t.c b/test/format/t.c
index 37ba982c987..ccbc0442e4a 100644
--- a/test/format/t.c
+++ b/test/format/t.c
@@ -45,10 +45,14 @@ main(int argc, char *argv[])
config = NULL;
+#ifdef _WIN32
+ g.progname = "t_format.exe";
+#else
if ((g.progname = strrchr(argv[0], DIR_DELIM)) == NULL)
g.progname = argv[0];
else
++g.progname;
+#endif
#if 0
/* Configure the GNU malloc for debugging. */
diff --git a/test/format/wts.c b/test/format/wts.c
index b75a0c793eb..9d4d3fe5cb8 100644
--- a/test/format/wts.c
+++ b/test/format/wts.c
@@ -38,10 +38,6 @@ compressor(uint32_t compress_flag)
switch (compress_flag) {
case COMPRESS_NONE:
return ("none");
- case COMPRESS_BZIP:
- return ("bzip2");
- case COMPRESS_BZIP_RAW:
- return ("bzip2-raw-test");
case COMPRESS_LZ4:
return ("lz4");
case COMPRESS_LZ4_NO_RAW:
@@ -143,11 +139,7 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp)
"create,checkpoint_sync=false,cache_size=%" PRIu32 "MB",
g.c_cache);
-#ifdef _WIN32
- p += snprintf(p, REMAIN(p, end), ",error_prefix=\"t_format.exe\"");
-#else
p += snprintf(p, REMAIN(p, end), ",error_prefix=\"%s\"", g.progname);
-#endif
/* In-memory configuration. */
if (g.c_in_memory != 0)
@@ -178,7 +170,7 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp)
",encryption=(name=%s)", encryptor(g.c_encryption_flag));
/* Miscellaneous. */
-#ifndef _WIN32
+#ifdef HAVE_POSIX_MEMALIGN
p += snprintf(p, REMAIN(p, end), ",buffer_alignment=512");
#endif
@@ -210,9 +202,8 @@ wts_open(const char *home, int set_api, WT_CONNECTION **connp)
/* Extensions. */
p += snprintf(p, REMAIN(p, end),
",extensions=["
- "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],",
+ "\"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"],",
g.c_reverse ? REVERSE_PATH : "",
- access(BZIP_PATH, R_OK) == 0 ? BZIP_PATH : "",
access(LZ4_PATH, R_OK) == 0 ? LZ4_PATH : "",
access(LZO_PATH, R_OK) == 0 ? LZO_PATH : "",
access(ROTN_PATH, R_OK) == 0 ? ROTN_PATH : "",
diff --git a/test/packing/intpack-test.c b/test/packing/intpack-test.c
index a48f9fc4fe4..08cc3807725 100644
--- a/test/packing/intpack-test.c
+++ b/test/packing/intpack-test.c
@@ -26,10 +26,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <assert.h>
-
#include "wt_internal.h" /* For __wt_XXX */
+#include <assert.h>
+
int
main(void)
{
diff --git a/test/packing/intpack-test2.c b/test/packing/intpack-test2.c
index 68f794f0bf5..7555d2724e7 100644
--- a/test/packing/intpack-test2.c
+++ b/test/packing/intpack-test2.c
@@ -26,10 +26,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <assert.h>
-
#include "wt_internal.h" /* For __wt_XXX */
+#include <assert.h>
+
int
main(void)
{
diff --git a/test/packing/intpack-test3.c b/test/packing/intpack-test3.c
index 2bce88c3568..2ebc01f9e2e 100644
--- a/test/packing/intpack-test3.c
+++ b/test/packing/intpack-test3.c
@@ -26,10 +26,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <assert.h>
-
#include "wt_internal.h" /* For __wt_XXX */
+#include <assert.h>
+
void test_value(int64_t);
void test_spread(int64_t, int64_t, int64_t);
diff --git a/test/packing/packing-test.c b/test/packing/packing-test.c
index 1d2194bee50..9b7105d7d4a 100644
--- a/test/packing/packing-test.c
+++ b/test/packing/packing-test.c
@@ -26,10 +26,10 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
-#include <assert.h>
-
#include "wt_internal.h" /* For __wt_XXX */
+#include <assert.h>
+
static void
check(const char *fmt, ...)
{
diff --git a/test/recovery/Makefile.am b/test/recovery/Makefile.am
index 60f237dad10..35f8dd15823 100644
--- a/test/recovery/Makefile.am
+++ b/test/recovery/Makefile.am
@@ -1,14 +1,18 @@
AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include \
-I$(top_srcdir)/test/utility
-noinst_PROGRAMS = t
-t_SOURCES = recovery.c
-t_LDADD = $(top_builddir)/libwiredtiger.la
-t_LDFLAGS = -static
+noinst_PROGRAMS = random-abort truncated-log
+random_abort_SOURCES = random-abort.c
+random_abort_LDADD = $(top_builddir)/libwiredtiger.la
+random_abort_LDFLAGS = -static
+
+truncated_log_SOURCES = truncated-log.c
+truncated_log_LDADD = $(top_builddir)/libwiredtiger.la
+truncated_log_LDFLAGS = -static
# Run this during a "make check" smoke test.
TESTS = $(noinst_PROGRAMS)
LOG_COMPILER = $(TEST_WRAPPER)
clean-local:
- rm -rf WiredTiger* *.core __*
+ rm -rf WT_TEST* *.core __*
diff --git a/test/recovery/recovery.c b/test/recovery/random-abort.c
index 5772865f030..ddcafbc80fd 100644
--- a/test/recovery/recovery.c
+++ b/test/recovery/random-abort.c
@@ -147,14 +147,14 @@ main(int argc, char *argv[])
uint32_t absent, count, timeout;
int ch, status, ret;
pid_t pid;
- char *working_dir;
+ const char *working_dir;
if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL)
progname = argv[0];
else
++progname;
- working_dir = NULL;
+ working_dir = "WT_TEST.random-abort";
timeout = 10;
while ((ch = __wt_getopt(progname, argc, argv, "h:t:")) != EOF)
switch (ch) {
diff --git a/test/recovery/truncated-log.c b/test/recovery/truncated-log.c
new file mode 100644
index 00000000000..4add7a61f66
--- /dev/null
+++ b/test/recovery/truncated-log.c
@@ -0,0 +1,268 @@
+/*-
+ * Public Domain 2014-2016 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <sys/wait.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifndef _WIN32
+#include <unistd.h>
+#else
+/* snprintf is not supported on <= VS2013 */
+#define snprintf _snprintf
+#endif
+
+#include <wiredtiger.h>
+
+#include "test_util.i"
+
+static char home[512]; /* Program working dir */
+static const char *progname; /* Program name */
+static const char *uri = "table:main";
+
+#define RECORDS_FILE "records"
+
+#define ENV_CONFIG \
+ "create,log=(file_max=100K,archive=false,enabled)," \
+ "transaction_sync=(enabled,method=none)"
+#define ENV_CONFIG_REC "log=(recover=on)"
+#define LOG_FILE_1 "WiredTigerLog.0000000001"
+#define MAX_VAL 4096
+
+#define K_SIZE 16
+#define V_SIZE 256
+
+/*
+ * Print the command-line synopsis to stderr and exit with a failure status.
+ */
+static void
+usage(void)
+{
+ fprintf(stderr, "usage: %s [-h dir]\n", progname);
+ exit(EXIT_FAILURE);
+}
+
+/*
+ * Child process: create the database and table, then insert records until
+ * the log cursor reports a record in log file 2. At that point write the
+ * offset of the last record seen in log file 1 and the previous key number
+ * to the records file, then abort() without closing the connection.
+ */
+static void
+fill_db(void)
+{
+	FILE *fp;
+	WT_CONNECTION *conn;
+	WT_CURSOR *cursor, *logc;
+	WT_LSN lsn, save_lsn;
+	WT_SESSION *session;
+	uint32_t i, max_key, min_key, units, unused;
+	int ret;
+	bool first;
+	char k[K_SIZE], v[V_SIZE];
+
+	/*
+	 * Run in the home directory so that the records file is in there too.
+	 * Fail if that isn't possible rather than create the database in
+	 * whatever directory the process happens to be in.
+	 */
+	if (chdir(home) != 0)
+		testutil_die(errno, "chdir: %s", home);
+	if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG, &conn)) != 0)
+		testutil_die(ret, "wiredtiger_open");
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		testutil_die(ret, "WT_CONNECTION:open_session");
+	if ((ret = session->create(session,
+	    uri, "key_format=S,value_format=S")) != 0)
+		testutil_die(ret, "WT_SESSION.create: %s", uri);
+	if ((ret =
+	    session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
+		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri);
+
+	/*
+	 * Keep a separate file with the records we wrote for checking.
+	 */
+	(void)unlink(RECORDS_FILE);
+	if ((fp = fopen(RECORDS_FILE, "w")) == NULL)
+		testutil_die(errno, "fopen");
+	/*
+	 * Set to no buffering.
+	 */
+	(void)setvbuf(fp, NULL, _IONBF, 0);
+	save_lsn.l.file = 0;
+
+	/*
+	 * Write data into the table until we move to log file 2.
+	 * We do the calculation below so that we don't have to walk the
+	 * log for every record.
+	 *
+	 * Calculate about how many records should fit in the log file.
+	 * Subtract a bunch for metadata and file creation records.
+	 * Then subtract out a few more records to be conservative.
+	 */
+	units = (K_SIZE + V_SIZE) / 128 + 1;
+	min_key = 90000 / (units * 128) - 15;
+	max_key = min_key * 2;
+	first = true;
+	for (i = 0; i < max_key; ++i) {
+		/*
+		 * Leave room for the terminating nul byte: a pad width of
+		 * V_SIZE - strlen("value") needs V_SIZE + 1 bytes, and
+		 * snprintf would silently drop the last digit of the number.
+		 */
+		snprintf(k, sizeof(k), "key%03d", (int)i);
+		snprintf(v, sizeof(v), "value%0*d",
+		    (int)(V_SIZE - strlen("value") - 1), (int)i);
+		cursor->set_key(cursor, k);
+		cursor->set_value(cursor, v);
+		if ((ret = cursor->insert(cursor)) != 0)
+			testutil_die(ret, "WT_CURSOR.insert");
+
+		if (i > min_key) {
+			/*
+			 * Walk the log after this insert. If an earlier walk
+			 * saved an LSN in log file 1, position on it first so
+			 * only the newer records are visited.
+			 */
+			if ((ret = session->open_cursor(
+			    session, "log:", NULL, NULL, &logc)) != 0)
+				testutil_die(ret, "open_cursor: log");
+			if (save_lsn.l.file != 0) {
+				logc->set_key(logc,
+				    save_lsn.l.file, save_lsn.l.offset, 0);
+				/* WiredTiger returns its error; errno is stale. */
+				if ((ret = logc->search(logc)) != 0)
+					testutil_die(ret, "search");
+			}
+			while ((ret = logc->next(logc)) == 0) {
+				if ((ret = logc->get_key(logc,
+				    &lsn.l.file, &lsn.l.offset, &unused)) != 0)
+					testutil_die(ret, "get_key");
+				/* Remember the last record seen in log file 1. */
+				if (lsn.l.file < 2)
+					save_lsn = lsn;
+				else {
+					/*
+					 * A record in file 2 on the very first
+					 * walk means the walks started too late.
+					 */
+					if (first)
+						testutil_die(EINVAL,
+						    "min_key too high");
+					if (fprintf(fp,
+					    "%" PRIu32 " %" PRIu32 "\n",
+					    save_lsn.l.offset, i - 1) == -1)
+						testutil_die(errno, "fprintf");
+					fclose(fp);
+					abort();
+				}
+			}
+			/* Don't accumulate one open log cursor per insert. */
+			if ((ret = logc->close(logc)) != 0)
+				testutil_die(ret, "WT_CURSOR.close: log");
+			first = false;
+		}
+	}
+
+	/*
+	 * Getting here means no record was ever seen in log file 2, so the
+	 * records file is empty. Say so, instead of returning and leaving the
+	 * parent to fail later while parsing that file.
+	 */
+	testutil_die(EINVAL, "max_key too low: log never reached file 2");
+}
+
+extern int __wt_optind;
+extern char *__wt_optarg;
+
+/*
+ * Parent process: run fill_db in a forked child and wait for it to finish,
+ * truncate the first log file part-way into the record whose offset the
+ * child saved, then reopen the database with recovery enabled and count the
+ * records in the table.
+ */
+int
+main(int argc, char *argv[])
+{
+	FILE *fp;
+	WT_CONNECTION *conn;
+	WT_CURSOR *cursor;
+	WT_SESSION *session;
+	uint64_t new_offset, offset;
+	uint32_t count, max_key;
+	int ch, status, ret;
+	pid_t pid;
+	const char *working_dir;
+
+	if ((progname = strrchr(argv[0], DIR_DELIM)) == NULL)
+		progname = argv[0];
+	else
+		++progname;
+
+	working_dir = "WT_TEST.truncated-log";
+	while ((ch = __wt_getopt(progname, argc, argv, "h:")) != EOF)
+		switch (ch) {
+		case 'h':
+			working_dir = __wt_optarg;
+			break;
+		default:
+			usage();
+		}
+	argc -= __wt_optind;
+	argv += __wt_optind;
+	if (argc != 0)
+		usage();
+
+	/* Size the buffer from the array itself, not a repeated constant. */
+	testutil_work_dir_from_path(home, sizeof(home), working_dir);
+	testutil_make_work_dir(home);
+
+	/*
+	 * Fork a child to create the database and insert records. The child
+	 * aborts itself once the log reaches its second file; we then
+	 * truncate the first log file and run recovery.
+	 */
+	if ((pid = fork()) < 0)
+		testutil_die(errno, "fork");
+
+	if (pid == 0) { /* child */
+		fill_db();
+		return (EXIT_SUCCESS);
+	}
+
+	/* parent */
+	/* Wait for child to kill itself. */
+	if (waitpid(pid, &status, 0) == -1)
+		testutil_die(errno, "waitpid");
+
+	/*
+	 * !!! If we wanted to take a copy of the directory before recovery,
+	 * this is the place to do it.
+	 *
+	 * Everything below uses paths relative to the home directory, so
+	 * fail if we can't get there.
+	 */
+	if (chdir(home) != 0)
+		testutil_die(errno, "chdir: %s", home);
+	printf("Open database, run recovery and verify content\n");
+	if ((fp = fopen(RECORDS_FILE, "r")) == NULL)
+		testutil_die(errno, "fopen");
+	ret = fscanf(fp, "%" SCNu64 " %" SCNu32 "\n", &offset, &max_key);
+	fclose(fp);
+	if (ret != 2)
+		testutil_die(errno, "fscanf");
+	/*
+	 * The offset is the beginning of the last record. Truncate to
+	 * the middle of that last record (i.e. ahead of that offset).
+	 */
+	new_offset = offset + V_SIZE;
+	printf("Parent: Truncate to %u\n", (uint32_t)new_offset);
+	if ((ret = truncate(LOG_FILE_1, (wt_off_t)new_offset)) != 0)
+		testutil_die(errno, "truncate");
+
+	if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn)) != 0)
+		testutil_die(ret, "wiredtiger_open");
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		testutil_die(ret, "WT_CONNECTION:open_session");
+	if ((ret =
+	    session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0)
+		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri);
+
+	/*
+	 * Count the records in the table after recovery. The run fails if
+	 * more records come back than the max_key value the child saved.
+	 */
+	count = 0;
+	while ((ret = cursor->next(cursor)) == 0)
+		++count;
+	/* Only end-of-table may stop the scan; anything else is an error. */
+	if (ret != WT_NOTFOUND)
+		testutil_die(ret, "WT_CURSOR.next");
+	if ((ret = conn->close(conn, NULL)) != 0)
+		testutil_die(ret, "WT_CONNECTION:close");
+	if (count > max_key) {
+		printf("expected %u records found %u\n", max_key, count);
+		return (EXIT_FAILURE);
+	}
+	printf("%u records verified\n", count);
+	return (EXIT_SUCCESS);
+}
diff --git a/test/suite/test_compress01.py b/test/suite/test_compress01.py
index e97953a53cd..94c748fc3e5 100644
--- a/test/suite/test_compress01.py
+++ b/test/suite/test_compress01.py
@@ -42,7 +42,6 @@ class test_compress01(wttest.WiredTigerTestCase):
('table', dict(uri='table:test_compress01')),
]
compress = [
- ('bzip2', dict(compress='bzip2')),
('nop', dict(compress='nop')),
('snappy', dict(compress='snappy')),
('none', dict(compress=None)),
diff --git a/test/suite/test_cursor08.py b/test/suite/test_cursor08.py
index f7bd37a2a3c..1a379518224 100644
--- a/test/suite/test_cursor08.py
+++ b/test/suite/test_cursor08.py
@@ -47,7 +47,6 @@ class test_cursor08(wttest.WiredTigerTestCase, suite_subprocess):
('reopen', dict(reopen=True))
])
compress = check_scenarios([
- ('bzip2', dict(compress='bzip2')),
('nop', dict(compress='nop')),
('snappy', dict(compress='snappy')),
('zlib', dict(compress='zlib')),
diff --git a/test/suite/test_encrypt01.py b/test/suite/test_encrypt01.py
index 0521f7c8e8a..0f2782204d2 100644
--- a/test/suite/test_encrypt01.py
+++ b/test/suite/test_encrypt01.py
@@ -55,10 +55,8 @@ class test_encrypt01(wttest.WiredTigerTestCase):
('none', dict(log_compress=None, block_compress=None)),
('nop', dict(log_compress='nop', block_compress='nop')),
('lz4', dict(log_compress='lz4', block_compress='lz4')),
- ('bzip2', dict(log_compress='bzip2', block_compress='bzip2')),
('snappy', dict(log_compress='snappy', block_compress='snappy')),
('zlib', dict(log_compress='zlib', block_compress='zlib')),
- ('bzip2-none', dict(log_compress='bzip2', block_compress=None)),
('none-snappy', dict(log_compress=None, block_compress='snappy')),
('snappy-lz4', dict(log_compress='snappy', block_compress='lz4')),
]
diff --git a/test/suite/test_txn07.py b/test/suite/test_txn07.py
index 94c26990178..f74120e3590 100644
--- a/test/suite/test_txn07.py
+++ b/test/suite/test_txn07.py
@@ -64,7 +64,6 @@ class test_txn07(wttest.WiredTigerTestCase, suite_subprocess):
]
txn1s = [('t1c', dict(txn1='commit')), ('t1r', dict(txn1='rollback'))]
compress = [
- ('bzip2', dict(compress='bzip2')),
('nop', dict(compress='nop')),
('snappy', dict(compress='snappy')),
('zlib', dict(compress='zlib')),
diff --git a/test/utility/test_util.i b/test/utility/test_util.i
index 73141bc9be7..3b88d375381 100644
--- a/test/utility/test_util.i
+++ b/test/utility/test_util.i
@@ -66,26 +66,17 @@ testutil_die(int e, const char *fmt, ...)
* Creates the full intended work directory in buffer.
*/
static inline void
-testutil_work_dir_from_path(char *buffer, size_t inputSize, char *dir)
+testutil_work_dir_from_path(char *buffer, size_t inputSize, const char *dir)
{
/* If no directory is provided, use the default. */
- if (dir == NULL) {
- if (inputSize < sizeof(DEFAULT_DIR))
- testutil_die(ENOMEM,
- "Not enough memory in buffer for directory %s%c%s",
- dir, DIR_DELIM, DEFAULT_DIR);
-
- snprintf(buffer, inputSize, DEFAULT_DIR);
- return;
- }
-
- /* Additional bytes for the directory and WT_TEST. */
- if (inputSize < strlen(dir) + sizeof(DEFAULT_DIR) + sizeof(DIR_DELIM))
+ if (dir == NULL)
+ dir = DEFAULT_DIR;
+
+ if (inputSize < strlen(dir) + 1)
testutil_die(ENOMEM,
- "Not enough memory in buffer for directory %s%c%s",
- dir, DIR_DELIM, DEFAULT_DIR);
+ "Not enough memory in buffer for directory %s", dir);
- snprintf(buffer, inputSize, "%s%c%s", dir, DIR_DELIM, DEFAULT_DIR);
+ strcpy(buffer, dir);
}
/*