summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDon Anderson <dda@ddanderson.com>2015-03-26 13:48:13 -0400
committerDon Anderson <dda@ddanderson.com>2015-03-26 13:48:13 -0400
commitf8cce524c976118fe367888e64bf60120f3060b0 (patch)
tree0a742346582d47ce6bad14edd0fbbf0d903df1f9
parent1c23661e877ecbc7a067a6e6b9b04489be52ab05 (diff)
parent3e37e1fca16f135a56068996bd37e28165cef0dc (diff)
downloadmongo-f8cce524c976118fe367888e64bf60120f3060b0.tar.gz
Merge branch 'develop' into encryption-api
Conflicts: dist/s_string.ok
-rw-r--r--NEWS82
-rw-r--r--README2
-rw-r--r--SConstruct8
-rw-r--r--build_posix/aclocal/version-set.m42
-rw-r--r--dist/flags.py1
-rw-r--r--dist/s_string.ok2
-rw-r--r--ext/compressors/zlib/zlib_compress.c112
-rw-r--r--lang/java/Makefile.am2
-rw-r--r--lang/python/setup.py2
-rw-r--r--src/async/async_worker.c9
-rw-r--r--src/btree/bt_slvg.c17
-rw-r--r--src/btree/bt_sync.c11
-rw-r--r--src/config/config_collapse.c2
-rw-r--r--src/conn/conn_cache_pool.c4
-rw-r--r--src/conn/conn_ckpt.c4
-rw-r--r--src/conn/conn_dhandle.c12
-rw-r--r--src/conn/conn_log.c14
-rw-r--r--src/conn/conn_stat.c4
-rw-r--r--src/conn/conn_sweep.c50
-rw-r--r--src/docs/top/main.dox6
-rw-r--r--src/docs/upgrading.dox6
-rw-r--r--src/evict/evict_file.c16
-rw-r--r--src/evict/evict_lru.c18
-rw-r--r--src/include/btree.h1
-rw-r--r--src/include/btree.i4
-rw-r--r--src/include/extern.h14
-rw-r--r--src/include/flags.h1
-rw-r--r--src/include/os_windows.h17
-rw-r--r--src/include/packing.i30
-rw-r--r--src/include/posix.h12
-rw-r--r--src/lsm/lsm_manager.c6
-rw-r--r--src/lsm/lsm_worker.c6
-rw-r--r--src/meta/meta_ckpt.c2
-rw-r--r--src/meta/meta_track.c25
-rw-r--r--src/os_posix/os_thread.c2
-rw-r--r--src/os_win/os_fallocate.c2
-rw-r--r--src/os_win/os_mtx_cond.c4
-rw-r--r--src/os_win/os_once.c2
-rw-r--r--src/os_win/os_thread.c8
-rw-r--r--src/os_win/os_time.c4
-rw-r--r--src/schema/schema_create.c2
-rw-r--r--src/schema/schema_drop.c2
-rw-r--r--src/schema/schema_rename.c2
-rw-r--r--src/session/session_dhandle.c2
-rw-r--r--src/support/huffman.c8
-rw-r--r--src/txn/txn.c2
-rw-r--r--src/txn/txn_ckpt.c50
-rw-r--r--src/txn/txn_log.c8
-rw-r--r--test/mciproject.yml16
-rw-r--r--test/suite/test_durability01.py87
-rw-r--r--test/suite/test_sweep01.py22
-rw-r--r--test/windows/windows_shim.c7
52 files changed, 548 insertions, 186 deletions
diff --git a/NEWS b/NEWS
index 9af9a41a5c6..806c153a7ec 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,85 @@
+WiredTiger release 2.5.2, 2015-03-23
+------------------------------------
+
+The WiredTiger 2.5.2 release contains important bug fixes.
+
+API changes:
+
+* Allow memory_page_max to be at most a quarter of the cache size, not half.
+ This avoids operations getting stalled due to the cache being filled with
+ one or two pages.
+
+Bug fixes and other important changes:
+
+* When skipping a dirty page during a checkpoint, make sure the tree is marked
+ dirty.
+ refs SUPPORT-1248, SERVER-17319, SERVER-17506, #1404, #1643, #1721, #1735
+
+* Fix a bug in range truncate where we could remove the wrong records.
+ refs SERVER-17345
+
+* Fix a bug in LSM management where we could let the cache get full - leading
+ to operations being blocked.
+ refs #1720
+
+* Fix several bugs in the checkpoint implementation that could lead to a tree
+ being marked clean when it had updates in memory. If shutdown occurred at
+ a specific time those updates would be discarded without being written.
+ refs SUPPORT-1248
+
+* Fix some bugs in logging - where system crashes could leave empty files that
+ would stop recovery working on re-start.
+ refs #1717, #1719, SERVER-17451
+
+* Fix a bug in recovery. Force recovery instead of returning an error if the
+ LSN given doesn't exist.
+ refs #1700, #1704
+
+* Move writing into log worker thread to avoid latency in application threads.
+ refs #1683
+
+* Fix a bug in the reconfigure API related to adhering to shared cache quotas.
+ refs #1712, #1713
+
+* Fix a bug in WiredTiger statistics where we weren't recording overflow
+ record statistics.
+ refs #1520, #1703, #1711
+
+* Several enhancements to eviction of large pages including:
+ - Don't do forced eviction of a page if it is the current walk point.
+ - Don't update the read generation on page in if it's set to oldest.
+ - Clear the walk positions before the eviction server sleeps.
+ - Reverse the direction of the LRU walk regularly.
+ - Add all pages that would block to the eviction queue.
+ - If evicting dirty pages use the worker threads not the server.
+ refs #1706
+
+* Use raw mode when dumping indices.
+ refs #1709
+
+* Fix a bug where we could race opening files while a WT_CONNECTION::close is
+ in progress.
+ refs SERVER-17319
+
+* Fix a bug in LSM where snapshot transaction updates could have the wrong
+ visibility check applied, leading to invalid updates.
+ refs #1641, #1701, #1702
+
+* Fix a bug in checkpoint where it could get an EBUSY return unnecessarily.
+ refs #1404, #1589, #1705
+
+* Fix a bug when writing a page from memory to disk (reconciling). We could
+ overwrite the end of a temporary buffer in some cases.
+ refs #1697, #1699
+
+* Sometimes we would choose a sub-optimal layout for on disk pages when
+ writing them out from memory.
+ refs #1699
+
+* Improve the performance of in-memory lookups by making the content of the
+ page structure more cache friendly.
+
+
WiredTiger release 2.5.1, 2015-03-07
------------------------------------
diff --git a/README b/README
index 6cb0421f6dc..f304bade13e 100644
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-WiredTiger 2.5.3: (March 24, 2015)
+WiredTiger 2.5.3: (March 26, 2015)
This is version 2.5.3 of WiredTiger.
diff --git a/SConstruct b/SConstruct
index 5c4b90992f2..8e9fae28ac6 100644
--- a/SConstruct
+++ b/SConstruct
@@ -68,7 +68,11 @@ var.Add('CFLAGS', 'C Compiler Flags', [
"/wd4090", # Ignore warning about mismatched const qualifiers
"/wd4996", # Ignore deprecated functions
"/W3", # Warning level 3
+ #"/we4244", # Possible loss of data
"/we4013", # Error on undefined functions
+ #"/we4047", # Indirection differences in types
+ #"/we4024", # Differences in parameter types
+ #"/we4100", # Unreferenced local parameter
"/TC", # Compile as C code
#"/Od", # Disable optimization
"/Ob1", # inline expansion
@@ -76,7 +80,7 @@ var.Add('CFLAGS', 'C Compiler Flags', [
"/GF", # enable string pooling
"/EHsc", # extern "C" does not throw
#"/RTC1", # enable stack checks
- "/GS", # enable secrutiy checks
+ "/GS", # enable security checks
"/Gy", # separate functions for linker
"/Zc:wchar_t",
"/Gd",
@@ -97,6 +101,8 @@ var.Add('TOOLS', 'SCons tools', [
"textfile"
])
+var.Add('SWIG', 'SWIG binary location', swig_binary)
+
env = Environment(
variables = var
)
diff --git a/build_posix/aclocal/version-set.m4 b/build_posix/aclocal/version-set.m4
index d1102bb1e44..b19418fc29d 100644
--- a/build_posix/aclocal/version-set.m4
+++ b/build_posix/aclocal/version-set.m4
@@ -3,7 +3,7 @@ dnl build by dist/s_version
VERSION_MAJOR=2
VERSION_MINOR=5
VERSION_PATCH=3
-VERSION_STRING='"WiredTiger 2.5.3: (March 24, 2015)"'
+VERSION_STRING='"WiredTiger 2.5.3: (March 26, 2015)"'
AC_SUBST(VERSION_MAJOR)
AC_SUBST(VERSION_MINOR)
diff --git a/dist/flags.py b/dist/flags.py
index f1eb6b24968..34f3ab3e02f 100644
--- a/dist/flags.py
+++ b/dist/flags.py
@@ -54,6 +54,7 @@ flags = {
'TXN_LOG_CKPT_PREPARE',
'TXN_LOG_CKPT_START',
'TXN_LOG_CKPT_STOP',
+ 'TXN_LOG_CKPT_SYNC',
],
'verbose' : [
'VERB_API',
diff --git a/dist/s_string.ok b/dist/s_string.ok
index 0d69c3a66ee..1e2b3a751d6 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -426,6 +426,7 @@ autoheader
bInheritHandle
basecfg
bdb
+beginthreadex
bigram
bitcnt
bitfield
@@ -548,6 +549,7 @@ decrementing
decrypt
decrypted
decrypts
+deflateCopy
deflateEnd
deflateInit
defno
diff --git a/ext/compressors/zlib/zlib_compress.c b/ext/compressors/zlib/zlib_compress.c
index 0d843fd7626..381bf0d5070 100644
--- a/ext/compressors/zlib/zlib_compress.c
+++ b/ext/compressors/zlib/zlib_compress.c
@@ -143,7 +143,7 @@ zlib_compress(WT_COMPRESSOR *compressor, WT_SESSION *session,
zs.avail_out = (uint32_t)dst_len;
if (deflate(&zs, Z_FINISH) == Z_STREAM_END) {
*compression_failed = 0;
- *result_lenp = zs.total_out;
+ *result_lenp = (size_t)zs.total_out;
} else
*compression_failed = 1;
@@ -158,7 +158,7 @@ zlib_compress(WT_COMPRESSOR *compressor, WT_SESSION *session,
* Find the slot containing the target offset (binary search).
*/
static inline uint32_t
-zlib_find_slot(uint32_t target, uint32_t *offsets, uint32_t slots)
+zlib_find_slot(uint64_t target, uint32_t *offsets, uint32_t slots)
{
uint32_t base, indx, limit;
@@ -210,7 +210,7 @@ zlib_decompress(WT_COMPRESSOR *compressor, WT_SESSION *session,
while ((ret = inflate(&zs, Z_FINISH)) == Z_OK)
;
if (ret == Z_STREAM_END) {
- *result_lenp = zs.total_out;
+ *result_lenp = (size_t)zs.total_out;
ret = Z_OK;
}
@@ -234,7 +234,7 @@ zlib_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session,
{
ZLIB_COMPRESSOR *zlib_compressor;
ZLIB_OPAQUE opaque;
- z_stream last_zs, zs;
+ z_stream *best_zs, last_zs, zs;
uint32_t curr_slot, last_slot;
int ret;
@@ -252,8 +252,7 @@ zlib_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session,
opaque.session = session;
zs.opaque = &opaque;
- if ((ret = deflateInit(&zs,
- zlib_compressor->zlib_level)) != Z_OK)
+ if ((ret = deflateInit(&zs, zlib_compressor->zlib_level)) != Z_OK)
return (zlib_error(compressor, session, "deflateInit", ret));
zs.next_in = src;
@@ -264,51 +263,79 @@ zlib_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session,
* inefficient.
*/
#define WT_ZLIB_RESERVED 24
- zs.avail_out = (uint32_t)(page_max - extra - WT_ZLIB_RESERVED);
- last_zs = zs;
+ zs.avail_out = (uint32_t)(page_max - (extra + WT_ZLIB_RESERVED));
+
+ /* Save the stream state in case the chosen data doesn't fit. */
+ if ((ret = deflateCopy(&last_zs, &zs)) != Z_OK)
+ return (zlib_error(
+ compressor, session, "deflateCopy", ret));
/*
* Strategy: take the available output size and compress that much
* input. Continue until there is no input small enough or the
* compression fails to fit.
- *
- * Don't let the compression ratio become insanely good (which can
- * happen with synthetic workloads). Once we hit a limit, stop so that
- * the in-memory size of pages isn't totally different to the on-disk
- * size. Otherwise we can get into trouble where every update to a
- * page results in forced eviction based on in-memory size, even though
- * the data fits into a single on-disk block.
*/
- while (zs.avail_out > 0 && zs.total_in <= zs.total_out * 20) {
- /* Find the slot we will try to compress up to. */
+ for (best_zs = NULL;;) {
+ /* Find the next slot we will try to compress up to. */
if ((curr_slot = zlib_find_slot(
- zs.total_in + zs.avail_out, offsets, slots)) <= last_slot)
- break;
-
- zs.avail_in = offsets[curr_slot] - offsets[last_slot];
- /* Save the stream state in case the chosen data doesn't fit. */
- last_zs = zs;
+ zs.total_in + zs.avail_out, offsets, slots)) > last_slot) {
+ zs.avail_in = offsets[curr_slot] - offsets[last_slot];
+ while (zs.avail_in > 0 && zs.avail_out > 0)
+ if ((ret = deflate(&zs, Z_SYNC_FLUSH)) != Z_OK)
+ return (zlib_error(compressor,
+ session, "deflate", ret));
+ }
- while (zs.avail_in > 0 && zs.avail_out > 0)
- if ((ret = deflate(&zs, Z_SYNC_FLUSH)) != Z_OK)
+ /*
+ * We didn't do a deflate, or it didn't work: use the last saved
+ * position.
+ */
+ if (curr_slot <= last_slot || zs.avail_in > 0) {
+ if ((ret = deflateEnd(&zs)) != Z_OK &&
+ ret != Z_DATA_ERROR)
return (zlib_error(
- compressor, session, "deflate", ret));
+ compressor, session, "deflateEnd", ret));
- /* Roll back if the last deflate didn't complete. */
- if (zs.avail_in > 0) {
- zs = last_zs;
+ best_zs = &last_zs;
break;
- } else
- last_slot = curr_slot;
+ }
+
+ /* The last deflation succeeded, discard the saved one. */
+ if ((ret = deflateEnd(&last_zs)) != Z_OK && ret != Z_DATA_ERROR)
+ return (zlib_error(
+ compressor, session, "deflateEnd", ret));
+
+ /*
+ * If there's more compression to do, save a snapshot and keep
+ * going, otherwise, use the current compression.
+ *
+ * Don't let the compression ratio become insanely good (which
+ * can happen with synthetic workloads). Once we hit a limit,
+ * stop so the in-memory size of pages isn't hugely larger than
+ * the on-disk size, otherwise we can get into trouble where
+ * every update to a page results in forced eviction based on
+ * the in-memory size, even though the data fits into a single
+ * on-disk block.
+ */
+ last_slot = curr_slot;
+ if (zs.avail_out > 0 && zs.total_in <= zs.total_out * 20) {
+ if ((ret = deflateCopy(&last_zs, &zs)) != Z_OK)
+ return (zlib_error(
+ compressor, session, "deflateCopy", ret));
+ continue;
+ }
+
+ best_zs = &zs;
+ break;
}
- zs.avail_out += WT_ZLIB_RESERVED;
- ret = deflate(&zs, Z_FINISH);
+ best_zs->avail_out += WT_ZLIB_RESERVED;
+ ret = deflate(best_zs, Z_FINISH);
/*
- * If the end marker didn't fit, report that we got no work done. WT
- * will compress the (possibly large) page image using ordinary
- * compression instead.
+ * If the end marker didn't fit, report that we got no work done,
+ * WiredTiger will compress the (possibly large) page image using
+ * ordinary compression instead.
*/
if (ret == Z_OK || ret == Z_BUF_ERROR)
last_slot = 0;
@@ -316,12 +343,12 @@ zlib_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session,
return (
zlib_error(compressor, session, "deflate end block", ret));
- if ((ret = deflateEnd(&zs)) != Z_OK && ret != Z_DATA_ERROR)
+ if ((ret = deflateEnd(best_zs)) != Z_OK && ret != Z_DATA_ERROR)
return (zlib_error(compressor, session, "deflateEnd", ret));
if (last_slot > 0) {
*result_slotsp = last_slot;
- *result_lenp = zs.total_out;
+ *result_lenp = (size_t)best_zs->total_out;
} else {
/* We didn't manage to compress anything: don't retry. */
*result_slotsp = 0;
@@ -334,11 +361,12 @@ zlib_compress_raw(WT_COMPRESSOR *compressor, WT_SESSION *session,
void *decomp;
size_t result_len;
- if ((decomp =
- zalloc(&opaque, 1, (uint32_t)zs.total_in + 100)) == NULL)
+ if ((decomp = zalloc(
+ &opaque, 1, (uint32_t)best_zs->total_in + 100)) == NULL)
return (ENOMEM);
- if ((ret = zlib_decompress(compressor, session, dst,
- zs.total_out, decomp, zs.total_in + 100, &result_len)) == 0)
+ if ((ret = zlib_decompress(
+ compressor, session, dst, (size_t)best_zs->total_out,
+ decomp, (size_t)best_zs->total_in + 100, &result_len)) == 0)
if (memcmp(src, decomp, result_len) != 0)
ret = zlib_error(compressor, session,
"deflate compare with original source",
diff --git a/lang/java/Makefile.am b/lang/java/Makefile.am
index 94a7cb2702d..e6e6f748837 100644
--- a/lang/java/Makefile.am
+++ b/lang/java/Makefile.am
@@ -1,4 +1,4 @@
-AM_CPPFLAGS = -I$(top_srcdir)
+AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/src/include
JAVADEST = src/com/wiredtiger/db
JAVADESTFULL = $(srcdir)/$(JAVADEST)
diff --git a/lang/python/setup.py b/lang/python/setup.py
index 28bbe4d07e8..9eb57d55b5c 100644
--- a/lang/python/setup.py
+++ b/lang/python/setup.py
@@ -36,7 +36,7 @@ if not 'ARCHFLAGS' in os.environ:
os.environ['ARCHFLAGS'] = ''
# Suppress warnings building SWIG generated code
-extra_cflags = [ '-w' ]
+extra_cflags = [ '-w', '-I../../src/include']
dir = os.path.dirname(__file__)
diff --git a/src/async/async_worker.c b/src/async/async_worker.c
index ec68598b8c8..543046f7a0c 100644
--- a/src/async/async_worker.c
+++ b/src/async/async_worker.c
@@ -75,7 +75,8 @@ retry:
*/
my_slot = my_consume % async->async_qsize;
prev_slot = last_consume % async->async_qsize;
- *op = WT_ATOMIC_STORE8(async->async_queue[my_slot], NULL);
+ *op = (WT_ASYNC_OP_IMPL*)WT_ATOMIC_STORE8(
+ async->async_queue[my_slot], NULL);
WT_ASSERT(session, async->cur_queue > 0);
WT_ASSERT(session, *op != NULL);
@@ -278,10 +279,10 @@ __async_worker_op(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op,
}
/*
- * __async_worker --
+ * __wt_async_worker --
* The async worker threads.
*/
-void *
+WT_THREAD_RET
__wt_async_worker(void *arg)
{
WT_ASYNC *async;
@@ -354,5 +355,5 @@ err: WT_PANIC_MSG(session, ret, "async worker error");
__wt_free(session, ac);
ac = acnext;
}
- return (NULL);
+ return (WT_THREAD_RET_VALUE);
}
diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c
index 6f0d4946aa5..ba1802116d0 100644
--- a/src/btree/bt_slvg.c
+++ b/src/btree/bt_slvg.c
@@ -124,7 +124,7 @@ static int __slvg_col_range_overlap(
WT_SESSION_IMPL *, uint32_t, uint32_t, WT_STUFF *);
static void __slvg_col_trk_update_start(uint32_t, WT_STUFF *);
static int __slvg_merge_block_free(WT_SESSION_IMPL *, WT_STUFF *);
-static int __slvg_ovfl_compare(const void *, const void *);
+static int WT_CDECL __slvg_ovfl_compare(const void *, const void *);
static int __slvg_ovfl_discard(WT_SESSION_IMPL *, WT_STUFF *);
static int __slvg_ovfl_reconcile(WT_SESSION_IMPL *, WT_STUFF *);
static int __slvg_ovfl_ref(WT_SESSION_IMPL *, WT_TRACK *, int);
@@ -140,9 +140,9 @@ static int __slvg_row_range_overlap(
WT_SESSION_IMPL *, uint32_t, uint32_t, WT_STUFF *);
static int __slvg_row_trk_update_start(
WT_SESSION_IMPL *, WT_ITEM *, uint32_t, WT_STUFF *);
-static int __slvg_trk_compare_addr(const void *, const void *);
-static int __slvg_trk_compare_gen(const void *, const void *);
-static int __slvg_trk_compare_key(const void *, const void *);
+static int WT_CDECL __slvg_trk_compare_addr(const void *, const void *);
+static int WT_CDECL __slvg_trk_compare_gen(const void *, const void *);
+static int WT_CDECL __slvg_trk_compare_key(const void *, const void *);
static int __slvg_trk_free(WT_SESSION_IMPL *, WT_TRACK **, int);
static void __slvg_trk_free_addr(WT_SESSION_IMPL *, WT_TRACK *);
static int __slvg_trk_init(WT_SESSION_IMPL *, uint8_t *,
@@ -2098,7 +2098,7 @@ __slvg_row_ovfl(WT_SESSION_IMPL *session,
* __slvg_trk_compare_addr --
* Compare two WT_TRACK array entries by address cookie.
*/
-static int
+static int WT_CDECL
__slvg_trk_compare_addr(const void *a, const void *b)
{
WT_DECL_RET;
@@ -2124,7 +2124,7 @@ __slvg_trk_compare_addr(const void *a, const void *b)
* __slvg_ovfl_compare --
* Bsearch comparison routine for the overflow array.
*/
-static int
+static int WT_CDECL
__slvg_ovfl_compare(const void *a, const void *b)
{
WT_ADDR *addr;
@@ -2163,6 +2163,7 @@ __slvg_ovfl_reconcile(WT_SESSION_IMPL *session, WT_STUFF *ss)
* with the lowest LSNs until overflow pages are only referenced once.
*
* This requires sorting the page list by LSN, and the overflow array
+
* by address cookie.
*/
qsort(ss->pages,
@@ -2246,7 +2247,7 @@ err: __wt_free(session, slot);
* __slvg_trk_compare_key --
* Compare two WT_TRACK array entries by key, and secondarily, by LSN.
*/
-static int
+static int WT_CDECL
__slvg_trk_compare_key(const void *a, const void *b)
{
WT_SESSION_IMPL *session;
@@ -2303,7 +2304,7 @@ __slvg_trk_compare_key(const void *a, const void *b)
* __slvg_trk_compare_gen --
* Compare two WT_TRACK array entries by LSN.
*/
-static int
+static int WT_CDECL
__slvg_trk_compare_gen(const void *a, const void *b)
{
WT_TRACK *a_trk, *b_trk;
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index 1bbaee4bf1b..dae2dd8d480 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -109,6 +109,17 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
/* Write all dirty in-cache pages. */
flags |= WT_READ_NO_EVICT;
for (walk = NULL;;) {
+ /*
+ * If we have a page, and it was ever modified, track
+ * the highest transaction ID in the tree. We do this
+ * here because we want the value after reconciling
+ * dirty pages.
+ */
+ if (walk != NULL && walk->page != NULL &&
+ (mod = walk->page->modify) != NULL &&
+ TXNID_LT(btree->rec_max_txn, mod->rec_max_txn))
+ btree->rec_max_txn = mod->rec_max_txn;
+
WT_ERR(__wt_tree_walk(session, &walk, NULL, flags));
if (walk == NULL)
break;
diff --git a/src/config/config_collapse.c b/src/config/config_collapse.c
index f54e4fc2074..23cb03c9b3a 100644
--- a/src/config/config_collapse.c
+++ b/src/config/config_collapse.c
@@ -314,7 +314,7 @@ err: __wt_scr_free(session, &build);
* __config_merge_cmp --
* Qsort function: sort the config merge array.
*/
-static int
+static int WT_CDECL
__config_merge_cmp(const void *a, const void *b)
{
WT_CONFIG_MERGE_ENTRY *ae, *be;
diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c
index 7bf090496a8..488864ce351 100644
--- a/src/conn/conn_cache_pool.c
+++ b/src/conn/conn_cache_pool.c
@@ -596,7 +596,7 @@ __cache_pool_adjust(WT_SESSION_IMPL *session,
* __wt_cache_pool_server --
* Thread to manage cache pool among connections.
*/
-void *
+WT_THREAD_RET
__wt_cache_pool_server(void *arg)
{
WT_CACHE *cache;
@@ -642,5 +642,5 @@ __wt_cache_pool_server(void *arg)
if (0) {
err: WT_PANIC_MSG(session, ret, "cache pool manager server error");
}
- return (NULL);
+ return (WT_THREAD_RET_VALUE);
}
diff --git a/src/conn/conn_ckpt.c b/src/conn/conn_ckpt.c
index 503c22110f9..74f27d8bd18 100644
--- a/src/conn/conn_ckpt.c
+++ b/src/conn/conn_ckpt.c
@@ -69,7 +69,7 @@ err: __wt_scr_free(session, &tmp);
* __ckpt_server --
* The checkpoint server thread.
*/
-static void *
+static WT_THREAD_RET
__ckpt_server(void *arg)
{
WT_CONNECTION_IMPL *conn;
@@ -112,7 +112,7 @@ __ckpt_server(void *arg)
if (0) {
err: WT_PANIC_MSG(session, ret, "checkpoint server error");
}
- return (NULL);
+ return (WT_THREAD_RET_VALUE);
}
/*
diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c
index 9f308a2569c..63180d64019 100644
--- a/src/conn/conn_dhandle.c
+++ b/src/conn/conn_dhandle.c
@@ -234,7 +234,7 @@ err: WT_TRET(__wt_rwlock_destroy(session, &dhandle->rwlock));
* Sync and close the underlying btree handle.
*/
int
-__wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int force)
+__wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int final, int force)
{
WT_BTREE *btree;
WT_DATA_HANDLE *dhandle;
@@ -273,7 +273,7 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int force)
*/
if (!F_ISSET(btree,
WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY))
- WT_ERR(__wt_checkpoint_close(session, force));
+ WT_ERR(__wt_checkpoint_close(session, final, force));
if (dhandle->checkpoint == NULL)
--S2C(session)->open_btree_count;
@@ -391,7 +391,7 @@ __conn_btree_open(WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags)
* in the tree that can block the close.
*/
if (F_ISSET(dhandle, WT_DHANDLE_OPEN))
- WT_RET(__wt_conn_btree_sync_and_close(session, 0));
+ WT_RET(__wt_conn_btree_sync_and_close(session, 0, 0));
/* Discard any previous configuration, set up the new configuration. */
__conn_btree_config_clear(session);
@@ -423,7 +423,7 @@ __conn_btree_open(WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags)
err: F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);
/* If the open failed, close the handle. */
if (F_ISSET(dhandle, WT_DHANDLE_OPEN))
- WT_TRET(__wt_conn_btree_sync_and_close(session, 0));
+ WT_TRET(__wt_conn_btree_sync_and_close(session, 0, 0));
}
return (ret);
@@ -669,7 +669,7 @@ __wt_conn_dhandle_close_all(
if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
if ((ret = __wt_meta_track_sub_on(session)) == 0)
ret = __wt_conn_btree_sync_and_close(
- session, force);
+ session, 0, force);
/*
* If the close succeeded, drop any locks it acquired.
@@ -731,7 +731,7 @@ __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final)
dhandle = session->dhandle;
if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
- tret = __wt_conn_btree_sync_and_close(session, 0);
+ tret = __wt_conn_btree_sync_and_close(session, final, 0);
if (final && tret != 0) {
__wt_err(session, tret,
"Final close of %s failed", dhandle->name);
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c
index 4b995114b09..a6d53134ec2 100644
--- a/src/conn/conn_log.c
+++ b/src/conn/conn_log.c
@@ -273,7 +273,7 @@ err:
* __log_close_server --
* The log close server thread.
*/
-static void *
+static WT_THREAD_RET
__log_close_server(void *arg)
{
WT_CONNECTION_IMPL *conn;
@@ -332,7 +332,7 @@ err: __wt_err(session, ret, "log close server error");
}
if (locked)
__wt_spin_unlock(session, &log->log_sync_lock);
- return (NULL);
+ return (WT_THREAD_RET_VALUE);
}
/*
@@ -347,7 +347,7 @@ typedef struct {
* __log_wrlsn_cmp --
* The log wrlsn comparison function for qsort.
*/
-static int
+static int WT_CDECL
__log_wrlsn_cmp(const void *a, const void *b)
{
WT_LOG_WRLSN_ENTRY *ae, *be;
@@ -361,7 +361,7 @@ __log_wrlsn_cmp(const void *a, const void *b)
* __log_wrlsn_server --
* The log wrlsn server thread.
*/
-static void *
+static WT_THREAD_RET
__log_wrlsn_server(void *arg)
{
WT_CONNECTION_IMPL *conn;
@@ -450,14 +450,14 @@ __log_wrlsn_server(void *arg)
if (0)
err: __wt_err(session, ret, "log wrlsn server error");
- return (NULL);
+ return (WT_THREAD_RET_VALUE);
}
/*
* __log_server --
* The log server thread.
*/
-static void *
+static WT_THREAD_RET
__log_server(void *arg)
{
WT_CONNECTION_IMPL *conn;
@@ -502,7 +502,7 @@ err: __wt_err(session, ret, "log server error");
}
if (locked)
(void)__wt_writeunlock(session, log->log_archive_lock);
- return (NULL);
+ return (WT_THREAD_RET_VALUE);
}
/*
diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c
index 83c8d539662..0d008939d8c 100644
--- a/src/conn/conn_stat.c
+++ b/src/conn/conn_stat.c
@@ -380,7 +380,7 @@ err: __wt_scr_free(session, &tmp);
* __statlog_server --
* The statistics server thread.
*/
-static void *
+static WT_THREAD_RET
__statlog_server(void *arg)
{
WT_CONNECTION_IMPL *conn;
@@ -419,7 +419,7 @@ err: WT_PANIC_MSG(session, ret, "statistics log server error");
}
__wt_buf_free(session, &path);
__wt_buf_free(session, &tmp);
- return (NULL);
+ return (WT_THREAD_RET_VALUE);
}
/*
diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c
index 0e6b4b61c93..90773a621e2 100644
--- a/src/conn/conn_sweep.c
+++ b/src/conn/conn_sweep.c
@@ -26,22 +26,36 @@ __sweep_remove_handles(WT_SESSION_IMPL *session)
dhandle_next = SLIST_NEXT(dhandle, l);
if (WT_IS_METADATA(dhandle))
continue;
+ if (F_ISSET(dhandle, WT_DHANDLE_OPEN))
+ continue;
+
+ /* Make sure we get exclusive access. */
+ if ((ret =
+ __wt_try_writelock(session, dhandle->rwlock)) == EBUSY)
+ continue;
+ WT_RET(ret);
/*
* If there are no longer any references to the handle in any
* sessions, attempt to discard it.
*/
if (F_ISSET(dhandle, WT_DHANDLE_OPEN) ||
- dhandle->session_inuse != 0 || dhandle->session_ref != 0)
+ dhandle->session_inuse != 0 || dhandle->session_ref != 0) {
+ WT_RET(__wt_writeunlock(session, dhandle->rwlock));
continue;
+ }
WT_WITH_DHANDLE(session, dhandle,
ret = __wt_conn_dhandle_discard_single(session, 0));
+
+ /* If the handle was not successfully discarded, unlock it. */
+ if (ret != 0)
+ WT_TRET(__wt_writeunlock(session, dhandle->rwlock));
WT_RET_BUSY_OK(ret);
WT_STAT_FAST_CONN_INCR(session, dh_conn_ref);
}
- return (ret);
+ return (ret == EBUSY ? 0 : ret);
}
/*
@@ -51,6 +65,7 @@ __sweep_remove_handles(WT_SESSION_IMPL *session)
static int
__sweep(WT_SESSION_IMPL *session)
{
+ WT_BTREE *btree;
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
@@ -83,15 +98,13 @@ __sweep(WT_SESSION_IMPL *session)
/*
* We have a candidate for closing; if it's open, acquire an
- * exclusive lock on the handle and close it. We might be
- * blocking opens for a long time (over disk I/O), but the
- * handle was quiescent for awhile.
+ * exclusive lock on the handle and close it.
*
- * The close can fail if an update cannot be written (updates
- * in a no-longer-referenced file might not yet be globally
- * visible if sessions have disjoint sets of files open). If
- * the handle is busy, skip it, we'll retry the close the next
- * time, after the transaction state has progressed.
+ * The close would require I/O if an update cannot be written
+ * (updates in a no-longer-referenced file might not yet be
+ * globally visible if sessions have disjoint sets of files
+ * open). In that case, skip it: we'll retry the close the
+ * next time, after the transaction state has progressed.
*
* We don't set WT_DHANDLE_EXCLUSIVE deliberately, we want
* opens to block on us rather than returning an EBUSY error to
@@ -100,11 +113,18 @@ __sweep(WT_SESSION_IMPL *session)
if ((ret =
__wt_try_writelock(session, dhandle->rwlock)) == EBUSY)
continue;
+ WT_RET(ret);
+
+ /* Only sweep clean trees where all updates are visible. */
+ btree = dhandle->handle;
+ if (btree->modified ||
+ !__wt_txn_visible_all(session, btree->rec_max_txn))
+ goto unlock;
/* If the handle is open, try to close it. */
if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
- WT_WITH_DHANDLE(session, dhandle,
- ret = __wt_conn_btree_sync_and_close(session, 0));
+ WT_WITH_DHANDLE(session, dhandle, ret =
+ __wt_conn_btree_sync_and_close(session, 0, 0));
/* We closed the btree handle, bump the statistic. */
if (ret == 0)
@@ -115,7 +135,7 @@ __sweep(WT_SESSION_IMPL *session)
if (dhandle->session_inuse == 0 && dhandle->session_ref == 0)
++closed_handles;
- WT_TRET(__wt_writeunlock(session, dhandle->rwlock));
+unlock: WT_TRET(__wt_writeunlock(session, dhandle->rwlock));
WT_RET_BUSY_OK(ret);
}
@@ -132,7 +152,7 @@ __sweep(WT_SESSION_IMPL *session)
* __sweep_server --
* The handle sweep server thread.
*/
-static void *
+static WT_THREAD_RET
__sweep_server(void *arg)
{
WT_CONNECTION_IMPL *conn;
@@ -158,7 +178,7 @@ __sweep_server(void *arg)
if (0) {
err: WT_PANIC_MSG(session, ret, "handle sweep server error");
}
- return (NULL);
+ return (WT_THREAD_RET_VALUE);
}
/*
diff --git a/src/docs/top/main.dox b/src/docs/top/main.dox
index 9b3e5fe48f8..75bf6644393 100644
--- a/src/docs/top/main.dox
+++ b/src/docs/top/main.dox
@@ -6,9 +6,9 @@ WiredTiger is an high performance, scalable, production quality, NoSQL,
@section releases Releases
<table>
-@row{<b>WiredTiger 2.5.0</b> (current),
- <a href="releases/wiredtiger-2.5.0.tar.bz2"><b>[Release package]</b></a>,
- <a href="2.5.0/index.html"><b>[Documentation]</b></a>}
+@row{<b>WiredTiger 2.5.2</b> (current),
+ <a href="releases/wiredtiger-2.5.2.tar.bz2"><b>[Release package]</b></a>,
+ <a href="2.5.2/index.html"><b>[Documentation]</b></a>}
@row{<b>WiredTiger 2.4.1</b> (previous),
<a href="releases/wiredtiger-2.4.1.tar.bz2"><b>[Release package]</b></a>,
<a href="2.4.1/index.html"><b>[Documentation]</b></a>}
diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox
index e4b4cacd850..6ccb17fbdab 100644
--- a/src/docs/upgrading.dox
+++ b/src/docs/upgrading.dox
@@ -1,6 +1,6 @@
/*! @page upgrading Upgrading WiredTiger applications
-@section version_252 Upgrading to Version 2.5.2
+@section version_253 Upgrading to Version 2.5.3
<dl>
<dt>Configuration string case-sensitivity</dt>
@@ -30,6 +30,10 @@ is only enforced when direct I/O is configured.
</dd>
</dl>
+@section version_252 Upgrading to Version 2.5.2
+
+There are no special upgrade steps required.
+
@section version_251 Upgrading to Version 2.5.1
<dl>
diff --git a/src/evict/evict_file.c b/src/evict/evict_file.c
index f546a5adae1..864c116a380 100644
--- a/src/evict/evict_file.c
+++ b/src/evict/evict_file.c
@@ -72,23 +72,17 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop)
WT_READ_CACHE | WT_READ_NO_EVICT));
switch (syncop) {
- case WT_SYNC_DISCARD:
- /*
- * Check that the page is clean: if we see a dirty page
- * (including a dirty parent page after evicting a
- * child), give up. The higher level can try to
- * checkpoint, but during discard we aren't set up to
- * manage checkpoints.
- */
- if (__wt_page_is_modified(page))
- WT_ERR(EBUSY);
- /* FALLTHROUGH */
case WT_SYNC_CLOSE:
/*
* Evict the page.
*/
WT_ERR(__wt_evict(session, ref, 1));
break;
+ case WT_SYNC_DISCARD:
+ WT_ASSERT(session,
+ __wt_page_can_evict(session, page, 0));
+ __wt_evict_page_clean_update(session, ref);
+ break;
case WT_SYNC_DISCARD_FORCE:
/*
* Forced discard of the page, whether clean or dirty.
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 2ebd699c579..62326015d2c 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -10,13 +10,13 @@
static int __evict_clear_walks(WT_SESSION_IMPL *);
static int __evict_has_work(WT_SESSION_IMPL *, uint32_t *);
-static int __evict_lru_cmp(const void *, const void *);
+static int WT_CDECL __evict_lru_cmp(const void *, const void *);
static int __evict_lru_pages(WT_SESSION_IMPL *, int);
static int __evict_lru_walk(WT_SESSION_IMPL *, uint32_t);
static int __evict_pass(WT_SESSION_IMPL *);
static int __evict_walk(WT_SESSION_IMPL *, uint32_t);
static int __evict_walk_file(WT_SESSION_IMPL *, u_int *, uint32_t);
-static void *__evict_worker(void *);
+static WT_THREAD_RET __evict_worker(void *);
static int __evict_server_work(WT_SESSION_IMPL *);
/*
@@ -54,7 +54,7 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry)
* __evict_lru_cmp --
* Qsort function: sort the eviction array.
*/
-static int
+static int WT_CDECL
__evict_lru_cmp(const void *a, const void *b)
{
uint64_t a_lru, b_lru;
@@ -94,7 +94,7 @@ __wt_evict_list_clear_page(WT_SESSION_IMPL *session, WT_REF *ref)
WT_EVICT_ENTRY *evict;
uint32_t i, elem;
- WT_ASSERT(session,
+ WT_ASSERT(session,
__wt_ref_is_root(ref) || ref->state == WT_REF_LOCKED);
/* Fast path: if the page isn't on the queue, don't bother searching. */
@@ -150,7 +150,7 @@ __wt_evict_server_wake(WT_SESSION_IMPL *session)
* __evict_server --
* Thread to evict pages from the cache.
*/
-static void *
+static WT_THREAD_RET
__evict_server(void *arg)
{
WT_CACHE *cache;
@@ -232,7 +232,7 @@ __evict_server(void *arg)
if (0) {
err: WT_PANIC_MSG(session, ret, "cache eviction server error");
}
- return (NULL);
+ return (WT_THREAD_RET_VALUE);
}
/*
@@ -384,7 +384,7 @@ __wt_evict_destroy(WT_SESSION_IMPL *session)
* __evict_worker --
* Thread to help evict pages from the cache.
*/
-static void *
+static WT_THREAD_RET
__evict_worker(void *arg)
{
WT_CACHE *cache;
@@ -413,7 +413,7 @@ __evict_worker(void *arg)
if (0) {
err: WT_PANIC_MSG(session, ret, "cache eviction worker error");
}
- return (NULL);
+ return (WT_THREAD_RET_VALUE);
}
/*
@@ -1211,7 +1211,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
}
fast: /* If the page can't be evicted, give up. */
- if (!__wt_page_can_evict(session, page, 0))
+ if (!__wt_page_can_evict(session, page, 1))
continue;
/*
diff --git a/src/include/btree.h b/src/include/btree.h
index f00a7ac9a8e..cc571124207 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -124,6 +124,7 @@ struct __wt_btree {
u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */
uint64_t checkpoint_gen; /* Checkpoint generation */
+ uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */
uint64_t write_gen; /* Write generation */
WT_REF *evict_ref; /* Eviction thread's location */
diff --git a/src/include/btree.i b/src/include/btree.i
index e933ce46930..7d9a3095a0c 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -980,7 +980,7 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_PAGE *page, int check_splits)
* a transaction value, once that's globally visible, we know we can
* evict the created page.
*/
- if (WT_PAGE_IS_INTERNAL(page) &&
+ if (check_splits && WT_PAGE_IS_INTERNAL(page) &&
!__wt_txn_visible_all(session, mod->mod_split_txn))
return (0);
@@ -1023,7 +1023,7 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_PAGE *page, int check_splits)
/*
* If the page was recently split in-memory, don't force it out: we
- * hope eviction will find it first.
+ * hope an eviction thread will find it first.
*/
if (check_splits &&
!__wt_txn_visible_all(session, mod->inmem_split_txn))
diff --git a/src/include/extern.h b/src/include/extern.h
index 27ef978beae..b471d82ceb6 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -8,7 +8,7 @@ extern int __wt_async_flush(WT_SESSION_IMPL *session);
extern int __wt_async_new_op(WT_SESSION_IMPL *session, const char *uri, const char *config, const char *cfg[], WT_ASYNC_CALLBACK *cb, WT_ASYNC_OP_IMPL **opp);
extern int __wt_async_op_enqueue(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op);
extern int __wt_async_op_init(WT_SESSION_IMPL *session);
-extern void *__wt_async_worker(void *arg);
+extern WT_THREAD_RET __wt_async_worker(void *arg);
extern int __wt_block_addr_to_buffer(WT_BLOCK *block, uint8_t **pp, wt_off_t offset, uint32_t size, uint32_t cksum);
extern int __wt_block_buffer_to_addr(WT_BLOCK *block, const uint8_t *p, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump);
extern int __wt_block_addr_valid(WT_SESSION_IMPL *session, WT_BLOCK *block, const uint8_t *addr, size_t addr_size, int live);
@@ -223,12 +223,12 @@ extern int __wt_cache_destroy(WT_SESSION_IMPL *session);
extern int __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg);
extern int __wt_conn_cache_pool_open(WT_SESSION_IMPL *session);
extern int __wt_conn_cache_pool_destroy(WT_SESSION_IMPL *session);
-extern void *__wt_cache_pool_server(void *arg);
+extern WT_THREAD_RET __wt_cache_pool_server(void *arg);
extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session);
extern int __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize);
extern int __wt_conn_dhandle_find(WT_SESSION_IMPL *session, const char *name, const char *ckpt, uint32_t flags);
-extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int force);
+extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int final, int force);
extern int __wt_conn_btree_get(WT_SESSION_IMPL *session, const char *name, const char *ckpt, const char *cfg[], uint32_t flags);
extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, int apply_checkpoints, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
extern int __wt_conn_btree_apply_single_ckpt(WT_SESSION_IMPL *session, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
@@ -428,7 +428,7 @@ extern int __wt_metadata_remove(WT_SESSION_IMPL *session, const char *key);
extern int __wt_metadata_search( WT_SESSION_IMPL *session, const char *key, char **valuep);
extern void __wt_meta_track_discard(WT_SESSION_IMPL *session);
extern int __wt_meta_track_on(WT_SESSION_IMPL *session);
-extern int __wt_meta_track_off(WT_SESSION_IMPL *session, int unroll);
+extern int __wt_meta_track_off(WT_SESSION_IMPL *session, int need_sync, int unroll);
extern int __wt_meta_track_sub_on(WT_SESSION_IMPL *session);
extern int __wt_meta_track_sub_off(WT_SESSION_IMPL *session);
extern int __wt_meta_track_checkpoint(WT_SESSION_IMPL *session);
@@ -499,7 +499,7 @@ extern int __wt_fprintf(WT_SESSION_IMPL *session, FILE *fp, const char *fmt, ...
extern int __wt_fflush(WT_SESSION_IMPL *session, FILE *fp);
extern int __wt_fclose(WT_SESSION_IMPL *session, FILE **fpp, WT_FHANDLE_MODE mode_flag);
extern uint64_t __wt_strtouq(const char *nptr, char **endptr, int base);
-extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, void *(*func)(void *), void *arg);
+extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg);
extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid);
extern void __wt_thread_id(char *buf, size_t buflen);
extern int __wt_seconds(WT_SESSION_IMPL *session, time_t *timep);
@@ -657,7 +657,7 @@ extern void __wt_stat_refresh_dsrc_stats(void *stats_arg);
extern void __wt_stat_aggregate_dsrc_stats(const void *child, const void *parent);
extern void __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats);
extern void __wt_stat_refresh_connection_stats(void *stats_arg);
-extern int __wt_txnid_cmp(const void *v1, const void *v2);
+extern int WT_CDECL __wt_txnid_cmp(const void *v1, const void *v2);
extern void __wt_txn_release_snapshot(WT_SESSION_IMPL *session);
extern void __wt_txn_update_oldest(WT_SESSION_IMPL *session);
extern void __wt_txn_refresh(WT_SESSION_IMPL *session, int get_snapshot);
@@ -675,7 +675,7 @@ extern int __wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]);
-extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, int force);
+extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force);
extern uint64_t __wt_ext_transaction_id(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session);
extern int __wt_ext_transaction_isolation_level( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session);
extern int __wt_ext_transaction_notify( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_TXN_NOTIFY *notify);
diff --git a/src/include/flags.h b/src/include/flags.h
index 30b2ab1c0e3..99c77c94f49 100644
--- a/src/include/flags.h
+++ b/src/include/flags.h
@@ -65,6 +65,7 @@
#define WT_TXN_LOG_CKPT_PREPARE 0x00000002
#define WT_TXN_LOG_CKPT_START 0x00000004
#define WT_TXN_LOG_CKPT_STOP 0x00000008
+#define WT_TXN_LOG_CKPT_SYNC 0x00000010
#define WT_VERB_API 0x00000001
#define WT_VERB_BLOCK 0x00000002
#define WT_VERB_CHECKPOINT 0x00000004
diff --git a/src/include/os_windows.h b/src/include/os_windows.h
index a9c1cf5f65a..de97143335f 100644
--- a/src/include/os_windows.h
+++ b/src/include/os_windows.h
@@ -14,6 +14,18 @@ typedef CONDITION_VARIABLE wt_cond_t;
typedef CRITICAL_SECTION wt_mutex_t;
typedef HANDLE wt_thread_t;
+/*
+ * Thread callbacks need to match the return signature of _beginthreadex.
+ */
+#define WT_THREAD_CALLBACK(x) unsigned (__stdcall x)
+#define WT_THREAD_RET unsigned __stdcall
+#define WT_THREAD_RET_VALUE 0
+
+/*
+ * WT declaration for calling convention type
+ */
+#define WT_CDECL __cdecl
+
#if _MSC_VER < 1900
/* Timespec is a POSIX structure not defined in Windows */
struct timespec {
@@ -29,7 +41,7 @@ struct timespec {
*/
typedef uint32_t u_int;
typedef unsigned char u_char;
-typedef unsigned long u_long;
+typedef uint64_t u_long;
/* <= VS 2013 is not C99 compat */
#if _MSC_VER < 1900
@@ -63,3 +75,6 @@ _Check_return_opt_ int __cdecl _wt_vsnprintf(
/* Provide a custom version of localtime_r */
struct tm *localtime_r(const time_t* timer, struct tm* result);
+
+/* Windows does not provide fsync */
+#define fsync _commit
diff --git a/src/include/packing.i b/src/include/packing.i
index 9caa58ed2e1..b97b3a322ce 100644
--- a/src/include/packing.i
+++ b/src/include/packing.i
@@ -376,6 +376,11 @@ __pack_write(
pad = pv->size - s;
if (pv->type == 'U') {
oldp = *pp;
+ /*
+ * Check that there is at least one byte available: the
+ * low-level routines treat zero length as unchecked.
+ */
+ WT_SIZE_CHECK(1, maxlen);
WT_RET(__wt_vpack_uint(pp, maxlen, s + pad));
maxlen -= (size_t)(*pp - oldp);
}
@@ -404,6 +409,11 @@ __pack_write(
case 'i':
case 'l':
case 'q':
+ /*
+ * Check that there is at least one byte available: the
+ * low-level routines treat zero length as unchecked.
+ */
+ WT_SIZE_CHECK(1, maxlen);
WT_RET(__wt_vpack_int(pp, maxlen, pv->u.i));
break;
case 'H':
@@ -411,6 +421,11 @@ __pack_write(
case 'L':
case 'Q':
case 'r':
+ /*
+ * Check that there is at least one byte available: the
+ * low-level routines treat zero length as unchecked.
+ */
+ WT_SIZE_CHECK(1, maxlen);
WT_RET(__wt_vpack_uint(pp, maxlen, pv->u.u));
break;
case 'R':
@@ -453,6 +468,11 @@ __unpack_read(WT_SESSION_IMPL *session,
*pp += s;
break;
case 'U':
+ /*
+ * Check that there is at least one byte available: the
+ * low-level routines treat zero length as unchecked.
+ */
+ WT_SIZE_CHECK(1, maxlen);
WT_RET(__wt_vunpack_uint(pp, maxlen, &pv->u.u));
/* FALLTHROUGH */
case 'u':
@@ -481,6 +501,11 @@ __unpack_read(WT_SESSION_IMPL *session,
case 'i':
case 'l':
case 'q':
+ /*
+ * Check that there is at least one byte available: the
+ * low-level routines treat zero length as unchecked.
+ */
+ WT_SIZE_CHECK(1, maxlen);
WT_RET(__wt_vunpack_int(pp, maxlen, &pv->u.i));
break;
case 'H':
@@ -488,6 +513,11 @@ __unpack_read(WT_SESSION_IMPL *session,
case 'L':
case 'Q':
case 'r':
+ /*
+ * Check that there is at least one byte available: the
+ * low-level routines treat zero length as unchecked.
+ */
+ WT_SIZE_CHECK(1, maxlen);
WT_RET(__wt_vunpack_uint(pp, maxlen, &pv->u.u));
break;
case 'R':
diff --git a/src/include/posix.h b/src/include/posix.h
index 14249e3ed37..1aa629c98e7 100644
--- a/src/include/posix.h
+++ b/src/include/posix.h
@@ -26,3 +26,15 @@
typedef pthread_cond_t wt_cond_t;
typedef pthread_mutex_t wt_mutex_t;
typedef pthread_t wt_thread_t;
+
+/*
+ * Thread callbacks need to match the platform-specific callback types.
+ */
+#define WT_THREAD_CALLBACK(x) void* (x)
+#define WT_THREAD_RET void*
+#define WT_THREAD_RET_VALUE NULL
+
+/*
+ * WT declaration for calling convention type
+ */
+#define WT_CDECL
diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c
index 75d3e8ef6e8..12b24984fcb 100644
--- a/src/lsm/lsm_manager.c
+++ b/src/lsm/lsm_manager.c
@@ -11,7 +11,7 @@
static int __lsm_manager_aggressive_update(WT_SESSION_IMPL *, WT_LSM_TREE *);
static int __lsm_manager_run_server(WT_SESSION_IMPL *);
-static void * __lsm_worker_manager(void *);
+static WT_THREAD_RET __lsm_worker_manager(void *);
/*
* __wt_lsm_manager_config --
@@ -500,7 +500,7 @@ err: if (dhandle_locked) {
* A thread that manages all open LSM trees, and the shared LSM worker
* threads.
*/
-static void *
+static WT_THREAD_RET
__lsm_worker_manager(void *arg)
{
WT_DECL_RET;
@@ -518,7 +518,7 @@ __lsm_worker_manager(void *arg)
err: WT_PANIC_MSG(session, ret, "LSM worker manager thread error");
}
F_CLR(S2C(session), WT_CONN_SERVER_LSM);
- return (NULL);
+ return (WT_THREAD_RET_VALUE);
}
/*
diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c
index a376a81f4eb..d1272df763d 100644
--- a/src/lsm/lsm_worker.c
+++ b/src/lsm/lsm_worker.c
@@ -10,7 +10,7 @@
static int __lsm_worker_general_op(
WT_SESSION_IMPL *, WT_LSM_WORKER_ARGS *, int *);
-static void * __lsm_worker(void *);
+static WT_THREAD_RET __lsm_worker(void *);
/*
* __wt_lsm_worker_start --
@@ -82,7 +82,7 @@ err: __wt_lsm_manager_free_work_unit(session, entry);
* __lsm_worker --
* A thread that executes work units for all open LSM trees.
*/
-static void *
+static WT_THREAD_RET
__lsm_worker(void *arg)
{
WT_CONNECTION_IMPL *conn;
@@ -166,5 +166,5 @@ err: __wt_lsm_manager_free_work_unit(session, entry);
WT_PANIC_MSG(session, ret,
"Error in LSM worker thread %d", cookie->id);
}
- return (NULL);
+ return (WT_THREAD_RET_VALUE);
}
diff --git a/src/meta/meta_ckpt.c b/src/meta/meta_ckpt.c
index 0a8557c7a33..70c9bf8dfcd 100644
--- a/src/meta/meta_ckpt.c
+++ b/src/meta/meta_ckpt.c
@@ -230,7 +230,7 @@ err: __wt_free(session, namep);
* __ckpt_compare_order --
* Qsort comparison routine for the checkpoint list.
*/
-static int
+static int WT_CDECL
__ckpt_compare_order(const void *a, const void *b)
{
WT_CKPT *ackpt, *bckpt;
diff --git a/src/meta/meta_track.c b/src/meta/meta_track.c
index 85ca1732586..3bc6a1f9d60 100644
--- a/src/meta/meta_track.c
+++ b/src/meta/meta_track.c
@@ -188,7 +188,7 @@ free: trk->op = WT_ST_EMPTY;
* Turn off metadata operation tracking, unrolling on error.
*/
int
-__wt_meta_track_off(WT_SESSION_IMPL *session, int unroll)
+__wt_meta_track_off(WT_SESSION_IMPL *session, int need_sync, int unroll)
{
WT_DECL_RET;
WT_META_TRACK *trk, *trk_orig;
@@ -218,13 +218,28 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, int unroll)
WT_TRET(__meta_track_apply(session, trk, unroll));
/*
- * If the operation succeeded and we aren't relying on the log for
- * durability, checkpoint the metadata.
+ * Unroll operations don't need to flush the metadata.
+ *
+ * Also, if we don't have the metadata handle (e.g., we're in the
+ * process of creating the metadata), we can't sync it.
*/
- if (!unroll && ret == 0 && session->meta_dhandle != NULL &&
- !FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED))
+ if (unroll || ret != 0 || !need_sync || session->meta_dhandle == NULL)
+ return (ret);
+
+ /* If we're logging, make sure the metadata update was flushed. */
+ if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED)) {
+ if (!FLD_ISSET(S2C(session)->txn_logsync,
+ WT_LOG_DSYNC | WT_LOG_FSYNC))
+ WT_WITH_DHANDLE(session, session->meta_dhandle,
+ ret = __wt_txn_checkpoint_log(session,
+ 0, WT_TXN_LOG_CKPT_SYNC, NULL));
+ } else {
WT_WITH_DHANDLE(session, session->meta_dhandle,
ret = __wt_checkpoint(session, NULL));
+ WT_RET(ret);
+ WT_WITH_DHANDLE(session, session->meta_dhandle,
+ ret = __wt_checkpoint_sync(session, NULL));
+ }
return (ret);
}
diff --git a/src/os_posix/os_thread.c b/src/os_posix/os_thread.c
index 392f997f1ac..c70a04c8df7 100644
--- a/src/os_posix/os_thread.c
+++ b/src/os_posix/os_thread.c
@@ -14,7 +14,7 @@
*/
int
__wt_thread_create(WT_SESSION_IMPL *session,
- wt_thread_t *tidret, void *(*func)(void *), void *arg)
+ wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg)
{
WT_DECL_RET;
diff --git a/src/os_win/os_fallocate.c b/src/os_win/os_fallocate.c
index 9d0a86882c6..f01ef0e101a 100644
--- a/src/os_win/os_fallocate.c
+++ b/src/os_win/os_fallocate.c
@@ -15,6 +15,8 @@
void
__wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh)
{
+ WT_UNUSED(session);
+
fh->fallocate_available = WT_FALLOCATE_AVAILABLE;
/*
diff --git a/src/os_win/os_mtx_cond.c b/src/os_win/os_mtx_cond.c
index a09b744720f..51f6d6533c8 100644
--- a/src/os_win/os_mtx_cond.c
+++ b/src/os_win/os_mtx_cond.c
@@ -46,7 +46,7 @@ __wt_cond_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs)
DWORD milliseconds;
WT_DECL_RET;
uint64_t milliseconds64;
- int lasterror, locked;
+ int locked;
locked = 0;
@@ -76,7 +76,7 @@ __wt_cond_wait(WT_SESSION_IMPL *session, WT_CONDVAR *cond, uint64_t usecs)
*/
if (milliseconds64 >= INFINITE)
milliseconds64 = INFINITE - 1;
- milliseconds = milliseconds64;
+ milliseconds = (DWORD)milliseconds64;
/*
* 0 would mean the CV sleep becomes a TryCV which we do not
diff --git a/src/os_win/os_once.c b/src/os_win/os_once.c
index 179c1bc97f7..bec8c08777c 100644
--- a/src/os_win/os_once.c
+++ b/src/os_win/os_once.c
@@ -19,6 +19,8 @@ BOOL CALLBACK _wt_init_once_callback(
)
{
void(*init_routine)(void) = Parameter;
+ WT_UNUSED(InitOnce);
+ WT_UNUSED(Context);
init_routine();
diff --git a/src/os_win/os_thread.c b/src/os_win/os_thread.c
index 05f7dc15914..b5f13aea4e9 100644
--- a/src/os_win/os_thread.c
+++ b/src/os_win/os_thread.c
@@ -14,14 +14,14 @@
*/
int
__wt_thread_create(WT_SESSION_IMPL *session,
- wt_thread_t *tidret, void *(*func)(void *), void *arg)
+ wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg)
{
/* Spawn a new thread of control. */
- *tidret = CreateThread(NULL, 0, func, arg, 0, NULL);
- if (*tidret != NULL)
+ *tidret = (HANDLE)_beginthreadex(NULL, 0, func, arg, 0, NULL);
+ if (*tidret != 0)
return (0);
- WT_RET_MSG(session, __wt_errno(), "CreateThread");
+ WT_RET_MSG(session, errno, "_beginthreadex");
}
/*
diff --git a/src/os_win/os_time.c b/src/os_win/os_time.c
index 30fde045c54..c51db118ce1 100644
--- a/src/os_win/os_time.c
+++ b/src/os_win/os_time.c
@@ -32,8 +32,10 @@ int
__wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
{
uint64_t ns100;
-
FILETIME time;
+
+ WT_UNUSED(session);
+
GetSystemTimeAsFileTime(&time);
ns100 = (((int64_t)time.dwHighDateTime << 32) + time.dwLowDateTime)
diff --git a/src/schema/schema_create.c b/src/schema/schema_create.c
index 720b6fc6412..80e443d8a21 100644
--- a/src/schema/schema_create.c
+++ b/src/schema/schema_create.c
@@ -637,7 +637,7 @@ __wt_schema_create(
ret = __wt_bad_object_type(session, uri);
session->dhandle = NULL;
- WT_TRET(__wt_meta_track_off(session, ret != 0));
+ WT_TRET(__wt_meta_track_off(session, 1, ret != 0));
return (ret);
}
diff --git a/src/schema/schema_drop.c b/src/schema/schema_drop.c
index 03dece47722..03097128ec2 100644
--- a/src/schema/schema_drop.c
+++ b/src/schema/schema_drop.c
@@ -192,7 +192,7 @@ __wt_schema_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
/* Bump the schema generation so that stale data is ignored. */
++S2C(session)->schema_gen;
- WT_TRET(__wt_meta_track_off(session, ret != 0));
+ WT_TRET(__wt_meta_track_off(session, 1, ret != 0));
return (ret);
}
diff --git a/src/schema/schema_rename.c b/src/schema/schema_rename.c
index 38124754cd5..51281eccec5 100644
--- a/src/schema/schema_rename.c
+++ b/src/schema/schema_rename.c
@@ -274,7 +274,7 @@ __wt_schema_rename(WT_SESSION_IMPL *session,
/* Bump the schema generation so that stale data is ignored. */
++S2C(session)->schema_gen;
- WT_TRET(__wt_meta_track_off(session, ret != 0));
+ WT_TRET(__wt_meta_track_off(session, 1, ret != 0));
/* If we didn't find a metadata entry, map that error to ENOENT. */
return (ret == WT_NOTFOUND ? ENOENT : ret);
diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c
index 833d098efeb..0825f783ca3 100644
--- a/src/session/session_dhandle.c
+++ b/src/session/session_dhandle.c
@@ -168,7 +168,7 @@ __wt_session_release_btree(WT_SESSION_IMPL *session)
WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE));
F_CLR(dhandle, WT_DHANDLE_DISCARD);
- WT_TRET(__wt_conn_btree_sync_and_close(session, 0));
+ WT_TRET(__wt_conn_btree_sync_and_close(session, 0, 0));
}
if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE))
diff --git a/src/support/huffman.c b/src/support/huffman.c
index 12f98184b5c..48361551ba1 100644
--- a/src/support/huffman.c
+++ b/src/support/huffman.c
@@ -96,8 +96,8 @@ typedef struct __indexed_byte {
uint32_t frequency;
} INDEXED_SYMBOL;
-static int indexed_freq_compare(const void *, const void *);
-static int indexed_symbol_compare(const void *, const void *);
+static int WT_CDECL indexed_freq_compare(const void *, const void *);
+static int WT_CDECL indexed_symbol_compare(const void *, const void *);
static void make_table(
WT_SESSION_IMPL *, uint8_t *, uint16_t, WT_HUFFMAN_CODE *, u_int);
static void node_queue_close(WT_SESSION_IMPL *, NODE_QUEUE *);
@@ -117,7 +117,7 @@ static void set_codes(WT_FREQTREE_NODE *, WT_HUFFMAN_CODE *, uint16_t, uint8_t);
* indexed_symbol_compare --
* Qsort comparator to order the table by symbol, lowest to highest.
*/
-static int
+static int WT_CDECL
indexed_symbol_compare(const void *a, const void *b)
{
return (((INDEXED_SYMBOL *)a)->symbol >
@@ -131,7 +131,7 @@ indexed_symbol_compare(const void *a, const void *b)
* Qsort comparator to order the table by frequency (the most frequent
* symbols will be at the end of the array).
*/
-static int
+static int WT_CDECL
indexed_freq_compare(const void *a, const void *b)
{
return (((INDEXED_SYMBOL *)a)->frequency >
diff --git a/src/txn/txn.c b/src/txn/txn.c
index 6c06a0af820..a1bec569ce7 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -12,7 +12,7 @@
* __wt_txnid_cmp --
* Compare transaction IDs for sorting / searching.
*/
-int
+int WT_CDECL
__wt_txnid_cmp(const void *v1, const void *v2)
{
uint64_t id1, id2;
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index fa77d2b5fa5..7c1532390f9 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -534,7 +534,7 @@ err: /*
*/
session->isolation = txn->isolation = TXN_ISO_READ_UNCOMMITTED;
if (tracking)
- WT_TRET(__wt_meta_track_off(session, ret != 0));
+ WT_TRET(__wt_meta_track_off(session, 0, ret != 0));
if (F_ISSET(txn, TXN_RUNNING)) {
/*
@@ -1090,30 +1090,50 @@ __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[])
* Checkpoint a single file as part of closing the handle.
*/
int
-__wt_checkpoint_close(WT_SESSION_IMPL *session, int force)
+__wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force)
{
+ WT_BTREE *btree;
WT_DECL_RET;
+ int bulk, need_tracking;
+
+ btree = S2BT(session);
+ bulk = F_ISSET(btree, WT_BTREE_BULK) ? 1 : 0;
/* Handle forced discard (when dropping a file). */
if (force)
return (__wt_cache_op(session, NULL, WT_SYNC_DISCARD_FORCE));
- /* If closing an unmodified file, try to evict its pages. */
- if (!S2BT(session)->modified) {
- ret = __wt_cache_op(session, NULL, WT_SYNC_DISCARD);
- if (ret != EBUSY)
- return (ret);
+ /*
+ * If closing an unmodified file, check that no update is required
+ * for active readers.
+ */
+ if (!btree->modified && !bulk) {
+ __wt_txn_update_oldest(session);
+ return (__wt_txn_visible_all(session, btree->rec_max_txn) ?
+ __wt_cache_op(session, NULL, WT_SYNC_DISCARD) : EBUSY);
}
/*
- * If closing a modified file, or closing an unmodified file was blocked
- * for any reason, checkpoint the file and optionally flush the writes
- * (the checkpoint call will discard the blocks, there's no additional
- * step needed).
+ * If closing a modified file, checkpoint the file and optionally flush
+ * the writes (the checkpoint call will discard the blocks, there's no
+ * additional step needed).
+ *
+ * We should already have the schema lock unless we're finishing a bulk
+ * load -- the only other paths to closing files (sweep and LSM) have
+ * already checked for read-only trees.
*/
- WT_RET(__checkpoint_worker(session, NULL, 0));
- if (F_ISSET(S2C(session), WT_CONN_CKPT_SYNC))
- WT_RET(__wt_checkpoint_sync(session, NULL));
+ if (!final)
+ WT_ASSERT(session,
+ bulk || F_ISSET(session, WT_SESSION_SCHEMA_LOCKED));
- return (0);
+ need_tracking = !bulk && !final && !WT_META_TRACKING(session);
+ if (need_tracking)
+ WT_RET(__wt_meta_track_on(session));
+
+ WT_TRET(__checkpoint_worker(session, NULL, 0));
+
+ if (need_tracking)
+ WT_TRET(__wt_meta_track_off(session, 1, ret != 0));
+
+ return (ret);
}
diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c
index afe98d95c6f..d3e010cf401 100644
--- a/src/txn/txn_log.c
+++ b/src/txn/txn_log.c
@@ -221,11 +221,12 @@ __txn_log_file_sync(WT_SESSION_IMPL *session, uint32_t flags, WT_LSN *lsnp)
WT_DECL_RET;
size_t header_size;
uint32_t rectype = WT_LOGREC_FILE_SYNC;
- int start;
+ int start, need_sync;
const char *fmt = WT_UNCHECKED_STRING(III);
btree = S2BT(session);
start = LF_ISSET(WT_TXN_LOG_CKPT_START);
+ need_sync = LF_ISSET(WT_TXN_LOG_CKPT_SYNC);
WT_RET(__wt_struct_size(
session, &header_size, fmt, rectype, btree->id, start));
@@ -236,7 +237,8 @@ __txn_log_file_sync(WT_SESSION_IMPL *session, uint32_t flags, WT_LSN *lsnp)
fmt, rectype, btree->id, start));
logrec->size += (uint32_t)header_size;
- WT_ERR(__wt_log_write(session, logrec, lsnp, 0));
+ WT_ERR(__wt_log_write(
+ session, logrec, lsnp, need_sync ? WT_LOG_FSYNC : 0));
err: __wt_logrec_free(session, &logrec);
return (ret);
}
@@ -360,6 +362,8 @@ __wt_txn_checkpoint_log(
__wt_scr_free(session, &txn->ckpt_snapshot);
txn->full_ckpt = 0;
break;
+
+ WT_ILLEGAL_VALUE_ERR(session);
}
err: __wt_logrec_free(session, &logrec);
diff --git a/test/mciproject.yml b/test/mciproject.yml
index d7f66212b2a..64007bdbd92 100644
--- a/test/mciproject.yml
+++ b/test/mciproject.yml
@@ -57,6 +57,21 @@ tasks:
${test_env_vars|} python ./test/suite/run.py -v 2
+ - name: compile-windows-alt
+ commands:
+ - func: "fetch source"
+ - command: git.apply_patch
+ params:
+ directory: wiredtiger
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger"
+ script: |
+ set -o errexit
+ set -o verbose
+
+ scons.bat ${smp_command|} "CFLAGS=/Gv /wd4090 /wd4996 /we4047 /we4024 /TC /we4100" wiredtiger.dll libwiredtiger.lib
+
buildvariants:
- name: ubuntu1404
display_name: Ubuntu 14.04
@@ -89,6 +104,7 @@ buildvariants:
smp_command: -j$(grep -c ^processor /proc/cpuinfo)
tasks:
- name: compile-windows
+ - name: compile-windows-alt
- name: osx-108
display_name: OS X 10.8
diff --git a/test/suite/test_durability01.py b/test/suite/test_durability01.py
new file mode 100644
index 00000000000..53529c7e89f
--- /dev/null
+++ b/test/suite/test_durability01.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2015 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_durability01.py
+# Durability: make sure the metadata is stable after exclusive operations
+# cause files to be closed.
+#
+
+import fnmatch, os, shutil, time
+from suite_subprocess import suite_subprocess
+from wiredtiger import wiredtiger_open
+from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
+import wttest
+
+class test_durability01(wttest.WiredTigerTestCase, suite_subprocess):
+ uri = 'table:test_durability01'
+ create_params = 'key_format=i,value_format=i'
+
+ def check_crash_restart(self, olddir, newdir):
+ ''' Simulate a crash from olddir and restart in newdir. '''
+ # with the connection still open, copy files to new directory
+ shutil.rmtree(newdir, ignore_errors=True)
+ os.mkdir(newdir)
+ for fname in os.listdir(olddir):
+ fullname = os.path.join(olddir, fname)
+ # Skip lock file on Windows since it is locked
+ if os.path.isfile(fullname) and "WiredTiger.lock" not in fullname:
+ shutil.copy(fullname, newdir)
+
+ # Open the new directory
+ conn = self.setUpConnectionOpen(newdir)
+ session = self.setUpSessionOpen(conn)
+ session.verify(self.uri)
+ conn.close()
+
+ def test_durability(self):
+ '''Check for missing metadata checkpoints'''
+
+ # Here's the strategy:
+ # - update the table
+ # - verify, which causes the table to be flushed
+ # - copy the database directory (live, simulating a crash)
+ # - verify in the copy
+ # - repeat
+ #
+ # If the metadata isn't flushed, eventually the metadata we copy will
+ # be sufficiently out-of-sync with the data file that it won't verify.
+ self.session.create(self.uri, self.create_params)
+ for i in range(100):
+ c = self.session.open_cursor(self.uri)
+ c.set_key(i)
+ c.set_value(i)
+ c.insert()
+ c.close()
+ if i % 5 == 0:
+ self.session.checkpoint()
+ else:
+ self.session.verify(self.uri)
+ self.check_crash_restart(".", "RESTART")
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/test/suite/test_sweep01.py b/test/suite/test_sweep01.py
index a1a89c58838..8b5e0c74660 100644
--- a/test/suite/test_sweep01.py
+++ b/test/suite/test_sweep01.py
@@ -42,10 +42,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
uri = 'table:' + tablebase
numfiles = 50
numkv = 1000
- ckpt_list = [
- ('off', dict(ckpt=0)),
- ('on', dict(ckpt=10)),
- ]
+ ckpt = 5
types = [
('row', dict(tabletype='row',
@@ -56,8 +53,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
create_params = 'key_format=r,value_format=8t')),
]
- scenarios = number_scenarios(
- prune_scenarios(multiply_scenarios('.', types, ckpt_list), 1, 100))
+ scenarios = types
# Overrides WiredTigerTestCase
def setUpConnectionOpen(self, dir):
@@ -107,12 +103,16 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
ref1 = stat_cursor[stat.conn.dh_conn_ref][2]
nfile1 = stat_cursor[stat.conn.file_open][2]
stat_cursor.close()
- # Inactive time on a handle must be a minute or more.
- # We've configured the sweep server to run every 2 seconds and idle
- # time to be 6 seconds. It should take at most 8 seconds for a handle
- # to be closed. Sleep for 12 seconds to be safe.
+
+ #
+ # We've configured checkpoints to run every 5 seconds, sweep server to
+ # run every 2 seconds and idle time to be 6 seconds. It should take
+ # about 8 seconds for a handle to be closed. Sleep for 12 seconds to be
+ # safe.
+ #
uri = '%s.test' % self.uri
self.session.create(uri, self.create_params)
+
#
# Keep inserting data to keep at least one handle active and give
# checkpoint something to do. Make sure checkpoint doesn't adjust
@@ -120,7 +120,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
#
c = self.session.open_cursor(uri, None)
k = 0
- sleep=0
+ sleep = 0
while sleep < 12:
k = k+1
c.set_key(k)
diff --git a/test/windows/windows_shim.c b/test/windows/windows_shim.c
index 646ebf0c441..8b0f05bfe2d 100644
--- a/test/windows/windows_shim.c
+++ b/test/windows/windows_shim.c
@@ -59,6 +59,7 @@ usleep(useconds_t useconds)
int
pthread_rwlock_destroy(pthread_rwlock_t *lock)
{
+ lock = lock;
return (0);
}
@@ -66,6 +67,7 @@ int
pthread_rwlock_init(pthread_rwlock_t *rwlock,
const pthread_rwlockattr_t *ignored)
{
+ ignored = ignored;
InitializeSRWLock(&rwlock->rwlock);
rwlock->exclusive_locked = 0;
@@ -99,13 +101,15 @@ pthread_rwlock_wrlock(pthread_rwlock_t *rwlock)
rwlock->exclusive_locked = GetCurrentThreadId();
return (0);
-
}
+#pragma warning( once : 4024 )
+#pragma warning( once : 4047 )
int
pthread_create(pthread_t *tidret, const pthread_attr_t *ignored,
void *(*func)(void *), void * arg)
{
+ ignored = ignored;
*tidret = CreateThread(NULL, 0, func, arg, 0, NULL);
if (*tidret != NULL)
@@ -117,6 +121,7 @@ pthread_create(pthread_t *tidret, const pthread_attr_t *ignored,
int
pthread_join(pthread_t thread, void **ignored)
{
+ ignored = ignored;
WaitForSingleObject(thread, INFINITE);
return (0);
}