summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Alex Gorrod <alexander.gorrod@mongodb.com> 2015-03-26 15:38:45 +1100
committer Alex Gorrod <alexander.gorrod@mongodb.com> 2015-03-26 15:38:45 +1100
commit 87e3f4466bbf2597f1beecdb9ce87745381de232 (patch)
tree 8d0393e86a1132a8bd7085827178e2c3d1d04299
parent c23ca32b9e6bbdede8c4ef4f56d4de6acdcd69a3 (diff)
parent 8172b0fdf726941bc935fd33f0b65c62b1a6c887 (diff)
download mongo-87e3f4466bbf2597f1beecdb9ce87745381de232.tar.gz
Merge pull request #1812 from wiredtiger/checkpoint-closing-files
Checkpoint closing files
-rw-r--r-- dist/flags.py 1
-rw-r--r-- src/btree/bt_sync.c 11
-rw-r--r-- src/conn/conn_dhandle.c 12
-rw-r--r-- src/conn/conn_sweep.c 27
-rw-r--r-- src/evict/evict_file.c 16
-rw-r--r-- src/evict/evict_lru.c 2
-rw-r--r-- src/include/btree.h 1
-rw-r--r-- src/include/btree.i 4
-rw-r--r-- src/include/extern.h 6
-rw-r--r-- src/include/flags.h 1
-rw-r--r-- src/meta/meta_track.c 25
-rw-r--r-- src/schema/schema_create.c 2
-rw-r--r-- src/schema/schema_drop.c 2
-rw-r--r-- src/schema/schema_rename.c 2
-rw-r--r-- src/session/session_dhandle.c 2
-rw-r--r-- src/txn/txn_ckpt.c 50
-rw-r--r-- src/txn/txn_log.c 7
-rw-r--r-- test/suite/test_durability01.py 87
-rw-r--r-- test/suite/test_sweep01.py 8
19 files changed, 200 insertions, 66 deletions
diff --git a/dist/flags.py b/dist/flags.py
index f1eb6b24968..34f3ab3e02f 100644
--- a/dist/flags.py
+++ b/dist/flags.py
@@ -54,6 +54,7 @@ flags = {
'TXN_LOG_CKPT_PREPARE',
'TXN_LOG_CKPT_START',
'TXN_LOG_CKPT_STOP',
+ 'TXN_LOG_CKPT_SYNC',
],
'verbose' : [
'VERB_API',
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index 1bbaee4bf1b..dae2dd8d480 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -109,6 +109,17 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
/* Write all dirty in-cache pages. */
flags |= WT_READ_NO_EVICT;
for (walk = NULL;;) {
+ /*
+ * If we have a page, and it was ever modified, track
+ * the highest transaction ID in the tree. We do this
+ * here because we want the value after reconciling
+ * dirty pages.
+ */
+ if (walk != NULL && walk->page != NULL &&
+ (mod = walk->page->modify) != NULL &&
+ TXNID_LT(btree->rec_max_txn, mod->rec_max_txn))
+ btree->rec_max_txn = mod->rec_max_txn;
+
WT_ERR(__wt_tree_walk(session, &walk, NULL, flags));
if (walk == NULL)
break;
diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c
index 9f308a2569c..63180d64019 100644
--- a/src/conn/conn_dhandle.c
+++ b/src/conn/conn_dhandle.c
@@ -234,7 +234,7 @@ err: WT_TRET(__wt_rwlock_destroy(session, &dhandle->rwlock));
* Sync and close the underlying btree handle.
*/
int
-__wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int force)
+__wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int final, int force)
{
WT_BTREE *btree;
WT_DATA_HANDLE *dhandle;
@@ -273,7 +273,7 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int force)
*/
if (!F_ISSET(btree,
WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY))
- WT_ERR(__wt_checkpoint_close(session, force));
+ WT_ERR(__wt_checkpoint_close(session, final, force));
if (dhandle->checkpoint == NULL)
--S2C(session)->open_btree_count;
@@ -391,7 +391,7 @@ __conn_btree_open(WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags)
* in the tree that can block the close.
*/
if (F_ISSET(dhandle, WT_DHANDLE_OPEN))
- WT_RET(__wt_conn_btree_sync_and_close(session, 0));
+ WT_RET(__wt_conn_btree_sync_and_close(session, 0, 0));
/* Discard any previous configuration, set up the new configuration. */
__conn_btree_config_clear(session);
@@ -423,7 +423,7 @@ __conn_btree_open(WT_SESSION_IMPL *session, const char *cfg[], uint32_t flags)
err: F_CLR(btree, WT_BTREE_SPECIAL_FLAGS);
/* If the open failed, close the handle. */
if (F_ISSET(dhandle, WT_DHANDLE_OPEN))
- WT_TRET(__wt_conn_btree_sync_and_close(session, 0));
+ WT_TRET(__wt_conn_btree_sync_and_close(session, 0, 0));
}
return (ret);
@@ -669,7 +669,7 @@ __wt_conn_dhandle_close_all(
if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
if ((ret = __wt_meta_track_sub_on(session)) == 0)
ret = __wt_conn_btree_sync_and_close(
- session, force);
+ session, 0, force);
/*
* If the close succeeded, drop any locks it acquired.
@@ -731,7 +731,7 @@ __wt_conn_dhandle_discard_single(WT_SESSION_IMPL *session, int final)
dhandle = session->dhandle;
if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
- tret = __wt_conn_btree_sync_and_close(session, 0);
+ tret = __wt_conn_btree_sync_and_close(session, final, 0);
if (final && tret != 0) {
__wt_err(session, tret,
"Final close of %s failed", dhandle->name);
diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c
index 2a862758e08..8c82803f608 100644
--- a/src/conn/conn_sweep.c
+++ b/src/conn/conn_sweep.c
@@ -51,6 +51,7 @@ __sweep_remove_handles(WT_SESSION_IMPL *session)
static int
__sweep(WT_SESSION_IMPL *session)
{
+ WT_BTREE *btree;
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
@@ -83,15 +84,13 @@ __sweep(WT_SESSION_IMPL *session)
/*
* We have a candidate for closing; if it's open, acquire an
- * exclusive lock on the handle and close it. We might be
- * blocking opens for a long time (over disk I/O), but the
- * handle was quiescent for awhile.
+ * exclusive lock on the handle and close it.
*
- * The close can fail if an update cannot be written (updates
- * in a no-longer-referenced file might not yet be globally
- * visible if sessions have disjoint sets of files open). If
- * the handle is busy, skip it, we'll retry the close the next
- * time, after the transaction state has progressed.
+ * The close would require I/O if an update cannot be written
+ * (updates in a no-longer-referenced file might not yet be
+ * globally visible if sessions have disjoint sets of files
+ * open). In that case, skip it: we'll retry the close the
+ * next time, after the transaction state has progressed.
*
* We don't set WT_DHANDLE_EXCLUSIVE deliberately, we want
* opens to block on us rather than returning an EBUSY error to
@@ -101,10 +100,16 @@ __sweep(WT_SESSION_IMPL *session)
__wt_try_writelock(session, dhandle->rwlock)) == EBUSY)
continue;
+ /* Only sweep clean trees where all updates are visible. */
+ btree = dhandle->handle;
+ if (btree->modified ||
+ !__wt_txn_visible_all(session, btree->rec_max_txn))
+ goto unlock;
+
/* If the handle is open, try to close it. */
if (F_ISSET(dhandle, WT_DHANDLE_OPEN)) {
- WT_WITH_DHANDLE(session, dhandle,
- ret = __wt_conn_btree_sync_and_close(session, 0));
+ WT_WITH_DHANDLE(session, dhandle, ret =
+ __wt_conn_btree_sync_and_close(session, 0, 0));
/* We closed the btree handle, bump the statistic. */
if (ret == 0)
@@ -115,7 +120,7 @@ __sweep(WT_SESSION_IMPL *session)
if (dhandle->session_inuse == 0 && dhandle->session_ref == 0)
++closed_handles;
- WT_TRET(__wt_writeunlock(session, dhandle->rwlock));
+unlock: WT_TRET(__wt_writeunlock(session, dhandle->rwlock));
WT_RET_BUSY_OK(ret);
}
diff --git a/src/evict/evict_file.c b/src/evict/evict_file.c
index f546a5adae1..864c116a380 100644
--- a/src/evict/evict_file.c
+++ b/src/evict/evict_file.c
@@ -72,23 +72,17 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop)
WT_READ_CACHE | WT_READ_NO_EVICT));
switch (syncop) {
- case WT_SYNC_DISCARD:
- /*
- * Check that the page is clean: if we see a dirty page
- * (including a dirty parent page after evicting a
- * child), give up. The higher level can try to
- * checkpoint, but during discard we aren't set up to
- * manage checkpoints.
- */
- if (__wt_page_is_modified(page))
- WT_ERR(EBUSY);
- /* FALLTHROUGH */
case WT_SYNC_CLOSE:
/*
* Evict the page.
*/
WT_ERR(__wt_evict(session, ref, 1));
break;
+ case WT_SYNC_DISCARD:
+ WT_ASSERT(session,
+ __wt_page_can_evict(session, page, 0));
+ __wt_evict_page_clean_update(session, ref);
+ break;
case WT_SYNC_DISCARD_FORCE:
/*
* Forced discard of the page, whether clean or dirty.
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 482bf3c9f59..62326015d2c 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -1211,7 +1211,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
}
fast: /* If the page can't be evicted, give up. */
- if (!__wt_page_can_evict(session, page, 0))
+ if (!__wt_page_can_evict(session, page, 1))
continue;
/*
diff --git a/src/include/btree.h b/src/include/btree.h
index f00a7ac9a8e..cc571124207 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -124,6 +124,7 @@ struct __wt_btree {
u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */
uint64_t checkpoint_gen; /* Checkpoint generation */
+ uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */
uint64_t write_gen; /* Write generation */
WT_REF *evict_ref; /* Eviction thread's location */
diff --git a/src/include/btree.i b/src/include/btree.i
index e933ce46930..7d9a3095a0c 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -980,7 +980,7 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_PAGE *page, int check_splits)
* a transaction value, once that's globally visible, we know we can
* evict the created page.
*/
- if (WT_PAGE_IS_INTERNAL(page) &&
+ if (check_splits && WT_PAGE_IS_INTERNAL(page) &&
!__wt_txn_visible_all(session, mod->mod_split_txn))
return (0);
@@ -1023,7 +1023,7 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_PAGE *page, int check_splits)
/*
* If the page was recently split in-memory, don't force it out: we
- * hope eviction will find it first.
+ * hope an eviction thread will find it first.
*/
if (check_splits &&
!__wt_txn_visible_all(session, mod->inmem_split_txn))
diff --git a/src/include/extern.h b/src/include/extern.h
index 12877c04051..676264b49eb 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -225,7 +225,7 @@ extern int __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *c
extern int __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session);
extern int __wt_checkpoint_signal(WT_SESSION_IMPL *session, wt_off_t logsize);
extern int __wt_conn_dhandle_find(WT_SESSION_IMPL *session, const char *name, const char *ckpt, uint32_t flags);
-extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int force);
+extern int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, int final, int force);
extern int __wt_conn_btree_get(WT_SESSION_IMPL *session, const char *name, const char *ckpt, const char *cfg[], uint32_t flags);
extern int __wt_conn_btree_apply(WT_SESSION_IMPL *session, int apply_checkpoints, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
extern int __wt_conn_btree_apply_single_ckpt(WT_SESSION_IMPL *session, const char *uri, int (*func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[]);
@@ -425,7 +425,7 @@ extern int __wt_metadata_remove(WT_SESSION_IMPL *session, const char *key);
extern int __wt_metadata_search( WT_SESSION_IMPL *session, const char *key, char **valuep);
extern void __wt_meta_track_discard(WT_SESSION_IMPL *session);
extern int __wt_meta_track_on(WT_SESSION_IMPL *session);
-extern int __wt_meta_track_off(WT_SESSION_IMPL *session, int unroll);
+extern int __wt_meta_track_off(WT_SESSION_IMPL *session, int sync, int unroll);
extern int __wt_meta_track_sub_on(WT_SESSION_IMPL *session);
extern int __wt_meta_track_sub_off(WT_SESSION_IMPL *session);
extern int __wt_meta_track_checkpoint(WT_SESSION_IMPL *session);
@@ -672,7 +672,7 @@ extern int __wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]);
-extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, int force);
+extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force);
extern uint64_t __wt_ext_transaction_id(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session);
extern int __wt_ext_transaction_isolation_level( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session);
extern int __wt_ext_transaction_notify( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_TXN_NOTIFY *notify);
diff --git a/src/include/flags.h b/src/include/flags.h
index 30b2ab1c0e3..99c77c94f49 100644
--- a/src/include/flags.h
+++ b/src/include/flags.h
@@ -65,6 +65,7 @@
#define WT_TXN_LOG_CKPT_PREPARE 0x00000002
#define WT_TXN_LOG_CKPT_START 0x00000004
#define WT_TXN_LOG_CKPT_STOP 0x00000008
+#define WT_TXN_LOG_CKPT_SYNC 0x00000010
#define WT_VERB_API 0x00000001
#define WT_VERB_BLOCK 0x00000002
#define WT_VERB_CHECKPOINT 0x00000004
diff --git a/src/meta/meta_track.c b/src/meta/meta_track.c
index 85ca1732586..42955e734e1 100644
--- a/src/meta/meta_track.c
+++ b/src/meta/meta_track.c
@@ -188,7 +188,7 @@ free: trk->op = WT_ST_EMPTY;
* Turn off metadata operation tracking, unrolling on error.
*/
int
-__wt_meta_track_off(WT_SESSION_IMPL *session, int unroll)
+__wt_meta_track_off(WT_SESSION_IMPL *session, int sync, int unroll)
{
WT_DECL_RET;
WT_META_TRACK *trk, *trk_orig;
@@ -218,13 +218,28 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, int unroll)
WT_TRET(__meta_track_apply(session, trk, unroll));
/*
- * If the operation succeeded and we aren't relying on the log for
- * durability, checkpoint the metadata.
+ * Unroll operations don't need to flush the metadata.
+ *
+ * Also, if we don't have the metadata handle (e.g., we're in the
+ * process of creating the metadata), we can't sync it.
*/
- if (!unroll && ret == 0 && session->meta_dhandle != NULL &&
- !FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED))
+ if (unroll || ret != 0 || !sync || session->meta_dhandle == NULL)
+ return (ret);
+
+ /* If we're logging, make sure the metadata update was flushed. */
+ if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED)) {
+ if (!FLD_ISSET(S2C(session)->txn_logsync,
+ WT_LOG_DSYNC | WT_LOG_FSYNC))
+ WT_WITH_DHANDLE(session, session->meta_dhandle,
+ ret = __wt_txn_checkpoint_log(session,
+ 0, WT_TXN_LOG_CKPT_SYNC, NULL));
+ } else {
WT_WITH_DHANDLE(session, session->meta_dhandle,
ret = __wt_checkpoint(session, NULL));
+ WT_RET(ret);
+ WT_WITH_DHANDLE(session, session->meta_dhandle,
+ ret = __wt_checkpoint_sync(session, NULL));
+ }
return (ret);
}
diff --git a/src/schema/schema_create.c b/src/schema/schema_create.c
index 720b6fc6412..80e443d8a21 100644
--- a/src/schema/schema_create.c
+++ b/src/schema/schema_create.c
@@ -637,7 +637,7 @@ __wt_schema_create(
ret = __wt_bad_object_type(session, uri);
session->dhandle = NULL;
- WT_TRET(__wt_meta_track_off(session, ret != 0));
+ WT_TRET(__wt_meta_track_off(session, 1, ret != 0));
return (ret);
}
diff --git a/src/schema/schema_drop.c b/src/schema/schema_drop.c
index 03dece47722..03097128ec2 100644
--- a/src/schema/schema_drop.c
+++ b/src/schema/schema_drop.c
@@ -192,7 +192,7 @@ __wt_schema_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
/* Bump the schema generation so that stale data is ignored. */
++S2C(session)->schema_gen;
- WT_TRET(__wt_meta_track_off(session, ret != 0));
+ WT_TRET(__wt_meta_track_off(session, 1, ret != 0));
return (ret);
}
diff --git a/src/schema/schema_rename.c b/src/schema/schema_rename.c
index 38124754cd5..51281eccec5 100644
--- a/src/schema/schema_rename.c
+++ b/src/schema/schema_rename.c
@@ -274,7 +274,7 @@ __wt_schema_rename(WT_SESSION_IMPL *session,
/* Bump the schema generation so that stale data is ignored. */
++S2C(session)->schema_gen;
- WT_TRET(__wt_meta_track_off(session, ret != 0));
+ WT_TRET(__wt_meta_track_off(session, 1, ret != 0));
/* If we didn't find a metadata entry, map that error to ENOENT. */
return (ret == WT_NOTFOUND ? ENOENT : ret);
diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c
index 833d098efeb..0825f783ca3 100644
--- a/src/session/session_dhandle.c
+++ b/src/session/session_dhandle.c
@@ -168,7 +168,7 @@ __wt_session_release_btree(WT_SESSION_IMPL *session)
WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE));
F_CLR(dhandle, WT_DHANDLE_DISCARD);
- WT_TRET(__wt_conn_btree_sync_and_close(session, 0));
+ WT_TRET(__wt_conn_btree_sync_and_close(session, 0, 0));
}
if (F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE))
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index fa77d2b5fa5..7c1532390f9 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -534,7 +534,7 @@ err: /*
*/
session->isolation = txn->isolation = TXN_ISO_READ_UNCOMMITTED;
if (tracking)
- WT_TRET(__wt_meta_track_off(session, ret != 0));
+ WT_TRET(__wt_meta_track_off(session, 0, ret != 0));
if (F_ISSET(txn, TXN_RUNNING)) {
/*
@@ -1090,30 +1090,50 @@ __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[])
* Checkpoint a single file as part of closing the handle.
*/
int
-__wt_checkpoint_close(WT_SESSION_IMPL *session, int force)
+__wt_checkpoint_close(WT_SESSION_IMPL *session, int final, int force)
{
+ WT_BTREE *btree;
WT_DECL_RET;
+ int bulk, need_tracking;
+
+ btree = S2BT(session);
+ bulk = F_ISSET(btree, WT_BTREE_BULK) ? 1 : 0;
/* Handle forced discard (when dropping a file). */
if (force)
return (__wt_cache_op(session, NULL, WT_SYNC_DISCARD_FORCE));
- /* If closing an unmodified file, try to evict its pages. */
- if (!S2BT(session)->modified) {
- ret = __wt_cache_op(session, NULL, WT_SYNC_DISCARD);
- if (ret != EBUSY)
- return (ret);
+ /*
+ * If closing an unmodified file, check that no update is required
+ * for active readers.
+ */
+ if (!btree->modified && !bulk) {
+ __wt_txn_update_oldest(session);
+ return (__wt_txn_visible_all(session, btree->rec_max_txn) ?
+ __wt_cache_op(session, NULL, WT_SYNC_DISCARD) : EBUSY);
}
/*
- * If closing a modified file, or closing an unmodified file was blocked
- * for any reason, checkpoint the file and optionally flush the writes
- * (the checkpoint call will discard the blocks, there's no additional
- * step needed).
+ * If closing a modified file, checkpoint the file and optionally flush
+ * the writes (the checkpoint call will discard the blocks, there's no
+ * additional step needed).
+ *
+ * We should already have the schema lock unless we're finishing a bulk
+ * load -- the only other paths to closing files (sweep and LSM) have
+ * already checked for read-only trees.
*/
- WT_RET(__checkpoint_worker(session, NULL, 0));
- if (F_ISSET(S2C(session), WT_CONN_CKPT_SYNC))
- WT_RET(__wt_checkpoint_sync(session, NULL));
+ if (!final)
+ WT_ASSERT(session,
+ bulk || F_ISSET(session, WT_SESSION_SCHEMA_LOCKED));
- return (0);
+ need_tracking = !bulk && !final && !WT_META_TRACKING(session);
+ if (need_tracking)
+ WT_RET(__wt_meta_track_on(session));
+
+ WT_TRET(__checkpoint_worker(session, NULL, 0));
+
+ if (need_tracking)
+ WT_RET(__wt_meta_track_off(session, 1, ret != 0));
+
+ return (ret);
}
diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c
index afe98d95c6f..bf0b1990557 100644
--- a/src/txn/txn_log.c
+++ b/src/txn/txn_log.c
@@ -221,11 +221,12 @@ __txn_log_file_sync(WT_SESSION_IMPL *session, uint32_t flags, WT_LSN *lsnp)
WT_DECL_RET;
size_t header_size;
uint32_t rectype = WT_LOGREC_FILE_SYNC;
- int start;
+ int start, sync;
const char *fmt = WT_UNCHECKED_STRING(III);
btree = S2BT(session);
start = LF_ISSET(WT_TXN_LOG_CKPT_START);
+ sync = LF_ISSET(WT_TXN_LOG_CKPT_SYNC);
WT_RET(__wt_struct_size(
session, &header_size, fmt, rectype, btree->id, start));
@@ -236,7 +237,7 @@ __txn_log_file_sync(WT_SESSION_IMPL *session, uint32_t flags, WT_LSN *lsnp)
fmt, rectype, btree->id, start));
logrec->size += (uint32_t)header_size;
- WT_ERR(__wt_log_write(session, logrec, lsnp, 0));
+ WT_ERR(__wt_log_write(session, logrec, lsnp, sync ? WT_LOG_FSYNC : 0));
err: __wt_logrec_free(session, &logrec);
return (ret);
}
@@ -360,6 +361,8 @@ __wt_txn_checkpoint_log(
__wt_scr_free(session, &txn->ckpt_snapshot);
txn->full_ckpt = 0;
break;
+
+ WT_ILLEGAL_VALUE_ERR(session);
}
err: __wt_logrec_free(session, &logrec);
diff --git a/test/suite/test_durability01.py b/test/suite/test_durability01.py
new file mode 100644
index 00000000000..53529c7e89f
--- /dev/null
+++ b/test/suite/test_durability01.py
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2015 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_durability01.py
+# Durability: make sure the metadata is stable after exclusive operations
+# cause files to be closed.
+#
+
+import fnmatch, os, shutil, time
+from suite_subprocess import suite_subprocess
+from wiredtiger import wiredtiger_open
+from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
+import wttest
+
+class test_durability01(wttest.WiredTigerTestCase, suite_subprocess):
+ uri = 'table:test_durability01'
+ create_params = 'key_format=i,value_format=i'
+
+ def check_crash_restart(self, olddir, newdir):
+ ''' Simulate a crash from olddir and restart in newdir. '''
+ # With the connection still open, copy the regular files into a fresh newdir
+ shutil.rmtree(newdir, ignore_errors=True)
+ os.mkdir(newdir)
+ for fname in os.listdir(olddir):
+ fullname = os.path.join(olddir, fname)
+ # Skip the lock file (on every platform): on Windows it is locked
+ if os.path.isfile(fullname) and "WiredTiger.lock" not in fullname:
+ shutil.copy(fullname, newdir)
+
+ # Open the copied directory, verify the table, then close the connection
+ conn = self.setUpConnectionOpen(newdir)
+ session = self.setUpSessionOpen(conn)
+ session.verify(self.uri)
+ conn.close()
+
+ def test_durability(self):
+ '''Check for missing metadata checkpoints'''
+
+ # Here's the strategy:
+ # - update the table
+ # - checkpoint every fifth pass; otherwise verify, which causes a flush
+ # - copy the database directory (live, simulating a crash)
+ # - verify in the copy
+ # - repeat
+ #
+ # If the metadata isn't flushed, eventually the metadata we copy will
+ # be sufficiently out-of-sync with the data file that it won't verify.
+ self.session.create(self.uri, self.create_params)
+ for i in range(100):
+ c = self.session.open_cursor(self.uri)
+ c.set_key(i)
+ c.set_value(i)
+ c.insert()
+ c.close()
+ if i % 5 == 0:
+ self.session.checkpoint()
+ else:
+ self.session.verify(self.uri)
+ self.check_crash_restart(".", "RESTART")
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/test/suite/test_sweep01.py b/test/suite/test_sweep01.py
index a1a89c58838..989ffb7a971 100644
--- a/test/suite/test_sweep01.py
+++ b/test/suite/test_sweep01.py
@@ -42,10 +42,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
uri = 'table:' + tablebase
numfiles = 50
numkv = 1000
- ckpt_list = [
- ('off', dict(ckpt=0)),
- ('on', dict(ckpt=10)),
- ]
+ ckpt=10
types = [
('row', dict(tabletype='row',
@@ -56,8 +53,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
create_params = 'key_format=r,value_format=8t')),
]
- scenarios = number_scenarios(
- prune_scenarios(multiply_scenarios('.', types, ckpt_list), 1, 100))
+ scenarios = types
# Overrides WiredTigerTestCase
def setUpConnectionOpen(self, dir):