summary refs log tree commit diff
diff options
context:
space:
mode:
author Michael Cahill <michael.cahill@mongodb.com> 2015-05-28 15:33:06 +1000
committer Michael Cahill <michael.cahill@mongodb.com> 2015-05-29 15:45:58 +1000
commit 13e942f2611aaeae05376d1bc8391ae12647ff76 (patch)
tree 6557384b5640abc622d9104f59d0a657f1b1298e
parent 166c533b55417bff1c9c2f25c67322362ff12132 (diff)
download mongo-13e942f2611aaeae05376d1bc8391ae12647ff76.tar.gz
Merge pull request #1995 from wiredtiger/skip-checkpoint
Only skip metadata checkpoint if we really skipped it, not force. (cherry picked from commit 3fe41b427ace35308de552caac67329db0d85c52)
-rw-r--r-- src/include/btree.h 5
-rw-r--r-- src/include/extern.h 1
-rw-r--r-- src/log/log.c 44
-rw-r--r-- src/txn/txn_ckpt.c 25
-rw-r--r-- src/txn/txn_log.c 6
5 files changed, 73 insertions, 8 deletions
diff --git a/src/include/btree.h b/src/include/btree.h
index 44ec40364cc..264ebdefc39 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -147,8 +147,9 @@ struct __wt_btree {
#define WT_BTREE_IN_MEMORY 0x00200 /* Cache-resident object */
#define WT_BTREE_NO_EVICTION 0x00400 /* Disable eviction */
#define WT_BTREE_SALVAGE 0x00800 /* Handle is for salvage */
-#define WT_BTREE_UPGRADE 0x01000 /* Handle is for upgrade */
-#define WT_BTREE_VERIFY 0x02000 /* Handle is for verify */
+#define WT_BTREE_SKIP_CKPT 0x01000 /* Handle skipped checkpoint */
+#define WT_BTREE_UPGRADE 0x02000 /* Handle is for upgrade */
+#define WT_BTREE_VERIFY 0x04000 /* Handle is for verify */
uint32_t flags;
};
diff --git a/src/include/extern.h b/src/include/extern.h
index f42a504e5be..59e795893b5 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -316,6 +316,7 @@ extern void __wt_cache_dump(WT_SESSION_IMPL *session);
extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive);
extern void __wt_evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref);
extern int __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn);
+extern int __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn);
extern int __wt_log_needs_recovery(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn, int *rec);
extern void __wt_log_written_reset(WT_SESSION_IMPL *session);
extern int __wt_log_get_all_files(WT_SESSION_IMPL *session, char ***filesp, u_int *countp, uint32_t *maxid, int active_only);
diff --git a/src/log/log.c b/src/log/log.c
index 2c36abd8eab..e6f0ea6d1ab 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -36,6 +36,50 @@ __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn)
}
/*
+ * __wt_log_force_sync --
+ * Force a sync of the log directory and log file, as needed, to min_lsn.
+ */
+int
+__wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_LOG *log;
+
+ conn = S2C(session);
+ log = conn->log;
+ __wt_spin_lock(session, &log->log_sync_lock);
+ WT_ASSERT(session, log->log_dir_fh != NULL);
+ /*
+ * Sync the directory if the log file entry hasn't been written into the
+ * directory: the last directory sync was for an earlier file than min_lsn.
+ */
+ if (log->sync_dir_lsn.file < min_lsn->file) {
+ WT_ERR(__wt_verbose(session, WT_VERB_LOG,
+ "log_force_sync: sync directory %s",
+ log->log_dir_fh->name));
+ WT_ERR(__wt_directory_sync_fh(session, log->log_dir_fh));
+ log->sync_dir_lsn = *min_lsn;
+ WT_STAT_FAST_CONN_INCR(session, log_sync_dir);
+ }
+ /*
+ * Sync the log file if sync_lsn is behind min_lsn, then signal log_sync_cond.
+ */
+ if (WT_LOG_CMP(&log->sync_lsn, min_lsn) < 0) {
+ WT_ERR(__wt_verbose(session, WT_VERB_LOG,
+ "log_force_sync: sync to LSN %d/%lu",
+ min_lsn->file, min_lsn->offset));
+ WT_ERR(__wt_fsync(session, log->log_fh));
+ log->sync_lsn = *min_lsn;
+ WT_STAT_FAST_CONN_INCR(session, log_sync);
+ WT_ERR(__wt_cond_signal(session, log->log_sync_cond));
+ }
+err:
+ __wt_spin_unlock(session, &log->log_sync_lock);
+ return (ret);
+}
+
+/*
* __wt_log_needs_recovery --
* Return 0 if we encounter a clean shutdown and 1 if recovery
* must be run in the given variable.
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index 11155134ca7..3954e22583c 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -554,11 +554,19 @@ err: /*
txn_global->checkpoint_id = WT_TXN_NONE;
txn_global->checkpoint_snap_min = WT_TXN_NONE;
- /* Tell logging that we have finished a database checkpoint. */
- if (logging)
+ /*
+ * Tell logging that we have finished a database checkpoint. Do not
+ * write a log record if the database was idle.
+ */
+ if (logging) {
+ if (ret == 0 && full &&
+ F_ISSET((WT_BTREE *)session->meta_dhandle->handle,
+ WT_BTREE_SKIP_CKPT))
+ idle = 1;
WT_TRET(__wt_txn_checkpoint_log(session, full,
- (ret == 0) ? WT_TXN_LOG_CKPT_STOP : WT_TXN_LOG_CKPT_FAIL,
- NULL));
+ (ret == 0 && !idle) ?
+ WT_TXN_LOG_CKPT_STOP : WT_TXN_LOG_CKPT_CLEANUP, NULL));
+ }
for (i = 0; i < session->ckpt_handle_next; ++i) {
if (session->ckpt_handle[i].dhandle == NULL) {
@@ -789,6 +797,7 @@ __checkpoint_worker(
* means it must exist.
*/
force = 0;
+ F_CLR(btree, WT_BTREE_SKIP_CKPT);
if (!btree->modified && cfg != NULL) {
ret = __wt_config_gets(session, cfg, "force", &cval);
if (ret != 0 && ret != WT_NOTFOUND)
@@ -797,8 +806,10 @@ __checkpoint_worker(
force = 1;
}
if (!btree->modified && !force) {
- if (!is_checkpoint)
+ if (!is_checkpoint) {
+ F_SET(btree, WT_BTREE_SKIP_CKPT);
goto done;
+ }
deleted = 0;
WT_CKPT_FOREACH(ckptbase, ckpt)
@@ -816,8 +827,10 @@ __checkpoint_worker(
(strcmp(name, (ckpt - 1)->name) == 0 ||
(WT_PREFIX_MATCH(name, WT_CHECKPOINT) &&
WT_PREFIX_MATCH((ckpt - 1)->name, WT_CHECKPOINT))) &&
- deleted < 2)
+ deleted < 2) {
+ F_SET(btree, WT_BTREE_SKIP_CKPT);
goto done;
+ }
}
/* Add a new checkpoint entry at the end of the list. */
diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c
index 901785628e1..e87c3cf0e3c 100644
--- a/src/txn/txn_log.c
+++ b/src/txn/txn_log.c
@@ -303,6 +303,12 @@ __wt_txn_checkpoint_log(
case WT_TXN_LOG_CKPT_PREPARE:
txn->full_ckpt = 1;
*ckpt_lsn = S2C(session)->log->write_start_lsn;
+ /*
+ * We need to make sure that the log records in the checkpoint
+ * LSN are on disk. In particular to make sure that the
+ * current log file exists.
+ */
+ WT_ERR(__wt_log_force_sync(session, ckpt_lsn));
break;
case WT_TXN_LOG_CKPT_START: