summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSusan LoVerso <sue@wiredtiger.com>2015-01-21 12:57:20 -0500
committerSusan LoVerso <sue@wiredtiger.com>2015-01-21 12:57:20 -0500
commitebb1d9402c0ce2911069b0437d71766b92c3dc12 (patch)
treefa7a4fc550321923a45aa2915afcfbd9daa96e32
parenta80cbb671fcac3d31abd3f38cf547641de9a1776 (diff)
downloadmongo-ebb1d9402c0ce2911069b0437d71766b92c3dc12.tar.gz
Add log code to ensure write-no-sync. #1585
-rw-r--r--dist/api_data.py4
-rw-r--r--src/config/config_def.c16
-rw-r--r--src/conn/conn_log.c2
-rw-r--r--src/docs/tune-durability.dox31
-rw-r--r--src/include/log.h2
-rw-r--r--src/include/wiredtiger.in4
-rw-r--r--src/log/log.c7
7 files changed, 41 insertions, 25 deletions
diff --git a/dist/api_data.py b/dist/api_data.py
index 0141526285c..2ebd5310d5a 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -547,12 +547,12 @@ common_wiredtiger_open = [
Config('transaction_sync', '', r'''
how to sync log records when the transaction commits''',
type='category', subconfig=[
- Config('enabled', 'false', r'''
+ Config('enabled', 'true', r'''
whether to sync the log on every commit by default, can
be overridden by the \c sync setting to
WT_SESSION::begin_transaction''',
type='boolean'),
- Config('method', 'fsync', r'''
+ Config('method', 'none', r'''
the method used to ensure log records are stable on disk,
see @ref tune_durability for more information''',
choices=['dsync', 'fsync', 'none']),
diff --git a/src/config/config_def.c b/src/config/config_def.c
index ec44e8839b0..7a95f01b731 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -684,8 +684,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
",name=,reserve=0,size=500MB),statistics=none,"
"statistics_log=(on_close=0,path=\"WiredTigerStat.%d.%H\","
"sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "transaction_sync=(enabled=0,method=fsync),use_environment_priv=0"
- ",verbose=",
+ "transaction_sync=(enabled=,method=),use_environment_priv=0,"
+ "verbose=",
confchk_wiredtiger_open
},
{ "wiredtiger_open_all",
@@ -702,8 +702,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
",name=,reserve=0,size=500MB),statistics=none,"
"statistics_log=(on_close=0,path=\"WiredTigerStat.%d.%H\","
"sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "transaction_sync=(enabled=0,method=fsync),use_environment_priv=0"
- ",verbose=,version=(major=0,minor=0)",
+ "transaction_sync=(enabled=,method=),use_environment_priv=0,"
+ "verbose=,version=(major=0,minor=0)",
confchk_wiredtiger_open_all
},
{ "wiredtiger_open_basecfg",
@@ -719,8 +719,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"shared_cache=(chunk=10MB,name=,reserve=0,size=500MB),"
"statistics=none,statistics_log=(on_close=0,"
"path=\"WiredTigerStat.%d.%H\",sources=,"
- "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0"
- ",method=fsync),verbose=,version=(major=0,minor=0)",
+ "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=,"
+ "method=),verbose=,version=(major=0,minor=0)",
confchk_wiredtiger_open_basecfg
},
{ "wiredtiger_open_usercfg",
@@ -736,8 +736,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"shared_cache=(chunk=10MB,name=,reserve=0,size=500MB),"
"statistics=none,statistics_log=(on_close=0,"
"path=\"WiredTigerStat.%d.%H\",sources=,"
- "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0"
- ",method=fsync),verbose=",
+ "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=,"
+ "method=),verbose=",
confchk_wiredtiger_open_usercfg
},
{ NULL, NULL, NULL }
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c
index f70a9a4a60c..abf21408d2d 100644
--- a/src/conn/conn_log.c
+++ b/src/conn/conn_log.c
@@ -444,6 +444,7 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[])
WT_INIT_LSN(&log->write_lsn);
log->fileid = 0;
WT_RET(__wt_cond_alloc(session, "log sync", 0, &log->log_sync_cond));
+ WT_RET(__wt_cond_alloc(session, "log write", 0, &log->log_write_cond));
WT_RET(__wt_log_open(session));
WT_RET(__wt_log_slot_init(session));
@@ -565,6 +566,7 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session)
WT_TRET(__wt_log_slot_destroy(session));
WT_TRET(__wt_cond_destroy(session, &conn->log->log_sync_cond));
+ WT_TRET(__wt_cond_destroy(session, &conn->log->log_write_cond));
WT_TRET(__wt_rwlock_destroy(session, &conn->log->log_archive_lock));
__wt_spin_destroy(session, &conn->log->log_lock);
__wt_spin_destroy(session, &conn->log->log_slot_lock);
diff --git a/src/docs/tune-durability.dox b/src/docs/tune-durability.dox
index 0c4bb2bc997..8cf3977e202 100644
--- a/src/docs/tune-durability.dox
+++ b/src/docs/tune-durability.dox
@@ -11,34 +11,39 @@ required for this feature.
/*! @class doc_tune_durability_flush_config
-By default, log records are flushed to disk before
+By default, log records are written to the operating system before
WT_SESSION::commit_transaction returns, ensuring durability at the
-commit. However, the durability guarantees can be relaxed to increase
-performance.
+application level. Stricter durability guarantees can be configured, at
+some cost in performance.
If \c transaction_sync=(enabled=false) is configured to ::wiredtiger_open,
-log records will be buffered in memory, and only flushed to disk by
-checkpoints or calls to WT_SESSION::commit_transaction with \c sync=true.
-(Note that any call to WT_SESSION::commit_transaction with \c sync=true
+log records may be buffered in memory, and only flushed to disk by
+checkpoints or calls to WT_SESSION::begin_transaction with \c sync=true.
+(Note that any call to WT_SESSION::begin_transaction with \c sync=true
will flush the log records for all committed transactions, not just the
transaction where the configuration is set.) This provides the minimal
guarantees, but will be significantly faster than other configurations.
If \c transaction_sync=(enabled=true), \c transaction_sync=(method)
further configures the method used to flush log records to disk. By
-default, the configured value is \c fsync, which calls the operating
-system's \c fsync call (or \c fdatasync if available) as each commit
-completes.
+default, the configured value is \c none, which calls the operating
+system's \c write call as each commit completes, but does not
+flush to disk.
-If the value is set to \c dsync instead, the \c O_DSYNC or \c O_SYNC
+If the value is set to \c dsync, the \c O_DSYNC or \c O_SYNC
flag to the operating system's \c open call will be specified when the
file is opened. (The durability guarantee of the \c fsync and \c dsync
configurations are the same, and in our experience the \c open flags are
slower, this configuration is only included for systems where that may
not be the case.)
-Finally, if the value is set to \c none, commit will call the operating
-system's \c write call before returning, but will not flush the write.
+If the value is set to \c fsync, the operating system's \c fsync call
+(or \c fdatasync if available) will be called as each commit completes.
+
+When a log file fills and the system moves to the next log file, the
+previous log file will always be flushed to disk prior to close. So
+when running in a durability mode that does not flush to disk, only log
+records written since the most recent log file change are at risk.
Here is the expected performance of durability modes, in order from the
fastest to the slowest (and from the fewest durability guarantees to the
@@ -49,7 +54,7 @@ most durability guarantees).
@row{<code>log=(enabled=false)</code>, checkpoint-level durability}
@row{<code>log=(enabled)\,transaction_sync=(enabled=false)</code>,
in-memory buffered logging configured; updates durable after
- checkpoint or after \c sync is set in WT_SESSION::commit_transaction}
+ checkpoint or after \c sync is set in WT_SESSION::begin_transaction}
@row{<code>log=(enabled)\,transaction_sync=(enabled=true\,method=none)</code>,
logging configured; updates durable after application failure\,
but not after system failure}
diff --git a/src/include/log.h b/src/include/log.h
index 17a3fd5a23a..f88a5381227 100644
--- a/src/include/log.h
+++ b/src/include/log.h
@@ -135,6 +135,8 @@ typedef struct {
/* Notify any waiting threads when sync_lsn is updated. */
WT_CONDVAR *log_sync_cond;
+ /* Notify any waiting threads when write_lsn is updated. */
+ WT_CONDVAR *log_write_cond;
/*
* Consolidation array information
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 982e850241b..6146ba9d633 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -2023,11 +2023,11 @@ struct __wt_connection {
* commits., a set of related configuration options defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, whether to sync the log on every
* commit by default\, can be overridden by the \c sync setting to
- * WT_SESSION::begin_transaction., a boolean flag; default \c false.}
+ * WT_SESSION::begin_transaction., a boolean flag; default \c true.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;method, the method used to ensure log records
* are stable on disk\, see @ref tune_durability for more information., a
* string\, chosen from the following options: \c "dsync"\, \c "fsync"\, \c
- * "none"; default \c fsync.}
+ * "none"; default \c none.}
* @config{ ),,}
* @config{use_environment_priv, use the \c WIREDTIGER_CONFIG and \c
* WIREDTIGER_HOME environment variables regardless of whether or not the
diff --git a/src/log/log.c b/src/log/log.c
index e75946e9885..a173a829436 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -883,6 +883,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
while (LOG_CMP(&log->write_lsn, &slot->slot_release_lsn) != 0)
__wt_yield();
log->write_lsn = slot->slot_end_lsn;
+ WT_ERR(__wt_cond_signal(session, log->log_write_cond));
if (F_ISSET(slot, SLOT_CLOSEFH))
WT_ERR(__wt_cond_signal(session, conn->log_close_cond));
@@ -1651,6 +1652,12 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp,
myslot.slot->slot_error == 0)
(void)__wt_cond_wait(
session, log->log_sync_cond, 10000);
+ } else if (LF_ISSET(WT_LOG_FLUSH)) {
+ /* Wait for our writes to reach the OS */
+ while (LOG_CMP(&log->write_lsn, &lsn) <= 0 &&
+ myslot.slot->slot_error == 0)
+ (void)__wt_cond_wait(
+ session, log->log_write_cond, 10000);
}
err:
if (locked)