diff options
author | Susan LoVerso <sue@wiredtiger.com> | 2015-01-21 12:57:20 -0500 |
---|---|---|
committer | Susan LoVerso <sue@wiredtiger.com> | 2015-01-21 12:57:20 -0500 |
commit | ebb1d9402c0ce2911069b0437d71766b92c3dc12 (patch) | |
tree | fa7a4fc550321923a45aa2915afcfbd9daa96e32 | |
parent | a80cbb671fcac3d31abd3f38cf547641de9a1776 (diff) | |
download | mongo-ebb1d9402c0ce2911069b0437d71766b92c3dc12.tar.gz |
Add log code to ensure write-no-sync. #1585
-rw-r--r-- | dist/api_data.py | 4 | ||||
-rw-r--r-- | src/config/config_def.c | 16 | ||||
-rw-r--r-- | src/conn/conn_log.c | 2 | ||||
-rw-r--r-- | src/docs/tune-durability.dox | 31 | ||||
-rw-r--r-- | src/include/log.h | 2 | ||||
-rw-r--r-- | src/include/wiredtiger.in | 4 | ||||
-rw-r--r-- | src/log/log.c | 7 |
7 files changed, 41 insertions, 25 deletions
diff --git a/dist/api_data.py b/dist/api_data.py index 0141526285c..2ebd5310d5a 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -547,12 +547,12 @@ common_wiredtiger_open = [ Config('transaction_sync', '', r''' how to sync log records when the transaction commits''', type='category', subconfig=[ - Config('enabled', 'false', r''' + Config('enabled', 'true', r''' whether to sync the log on every commit by default, can be overridden by the \c sync setting to WT_SESSION::begin_transaction''', type='boolean'), - Config('method', 'fsync', r''' + Config('method', 'none', r''' the method used to ensure log records are stable on disk, see @ref tune_durability for more information''', choices=['dsync', 'fsync', 'none']), diff --git a/src/config/config_def.c b/src/config/config_def.c index ec44e8839b0..7a95f01b731 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -684,8 +684,8 @@ static const WT_CONFIG_ENTRY config_entries[] = { ",name=,reserve=0,size=500MB),statistics=none," "statistics_log=(on_close=0,path=\"WiredTigerStat.%d.%H\"," "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," - "transaction_sync=(enabled=0,method=fsync),use_environment_priv=0" - ",verbose=", + "transaction_sync=(enabled=,method=),use_environment_priv=0," + "verbose=", confchk_wiredtiger_open }, { "wiredtiger_open_all", @@ -702,8 +702,8 @@ static const WT_CONFIG_ENTRY config_entries[] = { ",name=,reserve=0,size=500MB),statistics=none," "statistics_log=(on_close=0,path=\"WiredTigerStat.%d.%H\"," "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0)," - "transaction_sync=(enabled=0,method=fsync),use_environment_priv=0" - ",verbose=,version=(major=0,minor=0)", + "transaction_sync=(enabled=,method=),use_environment_priv=0," + "verbose=,version=(major=0,minor=0)", confchk_wiredtiger_open_all }, { "wiredtiger_open_basecfg", @@ -719,8 +719,8 @@ static const WT_CONFIG_ENTRY config_entries[] = { "shared_cache=(chunk=10MB,name=,reserve=0,size=500MB)," 
"statistics=none,statistics_log=(on_close=0," "path=\"WiredTigerStat.%d.%H\",sources=," - "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" - ",method=fsync),verbose=,version=(major=0,minor=0)", + "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=," + "method=),verbose=,version=(major=0,minor=0)", confchk_wiredtiger_open_basecfg }, { "wiredtiger_open_usercfg", @@ -736,8 +736,8 @@ static const WT_CONFIG_ENTRY config_entries[] = { "shared_cache=(chunk=10MB,name=,reserve=0,size=500MB)," "statistics=none,statistics_log=(on_close=0," "path=\"WiredTigerStat.%d.%H\",sources=," - "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0" - ",method=fsync),verbose=", + "timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=," + "method=),verbose=", confchk_wiredtiger_open_usercfg }, { NULL, NULL, NULL } diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index f70a9a4a60c..abf21408d2d 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -444,6 +444,7 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[]) WT_INIT_LSN(&log->write_lsn); log->fileid = 0; WT_RET(__wt_cond_alloc(session, "log sync", 0, &log->log_sync_cond)); + WT_RET(__wt_cond_alloc(session, "log write", 0, &log->log_write_cond)); WT_RET(__wt_log_open(session)); WT_RET(__wt_log_slot_init(session)); @@ -565,6 +566,7 @@ __wt_logmgr_destroy(WT_SESSION_IMPL *session) WT_TRET(__wt_log_slot_destroy(session)); WT_TRET(__wt_cond_destroy(session, &conn->log->log_sync_cond)); + WT_TRET(__wt_cond_destroy(session, &conn->log->log_write_cond)); WT_TRET(__wt_rwlock_destroy(session, &conn->log->log_archive_lock)); __wt_spin_destroy(session, &conn->log->log_lock); __wt_spin_destroy(session, &conn->log->log_slot_lock); diff --git a/src/docs/tune-durability.dox b/src/docs/tune-durability.dox index 0c4bb2bc997..8cf3977e202 100644 --- a/src/docs/tune-durability.dox +++ b/src/docs/tune-durability.dox @@ -11,34 +11,39 @@ required for this feature. /*! 
@class doc_tune_durability_flush_config -By default, log records are flushed to disk before +By default, log records are written to the operating system before WT_SESSION::commit_transaction returns, ensuring durability at the -commit. However, the durability guarantees can be relaxed to increase -performance. +application level. The configuration can be made stricter to provide +stronger durability guarantees. If \c transaction_sync=(enabled=false) is configured to ::wiredtiger_open, -log records will be buffered in memory, and only flushed to disk by -checkpoints or calls to WT_SESSION::commit_transaction with \c sync=true. -(Note that any call to WT_SESSION::commit_transaction with \c sync=true +log records may be buffered in memory, and only flushed to disk by +checkpoints or calls to WT_SESSION::begin_transaction with \c sync=true. +(Note that any call to WT_SESSION::begin_transaction with \c sync=true will flush the log records for all committed transactions, not just the transaction where the configuration is set.) This provides the minimal guarantees, but will be significantly faster than other configurations. If \c transaction_sync=(enabled=true), \c transaction_sync=(method) further configures the method used to flush log records to disk. By -default, the configured value is \c fsync, which calls the operating -system's \c fsync call (or \c fdatasync if available) as each commit -completes. +default, the configured value is \c none, which calls the operating +system's \c write call as each commit completes, but does not +flush to disk. -If the value is set to \c dsync instead, the \c O_DSYNC or \c O_SYNC +If the value is set to \c dsync, the \c O_DSYNC or \c O_SYNC flag to the operating system's \c open call will be specified when the file is opened. 
(The durability guarantees of the \c fsync and \c dsync configurations are the same, and in our experience the \c open flags are slower; this configuration is only included for systems where that may not be the case.) -Finally, if the value is set to \c none, commit will call the operating -system's \c write call before returning, but will not flush the write. +If the value is set to \c fsync, the operating system's \c fsync call +(or \c fdatasync if available) will be called as each commit completes. + +When a log file fills and the system moves to the next log file, the +previous log file will always be flushed to disk prior to close. So +when running in a durability mode that does not flush to disk, the risk +is bounded by the most recent log file change. Here is the expected performance of durability modes, in order from the fastest to the slowest (and from the fewest durability guarantees to the @@ -49,7 +54,7 @@ most durability guarantees). @row{<code>log=(enabled=false)</code>, checkpoint-level durability} @row{<code>log=(enabled)\,transaction_sync=(enabled=false)</code>, in-memory buffered logging configured; updates durable after - checkpoint or after \c sync is set in WT_SESSION::commit_transaction} + checkpoint or after \c sync is set in WT_SESSION::begin_transaction} @row{<code>log=(enabled)\,transaction_sync=(enabled=true\,method=none)</code>, logging configured; updates durable after application failure\, but not after system failure} diff --git a/src/include/log.h b/src/include/log.h index 17a3fd5a23a..f88a5381227 100644 --- a/src/include/log.h +++ b/src/include/log.h @@ -135,6 +135,8 @@ typedef struct { /* Notify any waiting threads when sync_lsn is updated. */ WT_CONDVAR *log_sync_cond; + /* Notify any waiting threads when write_lsn is updated. 
*/ + WT_CONDVAR *log_write_cond; /* * Consolidation array information diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 982e850241b..6146ba9d633 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -2023,11 +2023,11 @@ struct __wt_connection { * commits., a set of related configuration options defined below.} * @config{ enabled, whether to sync the log on every * commit by default\, can be overridden by the \c sync setting to - * WT_SESSION::begin_transaction., a boolean flag; default \c false.} + * WT_SESSION::begin_transaction., a boolean flag; default \c true.} * @config{ method, the method used to ensure log records * are stable on disk\, see @ref tune_durability for more information., a * string\, chosen from the following options: \c "dsync"\, \c "fsync"\, \c - * "none"; default \c fsync.} + * "none"; default \c none.} * @config{ ),,} * @config{use_environment_priv, use the \c WIREDTIGER_CONFIG and \c * WIREDTIGER_HOME environment variables regardless of whether or not the diff --git a/src/log/log.c b/src/log/log.c index e75946e9885..a173a829436 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -883,6 +883,7 @@ __log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot) while (LOG_CMP(&log->write_lsn, &slot->slot_release_lsn) != 0) __wt_yield(); log->write_lsn = slot->slot_end_lsn; + WT_ERR(__wt_cond_signal(session, log->log_write_cond)); if (F_ISSET(slot, SLOT_CLOSEFH)) WT_ERR(__wt_cond_signal(session, conn->log_close_cond)); @@ -1651,6 +1652,12 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp, myslot.slot->slot_error == 0) (void)__wt_cond_wait( session, log->log_sync_cond, 10000); + } else if (LF_ISSET(WT_LOG_FLUSH)) { + /* Wait for our writes to reach the OS */ + while (LOG_CMP(&log->write_lsn, &lsn) <= 0 && + myslot.slot->slot_error == 0) + (void)__wt_cond_wait( + session, log->log_write_cond, 10000); } err: if (locked) |