diff options
author | Keith Bostic <keith.bostic@mongodb.com> | 2017-04-19 00:34:45 -0400 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2017-04-19 14:34:45 +1000 |
commit | f8db6cf5707ee44b7906f00add610b705d696dc6 (patch) | |
tree | 122db8070a7cb1de385f51ec520d5ae794326891 | |
parent | 35e221c039a0931af5b3a18069e57ba9a218aead (diff) | |
download | mongo-f8db6cf5707ee44b7906f00add610b705d696dc6.tar.gz |
WT-3292 review/cleanup full-barrier calls in WiredTiger (#3395)
* Rework logging's log_close_fh and log_close_lsn barrier code. There are two fields, the close handle and the close LSN, and they both have to be set. The __log_file_server() code was checking the close handle and then spinning until the close LSN was set, but a simpler solution is to publish the close LSN write before setting the close handle.
To be clear, the previous code was correct, it was the comment that was wrong.
* Remove barriers from around read/write of WT_TXN_GLOBAL.checkpoint_running. The read-barrier in __compact_checkpoint() isn't needed because the check is on memory declared volatile. The full-barrier in __txn_checkpoint_wrapper isn't needed because there's no ordering constraint.
* Don't flush the reset of the WT_REF.state field; we've never seen any performance reason that a barrier is needed.
* Fix a comment: technically, the las-was-written flag has to be flushed before any relevant read happens, document it that way.
* Instead of explicitly flushing the clear of the WT_CONN_SERVER_LSM flag, change __wt_sleep() to imply a barrier; there are loops which have no other barriers, like __lsm_manager_run_server().
-rw-r--r-- | src/btree/bt_compact.c | 6 | ||||
-rw-r--r-- | src/btree/bt_sync.c | 2 | ||||
-rw-r--r-- | src/cache/cache_las.c | 5 | ||||
-rw-r--r-- | src/conn/conn_log.c | 10 | ||||
-rw-r--r-- | src/log/log.c | 16 | ||||
-rw-r--r-- | src/lsm/lsm_manager.c | 6 | ||||
-rw-r--r-- | src/lsm/lsm_tree.c | 4 | ||||
-rw-r--r-- | src/os_posix/os_sleep.c | 8 | ||||
-rw-r--r-- | src/os_win/os_sleep.c | 11 | ||||
-rw-r--r-- | src/session/session_compact.c | 5 | ||||
-rw-r--r-- | src/txn/txn_ckpt.c | 5 |
11 files changed, 41 insertions, 37 deletions
diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c index e7edae5ea79..17308d02d91 100644 --- a/src/btree/bt_compact.c +++ b/src/btree/bt_compact.c @@ -228,12 +228,8 @@ __wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) bm, session, addr, addr_size, skipp); } - /* - * Reset the WT_REF state and push the change. The full-barrier isn't - * necessary, but it's better to keep pages in circulation than not. - */ + /* Reset the WT_REF state. */ ref->state = WT_REF_DISK; - WT_FULL_BARRIER(); return (ret); } diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c index 112f0725f94..5b0bf53dc6c 100644 --- a/src/btree/bt_sync.c +++ b/src/btree/bt_sync.c @@ -179,7 +179,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) * Set the checkpointing flag to block such actions and wait for * any problematic eviction or page splits to complete. */ - WT_PUBLISH(btree->checkpointing, WT_CKPT_PREPARE); + btree->checkpointing = WT_CKPT_PREPARE; (void)__wt_gen_next_drain(session, WT_GEN_EVICT); btree->checkpointing = WT_CKPT_RUNNING; diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c index 9e8545453d3..06c6354148c 100644 --- a/src/cache/cache_las.c +++ b/src/cache/cache_las.c @@ -140,8 +140,9 @@ __wt_las_set_written(WT_SESSION_IMPL *session) conn->las_written = true; /* - * Push the flag: unnecessary, but from now page reads must deal - * with lookaside table records, and we only do the write once. + * Future page reads must deal with lookaside table records. + * No write could be cached until a future read might matter, + * the barrier is more documentation than requirement. 
*/ WT_FULL_BARRIER(); } diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c index 47ba4d45dc3..08b572244af 100644 --- a/src/conn/conn_log.c +++ b/src/conn/conn_log.c @@ -391,13 +391,11 @@ __log_file_server(void *arg) WT_ERR(__wt_log_extract_lognum(session, close_fh->name, &filenum)); /* - * We update the close file handle before updating the - * close LSN when changing files. It is possible we - * could see mismatched settings. If we do, yield - * until it is set. This should rarely happen. + * The closing file handle should have a correct close + * LSN. */ - while (log->log_close_lsn.l.file < filenum) - __wt_yield(); + WT_ASSERT(session, + log->log_close_lsn.l.file == filenum); if (__wt_log_cmp( &log->write_lsn, &log->log_close_lsn) >= 0) { diff --git a/src/log/log.c b/src/log/log.c index f95ad93d872..3c37e1eb326 100644 --- a/src/log/log.c +++ b/src/log/log.c @@ -876,18 +876,16 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created) __wt_yield(); } /* - * Note, the file server worker thread has code that knows that - * the file handle is set before the LSN. Do not reorder without - * also reviewing that code. + * Note, the file server worker thread requires the LSN be set once the + * close file handle is set, force that ordering. */ - log->log_close_fh = log->log_fh; - if (log->log_close_fh != NULL) + if (log->log_fh == NULL) + log->log_close_fh = NULL; + else { log->log_close_lsn = log->alloc_lsn; + WT_PUBLISH(log->log_close_fh, log->log_fh); + } log->fileid++; - /* - * Make sure everything we set above is visible. 
- */ - WT_FULL_BARRIER(); /* * If pre-allocating log files look for one; otherwise, or if we don't diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 88b3e1980be..f391c553d2a 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -284,12 +284,8 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) manager = &conn->lsm_manager; removed = 0; - /* - * Clear the LSM server flag and flush to ensure running threads see - * the state change. - */ + /* Clear the LSM server flag. */ F_CLR(conn, WT_CONN_SERVER_LSM); - WT_FULL_BARRIER(); WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) || manager->lsm_workers == 0); diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index fb8eb9d38a7..fe36237969f 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -768,13 +768,13 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_ERR(__wt_lsm_meta_write(session, lsm_tree, NULL)); lsm_tree->need_switch = false; - ++lsm_tree->dsk_gen; - lsm_tree->modified = true; + /* * Ensure the updated disk generation is visible to all other threads * before updating the transaction ID. */ + ++lsm_tree->dsk_gen; WT_FULL_BARRIER(); /* diff --git a/src/os_posix/os_sleep.c b/src/os_posix/os_sleep.c index a0545d3f5fe..67c0aaa375c 100644 --- a/src/os_posix/os_sleep.c +++ b/src/os_posix/os_sleep.c @@ -18,6 +18,14 @@ __wt_sleep(uint64_t seconds, uint64_t micro_seconds) { struct timeval t; + /* + * Sleeping isn't documented as a memory barrier, and it's a reasonable + * expectation to have. There's no reason not to explicitly include a + * barrier since we're giving up the CPU, and ensures callers are never + * surprised. 
+ */ + WT_FULL_BARRIER(); + t.tv_sec = (time_t)(seconds + micro_seconds / WT_MILLION); t.tv_usec = (suseconds_t)(micro_seconds % WT_MILLION); diff --git a/src/os_win/os_sleep.c b/src/os_win/os_sleep.c index 4b6bdaea0be..477474e0665 100644 --- a/src/os_win/os_sleep.c +++ b/src/os_win/os_sleep.c @@ -18,8 +18,15 @@ __wt_sleep(uint64_t seconds, uint64_t micro_seconds) DWORD dwMilliseconds; /* - * If the caller wants a small pause, set to our - * smallest granularity. + * Sleeping isn't documented as a memory barrier, and it's a reasonable + * expectation to have. There's no reason not to explicitly include a + * barrier since we're giving up the CPU, and ensures callers are never + * surprised. + */ + WT_FULL_BARRIER(); + + /* + * If the caller wants a small pause, set to our smallest granularity. */ if (seconds == 0 && micro_seconds < WT_THOUSAND) micro_seconds = WT_THOUSAND; diff --git a/src/session/session_compact.c b/src/session/session_compact.c index 30c6ad297f7..c4710dbb1a5 100644 --- a/src/session/session_compact.c +++ b/src/session/session_compact.c @@ -226,7 +226,10 @@ __compact_checkpoint(WT_SESSION_IMPL *session) */ txn_global = &S2C(session)->txn_global; for (txn_gen = __wt_gen(session, WT_GEN_CHECKPOINT);;) { - WT_READ_BARRIER(); + /* + * This loop only checks objects that are declared volatile, + * therefore no barriers are needed. 
+ */ if (!txn_global->checkpoint_running || txn_gen != __wt_gen(session, WT_GEN_CHECKPOINT)) break; diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 92dfd9e3887..82163f471b8 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -943,13 +943,11 @@ __txn_checkpoint_wrapper(WT_SESSION_IMPL *session, const char *cfg[]) WT_STAT_CONN_SET(session, txn_checkpoint_running, 1); txn_global->checkpoint_running = true; - WT_FULL_BARRIER(); ret = __txn_checkpoint(session, cfg); WT_STAT_CONN_SET(session, txn_checkpoint_running, 0); txn_global->checkpoint_running = false; - WT_FULL_BARRIER(); return (ret); } @@ -1446,8 +1444,7 @@ __checkpoint_tree( * the checkpoint start, which might not be included, will re-set the * modified flag. The "unless reconciliation skips updates" problem is * handled in the reconciliation code: if reconciliation skips updates, - * it sets the modified flag itself. Use a full barrier so we get the - * store done quickly, this isn't a performance path. + * it sets the modified flag itself. */ btree->modified = false; WT_FULL_BARRIER(); |