summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKeith Bostic <keith.bostic@mongodb.com>2017-04-19 00:34:45 -0400
committerMichael Cahill <michael.cahill@mongodb.com>2017-04-19 14:34:45 +1000
commitf8db6cf5707ee44b7906f00add610b705d696dc6 (patch)
tree122db8070a7cb1de385f51ec520d5ae794326891
parent35e221c039a0931af5b3a18069e57ba9a218aead (diff)
downloadmongo-f8db6cf5707ee44b7906f00add610b705d696dc6.tar.gz
WT-3292 review/cleanup full-barrier calls in WiredTiger (#3395)
* Rework logging's log_close_fh and log_close_lsn barrier code. There are two fields, the close handle and the close LSN, and they both have to be set. The __log_file_server() code was checking the close handle and then spinning until the close LSN was set, but a simpler solution is to publish the close LSN write before setting the close handle. To be clear, the previous code was correct, it was the comment that was wrong. * Remove barriers from around read/write of WT_TXN_GLOBAL.checkpoint_running. The read-barrier in __compact_checkpoint() isn't needed because the check is on memory declared volatile. The full-barrier in __txn_checkpoint_wrapper isn't needed because there's no ordering constraint. * Don't flush the reset of the WT_REF.state field, we've never seen any performance reason that a barrier is needed. * Fix a comment: technically, the las-was-written flag has to be flushed before any relevant read happens, document it that way. * Instead of explicitly flushing the clear of the WT_CONN_SERVER_LSM flag, change __wt_sleep() to imply a barrier, there are loops which have no other barriers, like __lsm_manager_run_server().
-rw-r--r--src/btree/bt_compact.c6
-rw-r--r--src/btree/bt_sync.c2
-rw-r--r--src/cache/cache_las.c5
-rw-r--r--src/conn/conn_log.c10
-rw-r--r--src/log/log.c16
-rw-r--r--src/lsm/lsm_manager.c6
-rw-r--r--src/lsm/lsm_tree.c4
-rw-r--r--src/os_posix/os_sleep.c8
-rw-r--r--src/os_win/os_sleep.c11
-rw-r--r--src/session/session_compact.c5
-rw-r--r--src/txn/txn_ckpt.c5
11 files changed, 41 insertions, 37 deletions
diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c
index e7edae5ea79..17308d02d91 100644
--- a/src/btree/bt_compact.c
+++ b/src/btree/bt_compact.c
@@ -228,12 +228,8 @@ __wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
bm, session, addr, addr_size, skipp);
}
- /*
- * Reset the WT_REF state and push the change. The full-barrier isn't
- * necessary, but it's better to keep pages in circulation than not.
- */
+ /* Reset the WT_REF state. */
ref->state = WT_REF_DISK;
- WT_FULL_BARRIER();
return (ret);
}
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index 112f0725f94..5b0bf53dc6c 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -179,7 +179,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
* Set the checkpointing flag to block such actions and wait for
* any problematic eviction or page splits to complete.
*/
- WT_PUBLISH(btree->checkpointing, WT_CKPT_PREPARE);
+ btree->checkpointing = WT_CKPT_PREPARE;
(void)__wt_gen_next_drain(session, WT_GEN_EVICT);
btree->checkpointing = WT_CKPT_RUNNING;
diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c
index 9e8545453d3..06c6354148c 100644
--- a/src/cache/cache_las.c
+++ b/src/cache/cache_las.c
@@ -140,8 +140,9 @@ __wt_las_set_written(WT_SESSION_IMPL *session)
conn->las_written = true;
/*
- * Push the flag: unnecessary, but from now page reads must deal
- * with lookaside table records, and we only do the write once.
+ * Future page reads must deal with lookaside table records.
+ * No write could be cached until a future read might matter,
+ * the barrier is more documentation than requirement.
*/
WT_FULL_BARRIER();
}
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c
index 47ba4d45dc3..08b572244af 100644
--- a/src/conn/conn_log.c
+++ b/src/conn/conn_log.c
@@ -391,13 +391,11 @@ __log_file_server(void *arg)
WT_ERR(__wt_log_extract_lognum(session, close_fh->name,
&filenum));
/*
- * We update the close file handle before updating the
- * close LSN when changing files. It is possible we
- * could see mismatched settings. If we do, yield
- * until it is set. This should rarely happen.
+ * The closing file handle should have a correct close
+ * LSN.
*/
- while (log->log_close_lsn.l.file < filenum)
- __wt_yield();
+ WT_ASSERT(session,
+ log->log_close_lsn.l.file == filenum);
if (__wt_log_cmp(
&log->write_lsn, &log->log_close_lsn) >= 0) {
diff --git a/src/log/log.c b/src/log/log.c
index f95ad93d872..3c37e1eb326 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -876,18 +876,16 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created)
__wt_yield();
}
/*
- * Note, the file server worker thread has code that knows that
- * the file handle is set before the LSN. Do not reorder without
- * also reviewing that code.
+ * Note, the file server worker thread requires the LSN be set once the
+ * close file handle is set, force that ordering.
*/
- log->log_close_fh = log->log_fh;
- if (log->log_close_fh != NULL)
+ if (log->log_fh == NULL)
+ log->log_close_fh = NULL;
+ else {
log->log_close_lsn = log->alloc_lsn;
+ WT_PUBLISH(log->log_close_fh, log->log_fh);
+ }
log->fileid++;
- /*
- * Make sure everything we set above is visible.
- */
- WT_FULL_BARRIER();
/*
* If pre-allocating log files look for one; otherwise, or if we don't
diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c
index 88b3e1980be..f391c553d2a 100644
--- a/src/lsm/lsm_manager.c
+++ b/src/lsm/lsm_manager.c
@@ -284,12 +284,8 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session)
manager = &conn->lsm_manager;
removed = 0;
- /*
- * Clear the LSM server flag and flush to ensure running threads see
- * the state change.
- */
+ /* Clear the LSM server flag. */
F_CLR(conn, WT_CONN_SERVER_LSM);
- WT_FULL_BARRIER();
WT_ASSERT(session, !F_ISSET(conn, WT_CONN_READONLY) ||
manager->lsm_workers == 0);
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index fb8eb9d38a7..fe36237969f 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -768,13 +768,13 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
WT_ERR(__wt_lsm_meta_write(session, lsm_tree, NULL));
lsm_tree->need_switch = false;
- ++lsm_tree->dsk_gen;
-
lsm_tree->modified = true;
+
/*
* Ensure the updated disk generation is visible to all other threads
* before updating the transaction ID.
*/
+ ++lsm_tree->dsk_gen;
WT_FULL_BARRIER();
/*
diff --git a/src/os_posix/os_sleep.c b/src/os_posix/os_sleep.c
index a0545d3f5fe..67c0aaa375c 100644
--- a/src/os_posix/os_sleep.c
+++ b/src/os_posix/os_sleep.c
@@ -18,6 +18,14 @@ __wt_sleep(uint64_t seconds, uint64_t micro_seconds)
{
struct timeval t;
+ /*
+ * Sleeping isn't documented as a memory barrier, and it's a reasonable
+ * expectation to have. There's no reason not to explicitly include a
+ * barrier since we're giving up the CPU, and ensures callers are never
+ * surprised.
+ */
+ WT_FULL_BARRIER();
+
t.tv_sec = (time_t)(seconds + micro_seconds / WT_MILLION);
t.tv_usec = (suseconds_t)(micro_seconds % WT_MILLION);
diff --git a/src/os_win/os_sleep.c b/src/os_win/os_sleep.c
index 4b6bdaea0be..477474e0665 100644
--- a/src/os_win/os_sleep.c
+++ b/src/os_win/os_sleep.c
@@ -18,8 +18,15 @@ __wt_sleep(uint64_t seconds, uint64_t micro_seconds)
DWORD dwMilliseconds;
/*
- * If the caller wants a small pause, set to our
- * smallest granularity.
+ * Sleeping isn't documented as a memory barrier, and it's a reasonable
+ * expectation to have. There's no reason not to explicitly include a
+ * barrier since we're giving up the CPU, and ensures callers are never
+ * surprised.
+ */
+ WT_FULL_BARRIER();
+
+ /*
+ * If the caller wants a small pause, set to our smallest granularity.
*/
if (seconds == 0 && micro_seconds < WT_THOUSAND)
micro_seconds = WT_THOUSAND;
diff --git a/src/session/session_compact.c b/src/session/session_compact.c
index 30c6ad297f7..c4710dbb1a5 100644
--- a/src/session/session_compact.c
+++ b/src/session/session_compact.c
@@ -226,7 +226,10 @@ __compact_checkpoint(WT_SESSION_IMPL *session)
*/
txn_global = &S2C(session)->txn_global;
for (txn_gen = __wt_gen(session, WT_GEN_CHECKPOINT);;) {
- WT_READ_BARRIER();
+ /*
+ * This loop only checks objects that are declared volatile,
+ * therefore no barriers are needed.
+ */
if (!txn_global->checkpoint_running ||
txn_gen != __wt_gen(session, WT_GEN_CHECKPOINT))
break;
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index 92dfd9e3887..82163f471b8 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -943,13 +943,11 @@ __txn_checkpoint_wrapper(WT_SESSION_IMPL *session, const char *cfg[])
WT_STAT_CONN_SET(session, txn_checkpoint_running, 1);
txn_global->checkpoint_running = true;
- WT_FULL_BARRIER();
ret = __txn_checkpoint(session, cfg);
WT_STAT_CONN_SET(session, txn_checkpoint_running, 0);
txn_global->checkpoint_running = false;
- WT_FULL_BARRIER();
return (ret);
}
@@ -1446,8 +1444,7 @@ __checkpoint_tree(
* the checkpoint start, which might not be included, will re-set the
* modified flag. The "unless reconciliation skips updates" problem is
* handled in the reconciliation code: if reconciliation skips updates,
- * it sets the modified flag itself. Use a full barrier so we get the
- * store done quickly, this isn't a performance path.
+ * it sets the modified flag itself.
*/
btree->modified = false;
WT_FULL_BARRIER();