diff options
author | Alex Gorrod <alexg@wiredtiger.com> | 2016-03-21 12:16:18 +1100 |
---|---|---|
committer | Alex Gorrod <alexg@wiredtiger.com> | 2016-03-21 12:16:18 +1100 |
commit | b5a7e1742035dceeb842cd465fa09338e5a72cfa (patch) | |
tree | 0ce0e5beefd38af72ebbde28c4e5d6b1b1cbc856 | |
parent | 4cc6155364ad81d45f17a16f21cb5cfda973a2cb (diff) | |
download | mongo-b5a7e1742035dceeb842cd465fa09338e5a72cfa.tar.gz |
WT-2499 Fix a race in LSM between setting and checking WT_LSM_TREE_ACTIVE.
Tree close relies on any state change being visible immediately. Found
by inspection, but could cause the symptom seen in this ticket.
-rw-r--r-- | src/include/lsm.h | 19 | ||||
-rw-r--r-- | src/lsm/lsm_manager.c | 4 | ||||
-rw-r--r-- | src/lsm/lsm_merge.c | 2 | ||||
-rw-r--r-- | src/lsm/lsm_tree.c | 20 | ||||
-rw-r--r-- | src/lsm/lsm_work_unit.c | 4 |
5 files changed, 30 insertions, 19 deletions
diff --git a/src/include/lsm.h b/src/include/lsm.h index eacb4d52d3e..444073087df 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -242,13 +242,18 @@ struct __wt_lsm_tree { int64_t lsm_lookup_no_bloom; int64_t lsm_merge_throttle; -#define WT_LSM_TREE_ACTIVE 0x01 /* Workers are active */ -#define WT_LSM_TREE_AGGRESSIVE_TIMER 0x02 /* Timer for merge aggression */ -#define WT_LSM_TREE_COMPACTING 0x04 /* Tree being compacted */ -#define WT_LSM_TREE_MERGES 0x08 /* Tree should run merges */ -#define WT_LSM_TREE_NEED_SWITCH 0x10 /* New chunk needs creating */ -#define WT_LSM_TREE_OPEN 0x20 /* The tree is open */ -#define WT_LSM_TREE_THROTTLE 0x40 /* Throttle updates */ + /* + * The tree is open for business. This used to be a flag, but it is + * susceptible to races. + */ + bool active; + +#define WT_LSM_TREE_AGGRESSIVE_TIMER 0x01 /* Timer for merge aggression */ +#define WT_LSM_TREE_COMPACTING 0x02 /* Tree being compacted */ +#define WT_LSM_TREE_MERGES 0x04 /* Tree should run merges */ +#define WT_LSM_TREE_NEED_SWITCH 0x08 /* New chunk needs creating */ +#define WT_LSM_TREE_OPEN 0x10 /* The tree is open */ +#define WT_LSM_TREE_THROTTLE 0x20 /* Throttle updates */ uint32_t flags; }; diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 24707abdb5a..943a5894ab3 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -390,7 +390,7 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) F_SET(session, WT_SESSION_LOCKED_HANDLE_LIST); dhandle_locked = true; TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) { - if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) + if (!lsm_tree->active) continue; WT_ERR(__wt_epoch(session, &now)); pushms = lsm_tree->work_push_ts.tv_sec == 0 ? 0 : @@ -650,7 +650,7 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session, * is checked. */ (void)__wt_atomic_add32(&lsm_tree->queue_ref, 1); - if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) { + if (!lsm_tree->active) { (void)__wt_atomic_sub32(&lsm_tree->queue_ref, 1); return (0); } diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index 973043f334f..6d907284546 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -463,7 +463,7 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) #define LSM_MERGE_CHECK_INTERVAL WT_THOUSAND for (insert_count = 0; (ret = src->next(src)) == 0; insert_count++) { if (insert_count % LSM_MERGE_CHECK_INTERVAL == 0) { - if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) + if (!lsm_tree->active) WT_ERR(EINTR); WT_STAT_FAST_CONN_INCRV(session, diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index b64b72256cc..73466a53215 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -90,8 +90,13 @@ __lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool wait) WT_DECL_RET; int i; - /* Stop any active merges. */ - F_CLR(lsm_tree, WT_LSM_TREE_ACTIVE); + /* + * Stop any new work units being added. The barrier is necessary + * because we rely on the state change being visible before checking + * the tree queue state. + */ + lsm_tree->active = false; + WT_READ_BARRIER(); /* * Wait for all LSM operations to drain. If WiredTiger is shutting @@ -123,7 +128,7 @@ __lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool wait) } return (0); -err: F_SET(lsm_tree, WT_LSM_TREE_ACTIVE); +err: lsm_tree->active = true; return (ret); } @@ -389,7 +394,7 @@ __lsm_tree_find(WT_SESSION_IMPL *session, lsm_tree->refcnt != 1) { (void)__wt_atomic_sub32( &lsm_tree->refcnt, 1); - F_SET(lsm_tree, WT_LSM_TREE_ACTIVE); + lsm_tree->active = true; lsm_tree->excl_session = NULL; return (EBUSY); } @@ -504,7 +509,8 @@ __lsm_tree_open(WT_SESSION_IMPL *session, /* Now the tree is setup, make it visible to others. */ TAILQ_INSERT_HEAD(&S2C(session)->lsmqh, lsm_tree, q); - F_SET(lsm_tree, WT_LSM_TREE_ACTIVE | WT_LSM_TREE_OPEN); + lsm_tree->active = true; + F_SET(lsm_tree, WT_LSM_TREE_OPEN); *treep = lsm_tree; @@ -545,7 +551,7 @@ __wt_lsm_tree_release(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) WT_ASSERT(session, lsm_tree->refcnt > 0); if (lsm_tree->excl_session == session) { /* We cleared the active flag when getting exclusive access. */ - F_SET(lsm_tree, WT_LSM_TREE_ACTIVE); + lsm_tree->active = true; lsm_tree->excl_session = NULL; } (void)__wt_atomic_sub32(&lsm_tree->refcnt, 1); @@ -1218,7 +1224,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp) } /* Wait for the work unit queues to drain. */ - while (F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) { + while (lsm_tree->active) { /* * The flush flag is cleared when the chunk has been flushed. * Continue to push forced flushes until the chunk is on disk. diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index 88054f86d65..2af0c11e37e 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -29,7 +29,7 @@ __lsm_copy_chunks(WT_SESSION_IMPL *session, cookie->nchunks = 0; WT_RET(__wt_lsm_tree_readlock(session, lsm_tree)); - if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) + if (!lsm_tree->active) return (__wt_lsm_tree_readunlock(session, lsm_tree)); /* Take a copy of the current state of the LSM tree. */ @@ -79,7 +79,7 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session, WT_ASSERT(session, lsm_tree->queue_ref > 0); WT_RET(__wt_lsm_tree_readlock(session, lsm_tree)); - if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE) || lsm_tree->nchunks == 0) + if (!lsm_tree->active || lsm_tree->nchunks == 0) return (__wt_lsm_tree_readunlock(session, lsm_tree)); /* Search for a chunk to evict and/or a chunk to flush. */ |