diff options
author | Alex Gorrod <alexg@wiredtiger.com> | 2014-06-16 12:11:54 +1000 |
---|---|---|
committer | Alex Gorrod <alexg@wiredtiger.com> | 2014-06-16 12:11:54 +1000 |
commit | 45c3d6cb8af80badcc1159ac31fbdb44f2608884 (patch) | |
tree | 5c05dbb6887e7def2bed8e1635ad9c8dec8c8693 | |
parent | 8da868bfc6dcca67e8cdbd62b6a525b47633e384 (diff) | |
parent | 589a6d43c5bc7670cae9915aa25dfa628819889e (diff) | |
download | mongo-45c3d6cb8af80badcc1159ac31fbdb44f2608884.tar.gz |
Merge pull request #1063 from wiredtiger/lsm-compact-improvements
LSM compact improvements
-rw-r--r-- | bench/wtperf/wtperf_opt.i | 3 |
-rw-r--r-- | src/include/lsm.h | 11 |
-rw-r--r-- | src/lsm/lsm_merge.c | 7 |
-rw-r--r-- | src/lsm/lsm_tree.c | 44 |
-rw-r--r-- | src/lsm/lsm_worker.c | 28 |
5 files changed, 65 insertions, 28 deletions
diff --git a/bench/wtperf/wtperf_opt.i b/bench/wtperf/wtperf_opt.i index f0fbb4532d4..de9a42be66e 100644 --- a/bench/wtperf/wtperf_opt.i +++ b/bench/wtperf/wtperf_opt.i @@ -130,7 +130,8 @@ DEF_OPT_AS_UINT32(stress_checkpoint_rate, 0, "checkpoint every rate operations during the populate phase, 0 to disable") DEF_OPT_AS_CONFIG_STRING(table_config, "key_format=S,value_format=S,type=lsm,exclusive=true," - "leaf_page_max=4kb,internal_page_max=64kb,allocation_size=4kb,", + "allocation_size=4kb,internal_page_max=64kb,leaf_page_max=4kb," + "split_pct=100", "table configuration string") DEF_OPT_AS_UINT32(table_count, 1, "number of tables to run operations over. Keys are divided evenly " diff --git a/src/include/lsm.h b/src/include/lsm.h index b5c3859605c..873a9872a51 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -105,6 +105,8 @@ struct __wt_lsm_tree { u_int merge_min, merge_max; u_int merge_threads; + u_int merge_idle; /* Count of idle merge threads */ + #define WT_LSM_BLOOM_MERGED 0x00000001 #define WT_LSM_BLOOM_OFF 0x00000002 #define WT_LSM_BLOOM_OLDEST 0x00000004 @@ -129,10 +131,11 @@ struct __wt_lsm_tree { u_int nold_chunks; /* Number of old chunks */ #define WT_LSM_TREE_COMPACTING 0x01 -#define WT_LSM_TREE_NEED_SWITCH 0x02 -#define WT_LSM_TREE_OPEN 0x04 -#define WT_LSM_TREE_THROTTLE 0x08 -#define WT_LSM_TREE_WORKING 0x10 +#define WT_LSM_TREE_FLUSH_ALL 0x02 +#define WT_LSM_TREE_NEED_SWITCH 0x04 +#define WT_LSM_TREE_OPEN 0x08 +#define WT_LSM_TREE_THROTTLE 0x10 +#define WT_LSM_TREE_WORKING 0x20 uint32_t flags; }; diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index 3e972eecdb5..841674a31ef 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -60,7 +60,7 @@ __wt_lsm_merge( WT_LSM_CHUNK *chunk, *previous, *youngest; uint32_t generation, max_gap, max_gen, max_level, start_id; uint64_t insert_count, record_count, chunk_size; - u_int dest_id, end_chunk, i, merge_min, nchunks, start_chunk; + u_int dest_id, end_chunk, i, merge_max, merge_min, 
nchunks, start_chunk; int create_bloom, tret; const char *cfg[3]; const char *drop_cfg[] = @@ -80,6 +80,7 @@ __wt_lsm_merge( if (!lsm_tree->modified || F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) aggressive = 10; + merge_max = (aggressive > 5) ? 100 : lsm_tree->merge_min; merge_min = (aggressive > 5) ? 2 : lsm_tree->merge_min; max_gap = (aggressive + 4) / 5; max_level = (lsm_tree->merge_throttle > 0) ? 0 : id + aggressive; @@ -191,7 +192,7 @@ __wt_lsm_merge( * If we have a full window, or the merge would be too big, * remove the youngest chunk. */ - if (nchunks == lsm_tree->merge_max || + if (nchunks == merge_max || chunk_size > lsm_tree->chunk_max) { WT_ASSERT(session, F_ISSET(youngest, WT_LSM_CHUNK_MERGING)); @@ -203,7 +204,7 @@ __wt_lsm_merge( } nchunks = (end_chunk + 1) - start_chunk; - WT_ASSERT(session, nchunks <= lsm_tree->merge_max); + WT_ASSERT(session, nchunks <= merge_max); if (nchunks > 0) { WT_ASSERT(session, start_chunk + nchunks <= lsm_tree->nchunks); diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 1ab36ba937e..710b5695d99 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -1008,9 +1008,7 @@ __wt_lsm_tree_unlock( int __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) { - WT_DECL_RET; WT_LSM_TREE *lsm_tree; - uint64_t last_merge_progressing; time_t begin, end; /* @@ -1033,12 +1031,30 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) /* * If another thread started compacting this tree, we're done. */ - if (F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) + if (F_ISSET(lsm_tree, WT_LSM_TREE_FLUSH_ALL | WT_LSM_TREE_COMPACTING)) return (0); WT_RET(__wt_seconds(session, &begin)); /* + * Set the flush all flag so that the checkpoint worker tries to write + * all in-memory chunks to disk. + */ + F_SET(lsm_tree, WT_LSM_TREE_FLUSH_ALL); + + /* Wait for flushing to stop. 
*/ + while (F_ISSET(lsm_tree, WT_LSM_TREE_FLUSH_ALL) && + F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) { + __wt_sleep(1, 0); + WT_RET(__wt_seconds(session, &end)); + if (session->compact->max_time > 0 && + session->compact->max_time < (uint64_t)(end - begin)) { + F_CLR(lsm_tree, WT_LSM_TREE_FLUSH_ALL); + return (ETIMEDOUT); + } + } + + /* * Set the compacting flag and clear the current merge throttle * setting, so that all merge threads look for merges at all levels of * the tree. @@ -1049,23 +1065,19 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) /* Wake up the merge threads. */ WT_RET(__wt_cond_signal(session, lsm_tree->work_cond)); - /* Allow some time for merges to get started. */ - __wt_sleep(10, 0); - - /* Now wait for merge activity to stop. */ - do { - last_merge_progressing = lsm_tree->merge_progressing; + /* Wait for merge activity to stop. */ + while (F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING) && + F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) { __wt_sleep(1, 0); WT_RET(__wt_seconds(session, &end)); if (session->compact->max_time > 0 && - session->compact->max_time < (uint64_t)(end - begin)) - WT_ERR(ETIMEDOUT); - } while (lsm_tree->merge_progressing != last_merge_progressing && - lsm_tree->nchunks > 1); - -err: F_CLR(lsm_tree, WT_LSM_TREE_COMPACTING); + session->compact->max_time < (uint64_t)(end - begin)) { + F_CLR(lsm_tree, WT_LSM_TREE_COMPACTING); + return (ETIMEDOUT); + } + } - return (ret); + return (0); } /* diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index 49bf902ad7c..c6bfebeb2d1 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -146,9 +146,17 @@ __wt_lsm_merge_worker(void *vargs) stallms = 0; else if (F_ISSET(lsm_tree, WT_LSM_TREE_WORKING) && !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) { + if (WT_ATOMIC_ADD(lsm_tree->merge_idle, 1) == + lsm_tree->merge_threads && + F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) + F_CLR(lsm_tree, WT_LSM_TREE_COMPACTING); + /* Poll 10 times per second. 
*/ WT_ERR_TIMEDOUT_OK(__wt_cond_wait( session, lsm_tree->work_cond, 100000)); + + (void)WT_ATOMIC_SUB(lsm_tree->merge_idle, 1); + /* * Randomize the tracking of stall time so that with * multiple LSM trees open, they don't all get @@ -260,21 +268,30 @@ __wt_lsm_checkpoint_worker(void *arg) while (F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) { if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) { - WT_WITH_SCHEMA_LOCK(session, ret = - __wt_lsm_tree_switch(session, lsm_tree)); + WT_WITH_SCHEMA_LOCK(session, + ret = __wt_lsm_tree_switch(session, lsm_tree)); WT_ERR(ret); } WT_ERR(__lsm_copy_chunks(session, lsm_tree, &cookie, 0)); /* Write checkpoints in all completed files. */ - for (i = 0, j = 0; i < cookie.nchunks - 1; i++) { + for (i = 0, j = 0; i < cookie.nchunks; i++) { if (!F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) goto err; if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) break; + /* + * Normally, we ignore the latest chunk in the tree + * unless we are flushing all chunks before a compact + * operation. + */ + if (i == cookie.nchunks - 1 && + !F_ISSET(lsm_tree, WT_LSM_TREE_FLUSH_ALL)) + break; + chunk = cookie.chunk_array[i]; /* Stop if a running transaction needs the chunk. */ @@ -402,9 +419,12 @@ __wt_lsm_checkpoint_worker(void *arg) } __lsm_unpin_chunks(session, &cookie); if (j == 0 && F_ISSET(lsm_tree, WT_LSM_TREE_WORKING) && - !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) + !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) { + if (F_ISSET(lsm_tree, WT_LSM_TREE_FLUSH_ALL)) + F_CLR(lsm_tree, WT_LSM_TREE_FLUSH_ALL); WT_ERR_TIMEDOUT_OK(__wt_cond_wait( session, lsm_tree->work_cond, 100000)); + } } err: __lsm_unpin_chunks(session, &cookie); |