summary refs log tree commit diff
diff options
context:
space:
mode:
author Alex Gorrod <alexg@wiredtiger.com> 2014-06-16 12:11:54 +1000
committer Alex Gorrod <alexg@wiredtiger.com> 2014-06-16 12:11:54 +1000
commit 45c3d6cb8af80badcc1159ac31fbdb44f2608884 (patch)
tree 5c05dbb6887e7def2bed8e1635ad9c8dec8c8693
parent 8da868bfc6dcca67e8cdbd62b6a525b47633e384 (diff)
parent 589a6d43c5bc7670cae9915aa25dfa628819889e (diff)
download mongo-45c3d6cb8af80badcc1159ac31fbdb44f2608884.tar.gz
Merge pull request #1063 from wiredtiger/lsm-compact-improvements
LSM compact improvements
-rw-r--r-- bench/wtperf/wtperf_opt.i 3
-rw-r--r-- src/include/lsm.h 11
-rw-r--r-- src/lsm/lsm_merge.c 7
-rw-r--r-- src/lsm/lsm_tree.c 44
-rw-r--r-- src/lsm/lsm_worker.c 28
5 files changed, 65 insertions, 28 deletions
diff --git a/bench/wtperf/wtperf_opt.i b/bench/wtperf/wtperf_opt.i
index f0fbb4532d4..de9a42be66e 100644
--- a/bench/wtperf/wtperf_opt.i
+++ b/bench/wtperf/wtperf_opt.i
@@ -130,7 +130,8 @@ DEF_OPT_AS_UINT32(stress_checkpoint_rate, 0,
"checkpoint every rate operations during the populate phase, 0 to disable")
DEF_OPT_AS_CONFIG_STRING(table_config,
"key_format=S,value_format=S,type=lsm,exclusive=true,"
- "leaf_page_max=4kb,internal_page_max=64kb,allocation_size=4kb,",
+ "allocation_size=4kb,internal_page_max=64kb,leaf_page_max=4kb,"
+ "split_pct=100",
"table configuration string")
DEF_OPT_AS_UINT32(table_count, 1,
"number of tables to run operations over. Keys are divided evenly "
diff --git a/src/include/lsm.h b/src/include/lsm.h
index b5c3859605c..873a9872a51 100644
--- a/src/include/lsm.h
+++ b/src/include/lsm.h
@@ -105,6 +105,8 @@ struct __wt_lsm_tree {
u_int merge_min, merge_max;
u_int merge_threads;
+ u_int merge_idle; /* Count of idle merge threads */
+
#define WT_LSM_BLOOM_MERGED 0x00000001
#define WT_LSM_BLOOM_OFF 0x00000002
#define WT_LSM_BLOOM_OLDEST 0x00000004
@@ -129,10 +131,11 @@ struct __wt_lsm_tree {
u_int nold_chunks; /* Number of old chunks */
#define WT_LSM_TREE_COMPACTING 0x01
-#define WT_LSM_TREE_NEED_SWITCH 0x02
-#define WT_LSM_TREE_OPEN 0x04
-#define WT_LSM_TREE_THROTTLE 0x08
-#define WT_LSM_TREE_WORKING 0x10
+#define WT_LSM_TREE_FLUSH_ALL 0x02
+#define WT_LSM_TREE_NEED_SWITCH 0x04
+#define WT_LSM_TREE_OPEN 0x08
+#define WT_LSM_TREE_THROTTLE 0x10
+#define WT_LSM_TREE_WORKING 0x20
uint32_t flags;
};
diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c
index 3e972eecdb5..841674a31ef 100644
--- a/src/lsm/lsm_merge.c
+++ b/src/lsm/lsm_merge.c
@@ -60,7 +60,7 @@ __wt_lsm_merge(
WT_LSM_CHUNK *chunk, *previous, *youngest;
uint32_t generation, max_gap, max_gen, max_level, start_id;
uint64_t insert_count, record_count, chunk_size;
- u_int dest_id, end_chunk, i, merge_min, nchunks, start_chunk;
+ u_int dest_id, end_chunk, i, merge_max, merge_min, nchunks, start_chunk;
int create_bloom, tret;
const char *cfg[3];
const char *drop_cfg[] =
@@ -80,6 +80,7 @@ __wt_lsm_merge(
if (!lsm_tree->modified ||
F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING))
aggressive = 10;
+ merge_max = (aggressive > 5) ? 100 : lsm_tree->merge_min;
merge_min = (aggressive > 5) ? 2 : lsm_tree->merge_min;
max_gap = (aggressive + 4) / 5;
max_level = (lsm_tree->merge_throttle > 0) ? 0 : id + aggressive;
@@ -191,7 +192,7 @@ __wt_lsm_merge(
* If we have a full window, or the merge would be too big,
* remove the youngest chunk.
*/
- if (nchunks == lsm_tree->merge_max ||
+ if (nchunks == merge_max ||
chunk_size > lsm_tree->chunk_max) {
WT_ASSERT(session,
F_ISSET(youngest, WT_LSM_CHUNK_MERGING));
@@ -203,7 +204,7 @@ __wt_lsm_merge(
}
nchunks = (end_chunk + 1) - start_chunk;
- WT_ASSERT(session, nchunks <= lsm_tree->merge_max);
+ WT_ASSERT(session, nchunks <= merge_max);
if (nchunks > 0) {
WT_ASSERT(session, start_chunk + nchunks <= lsm_tree->nchunks);
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index 1ab36ba937e..710b5695d99 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -1008,9 +1008,7 @@ __wt_lsm_tree_unlock(
int
__wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip)
{
- WT_DECL_RET;
WT_LSM_TREE *lsm_tree;
- uint64_t last_merge_progressing;
time_t begin, end;
/*
@@ -1033,12 +1031,30 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip)
/*
* If another thread started compacting this tree, we're done.
*/
- if (F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING))
+ if (F_ISSET(lsm_tree, WT_LSM_TREE_FLUSH_ALL | WT_LSM_TREE_COMPACTING))
return (0);
WT_RET(__wt_seconds(session, &begin));
/*
+ * Set the flush all flag so that the checkpoint worker tries to write
+ * all in-memory chunks to disk.
+ */
+ F_SET(lsm_tree, WT_LSM_TREE_FLUSH_ALL);
+
+ /* Wait for flushing to stop. */
+ while (F_ISSET(lsm_tree, WT_LSM_TREE_FLUSH_ALL) &&
+ F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) {
+ __wt_sleep(1, 0);
+ WT_RET(__wt_seconds(session, &end));
+ if (session->compact->max_time > 0 &&
+ session->compact->max_time < (uint64_t)(end - begin)) {
+ F_CLR(lsm_tree, WT_LSM_TREE_FLUSH_ALL);
+ return (ETIMEDOUT);
+ }
+ }
+
+ /*
* Set the compacting flag and clear the current merge throttle
* setting, so that all merge threads look for merges at all levels of
* the tree.
@@ -1049,23 +1065,19 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip)
/* Wake up the merge threads. */
WT_RET(__wt_cond_signal(session, lsm_tree->work_cond));
- /* Allow some time for merges to get started. */
- __wt_sleep(10, 0);
-
- /* Now wait for merge activity to stop. */
- do {
- last_merge_progressing = lsm_tree->merge_progressing;
+ /* Wait for merge activity to stop. */
+ while (F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING) &&
+ F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) {
__wt_sleep(1, 0);
WT_RET(__wt_seconds(session, &end));
if (session->compact->max_time > 0 &&
- session->compact->max_time < (uint64_t)(end - begin))
- WT_ERR(ETIMEDOUT);
- } while (lsm_tree->merge_progressing != last_merge_progressing &&
- lsm_tree->nchunks > 1);
-
-err: F_CLR(lsm_tree, WT_LSM_TREE_COMPACTING);
+ session->compact->max_time < (uint64_t)(end - begin)) {
+ F_CLR(lsm_tree, WT_LSM_TREE_COMPACTING);
+ return (ETIMEDOUT);
+ }
+ }
- return (ret);
+ return (0);
}
/*
diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c
index 49bf902ad7c..c6bfebeb2d1 100644
--- a/src/lsm/lsm_worker.c
+++ b/src/lsm/lsm_worker.c
@@ -146,9 +146,17 @@ __wt_lsm_merge_worker(void *vargs)
stallms = 0;
else if (F_ISSET(lsm_tree, WT_LSM_TREE_WORKING) &&
!F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) {
+ if (WT_ATOMIC_ADD(lsm_tree->merge_idle, 1) ==
+ lsm_tree->merge_threads &&
+ F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING))
+ F_CLR(lsm_tree, WT_LSM_TREE_COMPACTING);
+
/* Poll 10 times per second. */
WT_ERR_TIMEDOUT_OK(__wt_cond_wait(
session, lsm_tree->work_cond, 100000));
+
+ (void)WT_ATOMIC_SUB(lsm_tree->merge_idle, 1);
+
/*
* Randomize the tracking of stall time so that with
* multiple LSM trees open, they don't all get
@@ -260,21 +268,30 @@ __wt_lsm_checkpoint_worker(void *arg)
while (F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) {
if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) {
- WT_WITH_SCHEMA_LOCK(session, ret =
- __wt_lsm_tree_switch(session, lsm_tree));
+ WT_WITH_SCHEMA_LOCK(session,
+ ret = __wt_lsm_tree_switch(session, lsm_tree));
WT_ERR(ret);
}
WT_ERR(__lsm_copy_chunks(session, lsm_tree, &cookie, 0));
/* Write checkpoints in all completed files. */
- for (i = 0, j = 0; i < cookie.nchunks - 1; i++) {
+ for (i = 0, j = 0; i < cookie.nchunks; i++) {
if (!F_ISSET(lsm_tree, WT_LSM_TREE_WORKING))
goto err;
if (F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH))
break;
+ /*
+ * Normally, we ignore the latest chunk in the tree
+ * unless we are flushing all chunks before a compact
+ * operation.
+ */
+ if (i == cookie.nchunks - 1 &&
+ !F_ISSET(lsm_tree, WT_LSM_TREE_FLUSH_ALL))
+ break;
+
chunk = cookie.chunk_array[i];
/* Stop if a running transaction needs the chunk. */
@@ -402,9 +419,12 @@ __wt_lsm_checkpoint_worker(void *arg)
}
__lsm_unpin_chunks(session, &cookie);
if (j == 0 && F_ISSET(lsm_tree, WT_LSM_TREE_WORKING) &&
- !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH))
+ !F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH)) {
+ if (F_ISSET(lsm_tree, WT_LSM_TREE_FLUSH_ALL))
+ F_CLR(lsm_tree, WT_LSM_TREE_FLUSH_ALL);
WT_ERR_TIMEDOUT_OK(__wt_cond_wait(
session, lsm_tree->work_cond, 100000));
+ }
}
err: __lsm_unpin_chunks(session, &cookie);