summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Michael Cahill <michael.cahill@wiredtiger.com> 2013-12-11 15:03:51 +1100
committer Michael Cahill <michael.cahill@wiredtiger.com> 2013-12-11 15:03:51 +1100
commit 7e6d11a4a5f687cabfec91b93deadb4080497733 (patch)
tree 11bf50065b703260ee1858e86340f2eadbd35c5a
parent 745b2bb142e5fb41af5a16c54fd6bf8e6706a836 (diff)
download mongo-7e6d11a4a5f687cabfec91b93deadb4080497733.tar.gz
LSM compaction review updates. Implement Sue's suggestions, be more aggressive about waking the merge threads, fix some merge pathology.
--HG-- rename : bench/wtperf/runners/medium-lsm-sleep.wtperf => bench/wtperf/runners/medium-lsm-compact.wtperf
-rw-r--r-- bench/wtperf/runners/medium-lsm-compact.wtperf (renamed from bench/wtperf/runners/medium-lsm-sleep.wtperf) 6
-rw-r--r-- src/docs/compact.dox 16
-rw-r--r-- src/lsm/lsm_merge.c 19
-rw-r--r-- src/lsm/lsm_tree.c 25
-rw-r--r-- src/session/session_compact.c 32
5 files changed, 47 insertions, 51 deletions
diff --git a/bench/wtperf/runners/medium-lsm-sleep.wtperf b/bench/wtperf/runners/medium-lsm-compact.wtperf
index 8b6e5a1f683..5393cdbfeba 100644
--- a/bench/wtperf/runners/medium-lsm-sleep.wtperf
+++ b/bench/wtperf/runners/medium-lsm-compact.wtperf
@@ -2,8 +2,8 @@
conn_config="cache_size=1G"
table_config="lsm=(chunk_size=100MB,merge_threads=2),type=lsm"
icount=50000000
-report_interval=5
-run_time=120
populate_threads=1
-threads=((count=16,read=1))
compact=true
+threads=((count=16,read=1))
+run_time=120
+report_interval=5
diff --git a/src/docs/compact.dox b/src/docs/compact.dox
index 634bb98d9b5..c68415b5324 100644
--- a/src/docs/compact.dox
+++ b/src/docs/compact.dox
@@ -1,13 +1,13 @@
/*! @page compaction Compaction
-The WT_SESSION::compact method can be used to compact a row- or column-store
-btree (log-structured merge trees cannot be compacted).
+The WT_SESSION::compact method can be used to compact btree and log-structured
+merge tree data sources.
-The data source does not have to be quiescent, compaction may be performed on
-a live data source.
-
-Because checkpoints named by the application are not discarded until
-explicitly removed or replaced, they may prevent WT_SESSION::compact
-from accomplishing anything.
+The data source does not have to be quiescent, compaction may be performed on a
+live data source.
+Because checkpoints named by the application are not discarded until explicitly
+removed or replaced, they may prevent WT_SESSION::compact from accomplishing
+anything.
+ *
*/
diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c
index 9337b1cb18b..f92b8610f80 100644
--- a/src/lsm/lsm_merge.c
+++ b/src/lsm/lsm_merge.c
@@ -72,15 +72,15 @@ __wt_lsm_merge(
start_id = 0;
/*
- * If the tree is open read-only, be very aggressive. Otherwise, we
- * can spend a long time waiting for merges to start in read-only
- * applications.
+ * If the tree is open read-only or we are compacting, be very
+ * aggressive. Otherwise, we can spend a long time waiting for merges
+ * to start in read-only applications.
*/
if (!lsm_tree->modified ||
F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING))
aggressive = 10;
merge_min = (aggressive > 5) ? 2 : lsm_tree->merge_min;
- max_generation_gap = aggressive > 10 ? 3 : 1;
+ max_generation_gap = 1 + aggressive / 5;
/*
* If there aren't any chunks to merge, or some of the chunks aren't
@@ -156,13 +156,16 @@ __wt_lsm_merge(
break;
/*
- * If we have enough chunks for a merge and the next chunk is
- * in a different generation, stop.
+ * In normal operation, if we have enough chunks for a merge
+ * and the next chunk is in a different generation, stop.
+ * In aggressive mode, look for the biggest merge we can do.
*/
if (nchunks >= merge_min) {
previous = lsm_tree->chunk[start_chunk];
- if (chunk->generation > previous->generation &&
- previous->generation <= youngest->generation + 1)
+ if (previous->generation <=
+ youngest->generation + max_generation_gap &&
+ chunk->generation >
+ previous->generation + max_generation_gap - 1)
break;
}
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index 3aa1b6419d7..0f400befe9d 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -932,8 +932,8 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name)
{
WT_DECL_RET;
WT_LSM_TREE *lsm_tree;
- struct timespec begin, end;
uint64_t last_merge_progressing;
+ time_t begin, end;
/* Ignore non LSM names. */
if (!WT_PREFIX_MATCH(name, "lsm:"))
@@ -941,19 +941,28 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name)
WT_RET(__wt_lsm_tree_get(session, name, 0, &lsm_tree));
- WT_RET(__wt_epoch(session, &begin));
+ if (!F_ISSET(S2C(session), WT_CONN_LSM_MERGE) ||
+ lsm_tree->merge_threads == 0)
+ WT_RET_MSG(session, EINVAL,
+ "LSM compaction requires active merge threads");
+
+ WT_RET(__wt_seconds(session, &begin));
F_SET(lsm_tree, WT_LSM_TREE_COMPACTING);
- /* Wait for merge activity to stop. */
+
+ /* Wake up the merge threads. */
+ WT_RET(__wt_cond_signal(session, lsm_tree->work_cond));
+
+ /* Now wait for merge activity to stop. */
do {
last_merge_progressing = lsm_tree->merge_progressing;
- __wt_sleep(10, 0);
- WT_RET(__wt_epoch(session, &end));
+ __wt_sleep(1, 0);
+ WT_RET(__wt_seconds(session, &end));
if (session->compact->max_time > 0 &&
- session->compact->max_time <
- WT_TIMEDIFF(end, begin) / WT_BILLION)
+ session->compact->max_time < (uint64_t)(end - begin))
WT_ERR(ETIMEDOUT);
- } while (lsm_tree->merge_progressing != last_merge_progressing);
+ } while (lsm_tree->merge_progressing != last_merge_progressing &&
+ lsm_tree->nchunks > 1);
err: F_CLR(lsm_tree, WT_LSM_TREE_COMPACTING);
diff --git a/src/session/session_compact.c b/src/session/session_compact.c
index 76b67119a05..d3f5be6b71b 100644
--- a/src/session/session_compact.c
+++ b/src/session/session_compact.c
@@ -118,7 +118,7 @@ __wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri)
/*
* __session_compact_check_timeout --
- *
+ * Check if the timeout has been exceeded.
*/
static int
__session_compact_check_timeout(
@@ -150,8 +150,8 @@ __compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
int i;
struct timespec start_time;
- wt_session = (WT_SESSION *)session;
txn = &session->txn;
+ wt_session = &session->iface;
/*
* File compaction requires checkpoints, which will fail in a
@@ -174,12 +174,10 @@ __compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
/*
* We compact 10% of the file on each pass, try 10 times (which is
* probably overkill), and quit if we make no progress. Check for a
- * timeout between each operation, to be as responsive to the user
- * as is practical.
+ * timeout each time through the loop.
*/
for (i = 0; i < 10; ++i) {
WT_ERR(wt_session->checkpoint(wt_session, t->data));
- WT_ERR(__session_compact_check_timeout(session, start_time));
session->compaction = 0;
WT_WITH_SCHEMA_LOCK(session,
@@ -188,10 +186,8 @@ __compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
WT_ERR(ret);
if (!session->compaction)
break;
- WT_ERR(__session_compact_check_timeout(session, start_time));
WT_ERR(wt_session->checkpoint(wt_session, t->data));
- WT_ERR(__session_compact_check_timeout(session, start_time));
WT_ERR(wt_session->checkpoint(wt_session, t->data));
WT_ERR(__session_compact_check_timeout(session, start_time));
}
@@ -214,32 +210,20 @@ __wt_session_compact(
WT_SESSION_IMPL *session;
session = (WT_SESSION_IMPL *)wt_session;
-
SESSION_API_CALL(session, compact, config, cfg);
/* Setup the structure in the session handle */
memset(&compact, 0, sizeof(WT_COMPACT));
session->compact = &compact;
- /*
- * Find what types of data sources are being compacted.
- */
- WT_ERR(__wt_schema_worker(
- session, uri, NULL, __wt_compact_uri_analyze, cfg, 0));
-
- /* We are done if there aren't any files we can compact. */
- if (session->compact->lsm_count == 0 &&
- session->compact->file_count == 0)
- goto err;
-
WT_ERR(__wt_config_gets(session, cfg, "timeout", &cval));
session->compact->max_time = (uint64_t)cval.val;
- /*
- * TODO: We can't hold the schema lock here - LSM acquires the
- * schema lock when completing merges. We probably do want to stop
- * "external" schema changes while we are compacting though.
- */
+ /* Find the types of data sources being compacted. */
+ WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker(
+ session, uri, NULL, __wt_compact_uri_analyze, cfg, 0));
+ WT_ERR(ret);
+
if (session->compact->lsm_count != 0)
WT_ERR(__wt_schema_worker(
session, uri, NULL, __wt_lsm_compact, cfg, 0));