diff options
author | Michael Cahill <michael.cahill@wiredtiger.com> | 2013-12-11 15:03:51 +1100 |
---|---|---|
committer | Michael Cahill <michael.cahill@wiredtiger.com> | 2013-12-11 15:03:51 +1100 |
commit | 7e6d11a4a5f687cabfec91b93deadb4080497733 (patch) | |
tree | 11bf50065b703260ee1858e86340f2eadbd35c5a | |
parent | 745b2bb142e5fb41af5a16c54fd6bf8e6706a836 (diff) | |
download | mongo-7e6d11a4a5f687cabfec91b93deadb4080497733.tar.gz |
LSM compaction review updates. Implement Sue's suggestions, be more aggressive about waking the merge threads, fix some merge pathology.
--HG--
rename : bench/wtperf/runners/medium-lsm-sleep.wtperf => bench/wtperf/runners/medium-lsm-compact.wtperf
-rw-r--r-- | bench/wtperf/runners/medium-lsm-compact.wtperf (renamed from bench/wtperf/runners/medium-lsm-sleep.wtperf) | 6 |
-rw-r--r-- | src/docs/compact.dox | 16 |
-rw-r--r-- | src/lsm/lsm_merge.c | 19 |
-rw-r--r-- | src/lsm/lsm_tree.c | 25 |
-rw-r--r-- | src/session/session_compact.c | 32 |
5 files changed, 47 insertions, 51 deletions
diff --git a/bench/wtperf/runners/medium-lsm-sleep.wtperf b/bench/wtperf/runners/medium-lsm-compact.wtperf index 8b6e5a1f683..5393cdbfeba 100644 --- a/bench/wtperf/runners/medium-lsm-sleep.wtperf +++ b/bench/wtperf/runners/medium-lsm-compact.wtperf @@ -2,8 +2,8 @@ conn_config="cache_size=1G" table_config="lsm=(chunk_size=100MB,merge_threads=2),type=lsm" icount=50000000 -report_interval=5 -run_time=120 populate_threads=1 -threads=((count=16,read=1)) compact=true +threads=((count=16,read=1)) +run_time=120 +report_interval=5 diff --git a/src/docs/compact.dox b/src/docs/compact.dox index 634bb98d9b5..c68415b5324 100644 --- a/src/docs/compact.dox +++ b/src/docs/compact.dox @@ -1,13 +1,13 @@ /*! @page compaction Compaction -The WT_SESSION::compact method can be used to compact a row- or column-store -btree (log-structured merge trees cannot be compacted). +The WT_SESSION::compact method can be used to compact btree and log-structured +merge tree data sources. -The data source does not have to be quiescent, compaction may be performed on -a live data source. - -Because checkpoints named by the application are not discarded until -explicitly removed or replaced, they may prevent WT_SESSION::compact -from accomplishing anything. +The data source does not have to be quiescent, compaction may be performed on a +live data source. +Because checkpoints named by the application are not discarded until explicitly +removed or replaced, they may prevent WT_SESSION::compact from accomplishing +anything. + * */ diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index 9337b1cb18b..f92b8610f80 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -72,15 +72,15 @@ __wt_lsm_merge( start_id = 0; /* - * If the tree is open read-only, be very aggressive. Otherwise, we - * can spend a long time waiting for merges to start in read-only - * applications. + * If the tree is open read-only or we are compacting, be very + * aggressive. 
Otherwise, we can spend a long time waiting for merges + * to start in read-only applications. */ if (!lsm_tree->modified || F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) aggressive = 10; merge_min = (aggressive > 5) ? 2 : lsm_tree->merge_min; - max_generation_gap = aggressive > 10 ? 3 : 1; + max_generation_gap = 1 + aggressive / 5; /* * If there aren't any chunks to merge, or some of the chunks aren't @@ -156,13 +156,16 @@ __wt_lsm_merge( break; /* - * If we have enough chunks for a merge and the next chunk is - * in a different generation, stop. + * In normal operation, if we have enough chunks for a merge + * and the next chunk is in a different generation, stop. + * In aggressive mode, look for the biggest merge we can do. */ if (nchunks >= merge_min) { previous = lsm_tree->chunk[start_chunk]; - if (chunk->generation > previous->generation && - previous->generation <= youngest->generation + 1) + if (previous->generation <= + youngest->generation + max_generation_gap && + chunk->generation > + previous->generation + max_generation_gap - 1) break; } diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index 3aa1b6419d7..0f400befe9d 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -932,8 +932,8 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name) { WT_DECL_RET; WT_LSM_TREE *lsm_tree; - struct timespec begin, end; uint64_t last_merge_progressing; + time_t begin, end; /* Ignore non LSM names. */ if (!WT_PREFIX_MATCH(name, "lsm:")) @@ -941,19 +941,28 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name) WT_RET(__wt_lsm_tree_get(session, name, 0, &lsm_tree)); - WT_RET(__wt_epoch(session, &begin)); + if (!F_ISSET(S2C(session), WT_CONN_LSM_MERGE) || + lsm_tree->merge_threads == 0) + WT_RET_MSG(session, EINVAL, + "LSM compaction requires active merge threads"); + + WT_RET(__wt_seconds(session, &begin)); F_SET(lsm_tree, WT_LSM_TREE_COMPACTING); - /* Wait for merge activity to stop. */ + + /* Wake up the merge threads. 
*/ + WT_RET(__wt_cond_signal(session, lsm_tree->work_cond)); + + /* Now wait for merge activity to stop. */ do { last_merge_progressing = lsm_tree->merge_progressing; - __wt_sleep(10, 0); - WT_RET(__wt_epoch(session, &end)); + __wt_sleep(1, 0); + WT_RET(__wt_seconds(session, &end)); if (session->compact->max_time > 0 && - session->compact->max_time < - WT_TIMEDIFF(end, begin) / WT_BILLION) + session->compact->max_time < (uint64_t)(end - begin)) WT_ERR(ETIMEDOUT); - } while (lsm_tree->merge_progressing != last_merge_progressing); + } while (lsm_tree->merge_progressing != last_merge_progressing && + lsm_tree->nchunks > 1); err: F_CLR(lsm_tree, WT_LSM_TREE_COMPACTING); diff --git a/src/session/session_compact.c b/src/session/session_compact.c index 76b67119a05..d3f5be6b71b 100644 --- a/src/session/session_compact.c +++ b/src/session/session_compact.c @@ -118,7 +118,7 @@ __wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri) /* * __session_compact_check_timeout -- - * + * Check if the timeout has been exceeded. */ static int __session_compact_check_timeout( @@ -150,8 +150,8 @@ __compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) int i; struct timespec start_time; - wt_session = (WT_SESSION *)session; txn = &session->txn; + wt_session = &session->iface; /* * File compaction requires checkpoints, which will fail in a @@ -174,12 +174,10 @@ __compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) /* * We compact 10% of the file on each pass, try 10 times (which is * probably overkill), and quit if we make no progress. Check for a - * timeout between each operation, to be as responsive to the user - * as is practical. + * timeout each time through the loop. 
*/ for (i = 0; i < 10; ++i) { WT_ERR(wt_session->checkpoint(wt_session, t->data)); - WT_ERR(__session_compact_check_timeout(session, start_time)); session->compaction = 0; WT_WITH_SCHEMA_LOCK(session, @@ -188,10 +186,8 @@ __compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) WT_ERR(ret); if (!session->compaction) break; - WT_ERR(__session_compact_check_timeout(session, start_time)); WT_ERR(wt_session->checkpoint(wt_session, t->data)); - WT_ERR(__session_compact_check_timeout(session, start_time)); WT_ERR(wt_session->checkpoint(wt_session, t->data)); WT_ERR(__session_compact_check_timeout(session, start_time)); } @@ -214,32 +210,20 @@ __wt_session_compact( WT_SESSION_IMPL *session; session = (WT_SESSION_IMPL *)wt_session; - SESSION_API_CALL(session, compact, config, cfg); /* Setup the structure in the session handle */ memset(&compact, 0, sizeof(WT_COMPACT)); session->compact = &compact; - /* - * Find what types of data sources are being compacted. - */ - WT_ERR(__wt_schema_worker( - session, uri, NULL, __wt_compact_uri_analyze, cfg, 0)); - - /* We are done if there aren't any files we can compact. */ - if (session->compact->lsm_count == 0 && - session->compact->file_count == 0) - goto err; - WT_ERR(__wt_config_gets(session, cfg, "timeout", &cval)); session->compact->max_time = (uint64_t)cval.val; - /* - * TODO: We can't hold the schema lock here - LSM acquires the - * schema lock when completing merges. We probably do want to stop - * "external" schema changes while we are compacting though. - */ + /* Find the types of data sources are being compacted. */ + WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker( + session, uri, NULL, __wt_compact_uri_analyze, cfg, 0)); + WT_ERR(ret); + if (session->compact->lsm_count != 0) WT_ERR(__wt_schema_worker( session, uri, NULL, __wt_lsm_compact, cfg, 0)); |