summary refs log tree commit diff
path: root/src/lsm
diff options
context:
space:
mode:
author	Alex Gorrod <alexander.gorrod@mongodb.com>	2015-06-04 16:29:12 +1000
committer	Alex Gorrod <alexander.gorrod@mongodb.com>	2015-06-04 16:29:12 +1000
commit	f1965102d783ddb1ae9335a632f0814040cde578 (patch)
tree	b541adb69121504288084932434b708c2a7f728a /src/lsm
parent	e80c7b2d41f06328690bebec88dd9f08cb89e549 (diff)
parent	2dc9489396988828831e5614a68f2ceb644e43e9 (diff)
download	mongo-f1965102d783ddb1ae9335a632f0814040cde578.tar.gz
Merge pull request #2010 from wiredtiger/lsm-merge-aggressive
Lsm merge aggressive
Diffstat (limited to 'src/lsm')
-rw-r--r--src/lsm/lsm_manager.c46
-rw-r--r--src/lsm/lsm_merge.c156
-rw-r--r--src/lsm/lsm_work_unit.c1
-rw-r--r--src/lsm/lsm_worker.c2
4 files changed, 143 insertions(+), 62 deletions(-)
diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c
index 0d3ce5da2d8..cb078d991d8 100644
--- a/src/lsm/lsm_manager.c
+++ b/src/lsm/lsm_manager.c
@@ -8,7 +8,6 @@
#include "wt_internal.h"
-static int __lsm_manager_aggressive_update(WT_SESSION_IMPL *, WT_LSM_TREE *);
static int __lsm_manager_run_server(WT_SESSION_IMPL *);
static WT_THREAD_RET __lsm_worker_manager(void *);
@@ -339,43 +338,6 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session)
}
/*
- * __lsm_manager_aggressive_update --
- * Update the merge aggressiveness for a single LSM tree.
- */
-static int
-__lsm_manager_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
-{
- struct timespec now;
- uint64_t chunk_wait, stallms;
- u_int new_aggressive;
-
- WT_RET(__wt_epoch(session, &now));
- stallms = WT_TIMEDIFF(now, lsm_tree->last_flush_ts) / WT_MILLION;
- /*
- * Get aggressive if more than enough chunks for a merge should have
- * been created by now. Use 10 seconds as a default if we don't have an
- * estimate.
- */
- if (lsm_tree->nchunks > 1)
- chunk_wait = stallms / (lsm_tree->chunk_fill_ms == 0 ?
- 10000 : lsm_tree->chunk_fill_ms);
- else
- chunk_wait = 0;
- new_aggressive = (u_int)(chunk_wait / lsm_tree->merge_min);
-
- if (new_aggressive > lsm_tree->merge_aggressiveness) {
- WT_RET(__wt_verbose(session, WT_VERB_LSM,
- "LSM merge %s got aggressive (old %u new %u), "
- "merge_min %d, %u / %" PRIu64,
- lsm_tree->name, lsm_tree->merge_aggressiveness,
- new_aggressive, lsm_tree->merge_min, stallms,
- lsm_tree->chunk_fill_ms));
- lsm_tree->merge_aggressiveness = new_aggressive;
- }
- return (0);
-}
-
-/*
* __lsm_manager_worker_shutdown --
* Shutdown the LSM manager and worker threads.
*/
@@ -428,8 +390,6 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) {
if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE))
continue;
- WT_ERR(__lsm_manager_aggressive_update(
- session, lsm_tree));
WT_ERR(__wt_epoch(session, &now));
pushms = lsm_tree->work_push_ts.tv_sec == 0 ? 0 :
WT_TIMEDIFF(
@@ -458,7 +418,8 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
lsm_tree->nchunks > 1) ||
(lsm_tree->queue_ref == 0 &&
lsm_tree->nchunks > 1) ||
- (lsm_tree->merge_aggressiveness > 3 &&
+ (lsm_tree->merge_aggressiveness >
+ WT_LSM_AGGRESSIVE_THRESHOLD &&
!F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) ||
pushms > fillms) {
WT_ERR(__wt_lsm_manager_push_entry(
@@ -469,7 +430,8 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
session, WT_LSM_WORK_FLUSH, 0, lsm_tree));
WT_ERR(__wt_lsm_manager_push_entry(
session, WT_LSM_WORK_BLOOM, 0, lsm_tree));
- WT_ERR(__wt_verbose(session, WT_VERB_LSM,
+ WT_ERR(__wt_verbose(session,
+ WT_VERB_LSM_MANAGER,
"MGR %s: queue %d mod %d nchunks %d"
" flags 0x%x aggressive %d pushms %" PRIu64
" fillms %" PRIu64,
diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c
index d75f3b0619b..d7e684b8f51 100644
--- a/src/lsm/lsm_merge.c
+++ b/src/lsm/lsm_merge.c
@@ -40,6 +40,101 @@ __wt_lsm_merge_update_tree(WT_SESSION_IMPL *session,
}
/*
+ * __lsm_merge_aggressive_clear --
+ * We found a merge to do - clear the aggressive timer.
+ */
+static int
+__lsm_merge_aggressive_clear(WT_LSM_TREE *lsm_tree)
+{
+ F_CLR(lsm_tree, WT_LSM_TREE_AGGRESSIVE_TIMER);
+ lsm_tree->merge_aggressiveness = 0;
+ return (0);
+}
+
+/*
+ * __lsm_merge_aggressive_update --
+ * Update the merge aggressiveness for an LSM tree.
+ */
+static int
+__lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
+{
+ struct timespec now;
+ uint64_t msec_since_last_merge, msec_to_create_merge;
+ u_int new_aggressive;
+
+ new_aggressive = 0;
+
+ /*
+ * If the tree is open read-only or we are compacting, be very
+ * aggressive. Otherwise, we can spend a long time waiting for merges
+ * to start in read-only applications.
+ */
+ if (!lsm_tree->modified ||
+ F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) {
+ lsm_tree->merge_aggressiveness = 10;
+ return (0);
+ }
+
+ /*
+ * Only get aggressive if a reasonable number of flushes have been
+ * completed since opening the tree.
+ */
+ if (lsm_tree->chunks_flushed <= lsm_tree->merge_min)
+ return (__lsm_merge_aggressive_clear(lsm_tree));
+
+ /*
+ * Start the timer if it isn't running. Use a flag to define whether
+ * the timer is running - since clearing and checking a special
+ * timer value isn't simple.
+ */
+ if (!F_ISSET(lsm_tree, WT_LSM_TREE_AGGRESSIVE_TIMER)) {
+ F_SET(lsm_tree, WT_LSM_TREE_AGGRESSIVE_TIMER);
+ return (__wt_epoch(session, &lsm_tree->merge_aggressive_ts));
+ }
+
+ WT_RET(__wt_epoch(session, &now));
+ msec_since_last_merge =
+ WT_TIMEDIFF(now, lsm_tree->merge_aggressive_ts) / WT_MILLION;
+
+ /*
+ * If there is no estimate for how long it's taking to fill chunks
+ * pick 10 seconds.
+ */
+ msec_to_create_merge = lsm_tree->merge_min *
+ (lsm_tree->chunk_fill_ms == 0 ? 10000 : lsm_tree->chunk_fill_ms);
+
+ /*
+ * Don't consider getting aggressive until enough time has passed that
+ * we should have created enough chunks to trigger a new merge. We
+ * track average chunk-creation time - hence the "should"; the average
+ * fill time may not reflect the actual state if an application
+ * generates a variable load.
+ */
+ if (msec_since_last_merge < msec_to_create_merge)
+ return (0);
+
+	/*
+	 * Bump how aggressively we look for merges based on how long since
+	 * the last merge completed. The aggressive setting only increases
+	 * slowly - triggering merges across generations of chunks isn't
+	 * an efficient use of resources.
+	 */
+ while ((msec_since_last_merge /= msec_to_create_merge) > 1)
+ ++new_aggressive;
+
+ if (new_aggressive > lsm_tree->merge_aggressiveness) {
+ WT_RET(__wt_verbose(session, WT_VERB_LSM,
+ "LSM merge %s got aggressive (old %u new %u), "
+ "merge_min %d, %u / %" PRIu64,
+ lsm_tree->name, lsm_tree->merge_aggressiveness,
+ new_aggressive, lsm_tree->merge_min,
+ msec_since_last_merge, lsm_tree->chunk_fill_ms));
+ lsm_tree->merge_aggressiveness = new_aggressive;
+ }
+ return (0);
+}
+
+/*
* __lsm_merge_span --
* Figure out the best span of chunks to merge. Return an error if
* there is no need to do any merges. Called with the LSM tree
@@ -53,6 +148,7 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
uint32_t aggressive, max_gap, max_gen, max_level;
uint64_t record_count, chunk_size;
u_int end_chunk, i, merge_max, merge_min, nchunks, start_chunk;
+ u_int oldest_gen, youngest_gen;
chunk_size = 0;
nchunks = 0;
@@ -64,18 +160,9 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
*end = 0;
*records = 0;
- /*
- * If the tree is open read-only or we are compacting, be very
- * aggressive. Otherwise, we can spend a long time waiting for merges
- * to start in read-only applications.
- */
- if (!lsm_tree->modified ||
- F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING))
- lsm_tree->merge_aggressiveness = 10;
-
aggressive = lsm_tree->merge_aggressiveness;
merge_max = (aggressive > WT_LSM_AGGRESSIVE_THRESHOLD) ?
- 100 : lsm_tree->merge_min;
+ 100 : lsm_tree->merge_max;
merge_min = (aggressive > WT_LSM_AGGRESSIVE_THRESHOLD) ?
2 : lsm_tree->merge_min;
max_gap = (aggressive + 4) / 5;
@@ -127,6 +214,8 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
* with the most recent set of chunks and work backwards until going
* further becomes significantly less efficient.
*/
+retry_find:
+ oldest_gen = youngest_gen = lsm_tree->chunk[end_chunk]->generation;
for (start_chunk = end_chunk + 1, record_count = 0;
start_chunk > 0; ) {
chunk = lsm_tree->chunk[start_chunk - 1];
@@ -159,6 +248,15 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
chunk_size - youngest->size > lsm_tree->chunk_max))
break;
+ /* Track chunk generations seen while looking for a merge */
+ if (chunk->generation < youngest_gen)
+ youngest_gen = chunk->generation;
+ else if (chunk->generation > oldest_gen)
+ oldest_gen = chunk->generation;
+
+ if (oldest_gen - youngest_gen > max_gap)
+ break;
+
/*
* If we have enough chunks for a merge and the next chunk is
* in too high a generation, stop.
@@ -176,18 +274,23 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
--start_chunk;
/*
- * If we have a full window, or the merge would be too big,
- * remove the youngest chunk.
+ * If the merge would be too big, or we have a full window
+ * and we could include an older chunk if the window wasn't
+ * full, remove the youngest chunk.
*/
- if (nchunks == merge_max ||
- chunk_size > lsm_tree->chunk_max) {
+ if (chunk_size > lsm_tree->chunk_max ||
+ (nchunks == merge_max && start_chunk > 0 &&
+ chunk->generation ==
+ lsm_tree->chunk[start_chunk - 1]->generation)) {
WT_ASSERT(session,
F_ISSET(youngest, WT_LSM_CHUNK_MERGING));
F_CLR(youngest, WT_LSM_CHUNK_MERGING);
record_count -= youngest->count;
chunk_size -= youngest->size;
--end_chunk;
- }
+ } else if (nchunks == merge_max)
+ /* We've found the best full merge we can */
+ break;
}
nchunks = (end_chunk + 1) - start_chunk;
@@ -208,17 +311,28 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree,
* generations.
*/
if (nchunks < merge_min ||
- lsm_tree->chunk[end_chunk]->generation >
- youngest->generation + max_gap) {
+ oldest_gen - youngest_gen > max_gap) {
for (i = 0; i < nchunks; i++) {
chunk = lsm_tree->chunk[start_chunk + i];
WT_ASSERT(session,
F_ISSET(chunk, WT_LSM_CHUNK_MERGING));
F_CLR(chunk, WT_LSM_CHUNK_MERGING);
}
+ /*
+ * If we didn't find a merge with appropriate gaps, try again
+ * with a smaller range.
+ */
+ if (end_chunk > lsm_tree->merge_min &&
+ oldest_gen - youngest_gen > max_gap) {
+ --end_chunk;
+ goto retry_find;
+ }
+ /* Consider getting aggressive if no merge was found */
+ WT_RET(__lsm_merge_aggressive_update(session, lsm_tree));
return (WT_NOTFOUND);
}
+ WT_RET(__lsm_merge_aggressive_clear(lsm_tree));
*records = record_count;
*start = start_chunk;
*end = end_chunk;
@@ -299,8 +413,12 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id)
start_chunk, end_chunk, dest_id, record_count, generation));
for (verb = start_chunk; verb <= end_chunk; verb++)
WT_ERR(__wt_verbose(session, WT_VERB_LSM,
- "%s: Chunk[%u] id %u",
- lsm_tree->name, verb, lsm_tree->chunk[verb]->id));
+ "Merging %s: Chunk[%u] id %u, gen: %" PRIu32
+ ", size: %" PRIu64 ", records: %" PRIu64,
+ lsm_tree->name, verb, lsm_tree->chunk[verb]->id,
+ lsm_tree->chunk[verb]->generation,
+ lsm_tree->chunk[verb]->size,
+ lsm_tree->chunk[verb]->count));
}
WT_ERR(__wt_calloc_one(session, &chunk));
diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c
index 547a4096260..c3bee162ea1 100644
--- a/src/lsm/lsm_work_unit.c
+++ b/src/lsm/lsm_work_unit.c
@@ -333,6 +333,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
/* Update the flush timestamp to help track ongoing progress. */
WT_RET(__wt_epoch(session, &lsm_tree->last_flush_ts));
+ ++lsm_tree->chunks_flushed;
/* Lock the tree, mark the chunk as on disk and update the metadata. */
WT_RET(__wt_lsm_tree_writelock(session, lsm_tree));
diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c
index d1272df763d..8ed4a117641 100644
--- a/src/lsm/lsm_worker.c
+++ b/src/lsm/lsm_worker.c
@@ -19,7 +19,7 @@ static WT_THREAD_RET __lsm_worker(void *);
int
__wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args)
{
- WT_RET(__wt_verbose(session, WT_VERB_LSM,
+ WT_RET(__wt_verbose(session, WT_VERB_LSM_MANAGER,
"Start LSM worker %d type 0x%x", args->id, args->type));
return (__wt_thread_create(session, &args->tid, __lsm_worker, args));
}