diff options
author | Alex Gorrod <alexander.gorrod@mongodb.com> | 2015-06-04 16:29:12 +1000 |
---|---|---|
committer | Alex Gorrod <alexander.gorrod@mongodb.com> | 2015-06-04 16:29:12 +1000 |
commit | f1965102d783ddb1ae9335a632f0814040cde578 (patch) | |
tree | b541adb69121504288084932434b708c2a7f728a /src/lsm | |
parent | e80c7b2d41f06328690bebec88dd9f08cb89e549 (diff) | |
parent | 2dc9489396988828831e5614a68f2ceb644e43e9 (diff) | |
download | mongo-f1965102d783ddb1ae9335a632f0814040cde578.tar.gz |
Merge pull request #2010 from wiredtiger/lsm-merge-aggressive
Lsm merge aggressive
Diffstat (limited to 'src/lsm')
-rw-r--r-- | src/lsm/lsm_manager.c | 46 | ||||
-rw-r--r-- | src/lsm/lsm_merge.c | 156 | ||||
-rw-r--r-- | src/lsm/lsm_work_unit.c | 1 | ||||
-rw-r--r-- | src/lsm/lsm_worker.c | 2 |
4 files changed, 143 insertions, 62 deletions
diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c index 0d3ce5da2d8..cb078d991d8 100644 --- a/src/lsm/lsm_manager.c +++ b/src/lsm/lsm_manager.c @@ -8,7 +8,6 @@ #include "wt_internal.h" -static int __lsm_manager_aggressive_update(WT_SESSION_IMPL *, WT_LSM_TREE *); static int __lsm_manager_run_server(WT_SESSION_IMPL *); static WT_THREAD_RET __lsm_worker_manager(void *); @@ -339,43 +338,6 @@ __wt_lsm_manager_destroy(WT_SESSION_IMPL *session) } /* - * __lsm_manager_aggressive_update -- - * Update the merge aggressiveness for a single LSM tree. - */ -static int -__lsm_manager_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) -{ - struct timespec now; - uint64_t chunk_wait, stallms; - u_int new_aggressive; - - WT_RET(__wt_epoch(session, &now)); - stallms = WT_TIMEDIFF(now, lsm_tree->last_flush_ts) / WT_MILLION; - /* - * Get aggressive if more than enough chunks for a merge should have - * been created by now. Use 10 seconds as a default if we don't have an - * estimate. - */ - if (lsm_tree->nchunks > 1) - chunk_wait = stallms / (lsm_tree->chunk_fill_ms == 0 ? - 10000 : lsm_tree->chunk_fill_ms); - else - chunk_wait = 0; - new_aggressive = (u_int)(chunk_wait / lsm_tree->merge_min); - - if (new_aggressive > lsm_tree->merge_aggressiveness) { - WT_RET(__wt_verbose(session, WT_VERB_LSM, - "LSM merge %s got aggressive (old %u new %u), " - "merge_min %d, %u / %" PRIu64, - lsm_tree->name, lsm_tree->merge_aggressiveness, - new_aggressive, lsm_tree->merge_min, stallms, - lsm_tree->chunk_fill_ms)); - lsm_tree->merge_aggressiveness = new_aggressive; - } - return (0); -} - -/* * __lsm_manager_worker_shutdown -- * Shutdown the LSM manager and worker threads. */ @@ -428,8 +390,6 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) { if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE)) continue; - WT_ERR(__lsm_manager_aggressive_update( - session, lsm_tree)); WT_ERR(__wt_epoch(session, &now)); pushms = lsm_tree->work_push_ts.tv_sec == 0 ? 0 : WT_TIMEDIFF( @@ -458,7 +418,8 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) lsm_tree->nchunks > 1) || (lsm_tree->queue_ref == 0 && lsm_tree->nchunks > 1) || - (lsm_tree->merge_aggressiveness > 3 && + (lsm_tree->merge_aggressiveness > + WT_LSM_AGGRESSIVE_THRESHOLD && !F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) || pushms > fillms) { WT_ERR(__wt_lsm_manager_push_entry( @@ -469,7 +430,8 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session) session, WT_LSM_WORK_FLUSH, 0, lsm_tree)); WT_ERR(__wt_lsm_manager_push_entry( session, WT_LSM_WORK_BLOOM, 0, lsm_tree)); - WT_ERR(__wt_verbose(session, WT_VERB_LSM, + WT_ERR(__wt_verbose(session, + WT_VERB_LSM_MANAGER, "MGR %s: queue %d mod %d nchunks %d" " flags 0x%x aggressive %d pushms %" PRIu64 " fillms %" PRIu64, diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index d75f3b0619b..d7e684b8f51 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -40,6 +40,101 @@ __wt_lsm_merge_update_tree(WT_SESSION_IMPL *session, } /* + * __lsm_merge_aggressive_clear -- + * We found a merge to do - clear the aggressive timer. + */ +static int +__lsm_merge_aggressive_clear(WT_LSM_TREE *lsm_tree) +{ + F_CLR(lsm_tree, WT_LSM_TREE_AGGRESSIVE_TIMER); + lsm_tree->merge_aggressiveness = 0; + return (0); +} + +/* + * __lsm_merge_aggressive_update -- + * Update the merge aggressiveness for an LSM tree. + */ +static int +__lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) +{ + struct timespec now; + uint64_t msec_since_last_merge, msec_to_create_merge; + u_int new_aggressive; + + new_aggressive = 0; + + /* + * If the tree is open read-only or we are compacting, be very + * aggressive. Otherwise, we can spend a long time waiting for merges + * to start in read-only applications. + */ + if (!lsm_tree->modified || + F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) { + lsm_tree->merge_aggressiveness = 10; + return (0); + } + + /* + * Only get aggressive if a reasonable number of flushes have been + * completed since opening the tree. + */ + if (lsm_tree->chunks_flushed <= lsm_tree->merge_min) + return (__lsm_merge_aggressive_clear(lsm_tree)); + + /* + * Start the timer if it isn't running. Use a flag to define whether + * the timer is running - since clearing and checking a special + * timer value isn't simple. + */ + if (!F_ISSET(lsm_tree, WT_LSM_TREE_AGGRESSIVE_TIMER)) { + F_SET(lsm_tree, WT_LSM_TREE_AGGRESSIVE_TIMER); + return (__wt_epoch(session, &lsm_tree->merge_aggressive_ts)); + } + + WT_RET(__wt_epoch(session, &now)); + msec_since_last_merge = + WT_TIMEDIFF(now, lsm_tree->merge_aggressive_ts) / WT_MILLION; + + /* + * If there is no estimate for how long it's taking to fill chunks + * pick 10 seconds. + */ + msec_to_create_merge = lsm_tree->merge_min * + (lsm_tree->chunk_fill_ms == 0 ? 10000 : lsm_tree->chunk_fill_ms); + + /* + * Don't consider getting aggressive until enough time has passed that + * we should have created enough chunks to trigger a new merge. We + * track average chunk-creation time - hence the "should"; the average + * fill time may not reflect the actual state if an application + * generates a variable load. + */ + if (msec_since_last_merge < msec_to_create_merge) + return (0); + + /* + * Bump how aggressively we look for merges based on how long since + * the last merge complete. The aggressive setting only increases + * slowly - triggering merges across generations of chunks isn't + * an efficient use of resources. + */ + while ((msec_since_last_merge /= msec_to_create_merge) > 1) + ++new_aggressive; + + if (new_aggressive > lsm_tree->merge_aggressiveness) { + WT_RET(__wt_verbose(session, WT_VERB_LSM, + "LSM merge %s got aggressive (old %u new %u), " + "merge_min %d, %u / %" PRIu64, + lsm_tree->name, lsm_tree->merge_aggressiveness, + new_aggressive, lsm_tree->merge_min, + msec_since_last_merge, lsm_tree->chunk_fill_ms)); + lsm_tree->merge_aggressiveness = new_aggressive; + } + return (0); +} + +/* * __lsm_merge_span -- * Figure out the best span of chunks to merge. Return an error if * there is no need to do any merges. Called with the LSM tree @@ -53,6 +148,7 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, uint32_t aggressive, max_gap, max_gen, max_level; uint64_t record_count, chunk_size; u_int end_chunk, i, merge_max, merge_min, nchunks, start_chunk; + u_int oldest_gen, youngest_gen; chunk_size = 0; nchunks = 0; @@ -64,18 +160,9 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, *end = 0; *records = 0; - /* - * If the tree is open read-only or we are compacting, be very - * aggressive. Otherwise, we can spend a long time waiting for merges - * to start in read-only applications. - */ - if (!lsm_tree->modified || - F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) - lsm_tree->merge_aggressiveness = 10; - aggressive = lsm_tree->merge_aggressiveness; merge_max = (aggressive > WT_LSM_AGGRESSIVE_THRESHOLD) ? - 100 : lsm_tree->merge_min; + 100 : lsm_tree->merge_max; merge_min = (aggressive > WT_LSM_AGGRESSIVE_THRESHOLD) ? 2 : lsm_tree->merge_min; max_gap = (aggressive + 4) / 5; @@ -127,6 +214,8 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, * with the most recent set of chunks and work backwards until going * further becomes significantly less efficient. */ +retry_find: + oldest_gen = youngest_gen = lsm_tree->chunk[end_chunk]->generation; for (start_chunk = end_chunk + 1, record_count = 0; start_chunk > 0; ) { chunk = lsm_tree->chunk[start_chunk - 1]; @@ -159,6 +248,15 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, chunk_size - youngest->size > lsm_tree->chunk_max)) break; + /* Track chunk generations seen while looking for a merge */ + if (chunk->generation < youngest_gen) + youngest_gen = chunk->generation; + else if (chunk->generation > oldest_gen) + oldest_gen = chunk->generation; + + if (oldest_gen - youngest_gen > max_gap) + break; + /* * If we have enough chunks for a merge and the next chunk is * in too high a generation, stop. @@ -176,18 +274,23 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, --start_chunk; /* - * If we have a full window, or the merge would be too big, - * remove the youngest chunk. + * If the merge would be too big, or we have a full window + * and we could include an older chunk if the window wasn't + * full, remove the youngest chunk. */ - if (nchunks == merge_max || - chunk_size > lsm_tree->chunk_max) { + if (chunk_size > lsm_tree->chunk_max || + (nchunks == merge_max && start_chunk > 0 && + chunk->generation == + lsm_tree->chunk[start_chunk - 1]->generation)) { WT_ASSERT(session, F_ISSET(youngest, WT_LSM_CHUNK_MERGING)); F_CLR(youngest, WT_LSM_CHUNK_MERGING); record_count -= youngest->count; chunk_size -= youngest->size; --end_chunk; - } + } else if (nchunks == merge_max) + /* We've found the best full merge we can */ + break; } nchunks = (end_chunk + 1) - start_chunk; @@ -208,17 +311,28 @@ __lsm_merge_span(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, * generations. */ if (nchunks < merge_min || - lsm_tree->chunk[end_chunk]->generation > - youngest->generation + max_gap) { + oldest_gen - youngest_gen > max_gap) { for (i = 0; i < nchunks; i++) { chunk = lsm_tree->chunk[start_chunk + i]; WT_ASSERT(session, F_ISSET(chunk, WT_LSM_CHUNK_MERGING)); F_CLR(chunk, WT_LSM_CHUNK_MERGING); } + /* + * If we didn't find a merge with appropriate gaps, try again + * with a smaller range. + */ + if (end_chunk > lsm_tree->merge_min && + oldest_gen - youngest_gen > max_gap) { + --end_chunk; + goto retry_find; + } + /* Consider getting aggressive if no merge was found */ + WT_RET(__lsm_merge_aggressive_update(session, lsm_tree)); return (WT_NOTFOUND); } + WT_RET(__lsm_merge_aggressive_clear(lsm_tree)); *records = record_count; *start = start_chunk; *end = end_chunk; @@ -299,8 +413,12 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id) start_chunk, end_chunk, dest_id, record_count, generation)); for (verb = start_chunk; verb <= end_chunk; verb++) WT_ERR(__wt_verbose(session, WT_VERB_LSM, - "%s: Chunk[%u] id %u", - lsm_tree->name, verb, lsm_tree->chunk[verb]->id)); + "Merging %s: Chunk[%u] id %u, gen: %" PRIu32 + ", size: %" PRIu64 ", records: %" PRIu64, + lsm_tree->name, verb, lsm_tree->chunk[verb]->id, + lsm_tree->chunk[verb]->generation, + lsm_tree->chunk[verb]->size, + lsm_tree->chunk[verb]->count)); } WT_ERR(__wt_calloc_one(session, &chunk)); diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c index 547a4096260..c3bee162ea1 100644 --- a/src/lsm/lsm_work_unit.c +++ b/src/lsm/lsm_work_unit.c @@ -333,6 +333,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session, /* Update the flush timestamp to help track ongoing progress. */ WT_RET(__wt_epoch(session, &lsm_tree->last_flush_ts)); + ++lsm_tree->chunks_flushed; /* Lock the tree, mark the chunk as on disk and update the metadata. */ WT_RET(__wt_lsm_tree_writelock(session, lsm_tree)); diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index d1272df763d..8ed4a117641 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -19,7 +19,7 @@ static WT_THREAD_RET __lsm_worker(void *); int __wt_lsm_worker_start(WT_SESSION_IMPL *session, WT_LSM_WORKER_ARGS *args) { - WT_RET(__wt_verbose(session, WT_VERB_LSM, + WT_RET(__wt_verbose(session, WT_VERB_LSM_MANAGER, "Start LSM worker %d type 0x%x", args->id, args->type)); return (__wt_thread_create(session, &args->tid, __lsm_worker, args)); } |