diff options
author | Alex Gorrod <alexander.gorrod@mongodb.com> | 2016-12-12 12:23:13 +1100 |
---|---|---|
committer | Alex Gorrod <alexander.gorrod@mongodb.com> | 2016-12-12 12:23:13 +1100 |
commit | 21a6f07d859c132154166bd3d83bbed238d5d719 (patch) | |
tree | bc261840853dda4307c68fd1c889caf4c89dd3d3 /src/third_party/wiredtiger/src/session/session_compact.c | |
parent | 7cf929f25638e4ad9525775c8ea0e18f3c86faf5 (diff) | |
download | mongo-21a6f07d859c132154166bd3d83bbed238d5d719.tar.gz |
Import wiredtiger: 1b6c815a3fd34f14c20d5cd627155799d1de535c from branch mongodb-3.6
ref: ca6eee06ff..1b6c815a3f
for: 3.5.1
WT-2336 Add a test validating schema operations via file system call monitoring
WT-2670 Add option to configure read-ahead per table and change default behavior
WT-2960 Inserting multi-megabyte values can cause pathological lookaside usage
WT-2969 Fix a bug that could cause snapshot corruption during compaction
WT-3014 Add GCC/clang support for ELF symbol visibility.
WT-3021 Fixes needed for Java log cursor example, Java raw mode cursors, and log cursors in raw mode
WT-3025 Fix error path in log_force_sync
WT-3028 Workloads with all dirty pages could trigger diagnostic stuck check
WT-3030 Test failure indicating invalid key order during traversal
WT-3034 Add support for single-writer named snapshots.
WT-3037 Fix some outdated comments in logging
WT-3048 WiredTiger maximum size warning uses the wrong format.
WT-3051 Remove external __wt_hex symbol.
WT-3052 Improve search if an index hint is wrong
WT-3053 Review Python and Java calls to internal WiredTiger functions
WT-3054 Java PackTest, PackTest03 do not compile
WT-3055 Java AsyncTest faults
WT-3057 WiredTiger hazard pointers should use the WT_REF, not the WT_PAGE.
WT-3064 minor tree cleanups: .gitignore, NEWS misspelling
Diffstat (limited to 'src/third_party/wiredtiger/src/session/session_compact.c')
-rw-r--r-- | src/third_party/wiredtiger/src/session/session_compact.c | 246 |
1 file changed, 154 insertions, 92 deletions
diff --git a/src/third_party/wiredtiger/src/session/session_compact.c b/src/third_party/wiredtiger/src/session/session_compact.c index 66635007723..85214ae6d98 100644 --- a/src/third_party/wiredtiger/src/session/session_compact.c +++ b/src/third_party/wiredtiger/src/session/session_compact.c @@ -53,11 +53,14 @@ * blocks, it can't easily know this is the case, and so we'll waste a lot of * effort trying to compact files that can't be compacted. * - * Now, to the actual process. First, we checkpoint the high-level object - * (which is potentially composed of multiple files): there are potentially - * many dirty blocks in the cache, and we want to write them out and then - * discard previous checkpoints so we have as many blocks as possible on the - * file's "available for reuse" list when we start compaction. + * Finally, compaction checkpoints are database-wide, otherwise we can corrupt + * file relationships, for example, an index checkpointed by compaction could + * be out of sync with the primary after a crash. + * + * Now, to the actual process. First, we checkpoint the database: there are + * potentially many dirty blocks in the cache, and we want to write them out + * and then discard previous checkpoints so we have as many blocks as possible + * on the file's "available for reuse" list when we start compaction. * * Then, we compact the high-level object. * @@ -97,29 +100,6 @@ */ /* - * __compact_uri_analyze -- - * Extract information relevant to deciding what work compact needs to - * do from a URI that is part of a table schema. - * Called via the schema_worker function. - */ -static int -__compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, bool *skipp) -{ - /* - * Add references to schema URI objects to the list of objects to be - * compacted. Skip over LSM trees or we will get false positives on - * the "file:" URIs for the chunks. 
- */ - if (WT_PREFIX_MATCH(uri, "lsm:")) { - session->compact->lsm_count++; - *skipp = true; - } else if (WT_PREFIX_MATCH(uri, "file:")) - session->compact->file_count++; - - return (0); -} - -/* * __compact_start -- * Start object compaction. */ @@ -146,6 +126,29 @@ __compact_end(WT_SESSION_IMPL *session) } /* + * __compact_uri_analyze -- + * Extract information relevant to deciding what work compact needs to + * do from a URI that is part of a table schema. + * Called via the schema_worker function. + */ +static int +__compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, bool *skipp) +{ + /* + * Add references to schema URI objects to the list of objects to be + * compacted. Skip over LSM trees or we will get false positives on + * the "file:" URIs for the chunks. + */ + if (WT_PREFIX_MATCH(uri, "lsm:")) { + session->compact->lsm_count++; + *skipp = true; + } else if (WT_PREFIX_MATCH(uri, "file:")) + session->compact->file_count++; + + return (0); +} + +/* * __compact_handle_append -- * Gather a file handle to be compacted. * Called via the schema_worker function. @@ -157,10 +160,6 @@ __compact_handle_append(WT_SESSION_IMPL *session, const char *cfg[]) WT_UNUSED(cfg); - /* Make sure there is space for the next entry. */ - WT_RET(__wt_realloc_def(session, &session->op_handle_allocated, - session->op_handle_next + 1, &session->op_handle)); - WT_RET(__wt_session_get_btree( session, session->dhandle->name, NULL, NULL, 0)); @@ -170,16 +169,20 @@ __compact_handle_append(WT_SESSION_IMPL *session, const char *cfg[]) return (ret); } + /* Make sure there is space for the next entry. */ + WT_RET(__wt_realloc_def(session, &session->op_handle_allocated, + session->op_handle_next + 1, &session->op_handle)); + session->op_handle[session->op_handle_next++] = session->dhandle; return (0); } /* - * __session_compact_check_timeout -- + * __wt_session_compact_check_timeout -- * Check if the timeout has been exceeded. 
*/ -static int -__session_compact_check_timeout(WT_SESSION_IMPL *session, struct timespec begin) +int +__wt_session_compact_check_timeout(WT_SESSION_IMPL *session) { struct timespec end; @@ -187,64 +190,116 @@ __session_compact_check_timeout(WT_SESSION_IMPL *session, struct timespec begin) return (0); __wt_epoch(session, &end); - if (session->compact->max_time < WT_TIMEDIFF_SEC(end, begin)) - return (ETIMEDOUT); - return (0); + return (session->compact->max_time > + WT_TIMEDIFF_SEC(end, session->compact->begin) ? 0 : ETIMEDOUT); } /* - * __compact_file -- - * Function to alternate between checkpoints and compaction calls. + * __compact_checkpoint -- + * Perform a checkpoint for compaction. */ static int -__compact_file(WT_SESSION_IMPL *session, const char *cfg[]) +__compact_checkpoint(WT_SESSION_IMPL *session) { - struct timespec start_time; - WT_DATA_HANDLE *dhandle; - WT_DECL_ITEM(t); + WT_TXN_GLOBAL *txn_global; WT_DECL_RET; - int i; + uint64_t txn_gen; + + /* + * Force compaction checkpoints: we don't want to skip it because the + * work we need to have done is done in the underlying block manager. + */ const char *checkpoint_cfg[] = { - WT_CONFIG_BASE(session, WT_SESSION_checkpoint), NULL, NULL }; + WT_CONFIG_BASE(session, WT_SESSION_checkpoint), "force=1", NULL }; + + /* Checkpoints take a lot of time, check if we've run out. */ + WT_RET(__wt_session_compact_check_timeout(session)); + + if ((ret = __wt_txn_checkpoint(session, checkpoint_cfg, false)) == 0) + return (0); + WT_RET_BUSY_OK(ret); + + /* + * If there's a checkpoint running, wait for it to complete, checking if + * we're out of time. If there's no checkpoint running or the checkpoint + * generation number changes, the checkpoint blocking us has completed. 
+ */ + txn_global = &S2C(session)->txn_global; + for (txn_gen = txn_global->checkpoint_gen;;) { + WT_READ_BARRIER(); + if (!txn_global->checkpoint_running || + txn_gen != txn_global->checkpoint_gen) + break; + + WT_RET(__wt_session_compact_check_timeout(session)); + __wt_sleep(2, 0); + } + + return (0); +} - dhandle = session->dhandle; +/* + * __compact_worker -- + * Function to alternate between checkpoints and compaction calls. + */ +static int +__compact_worker(WT_SESSION_IMPL *session) +{ + WT_DECL_RET; + u_int i, loop; + bool didwork; /* - * Force the checkpoint: we don't want to skip it because the work we - * need to have done is done in the underlying block manager. + * Reset the handles' compaction skip flag (we don't bother setting + * or resetting it when we finish compaction, it's simpler to do it + * once, here). */ - WT_ERR(__wt_scr_alloc(session, 128, &t)); - WT_ERR(__wt_buf_fmt( - session, t, "target=(\"%s\"),force=1", dhandle->name)); - checkpoint_cfg[1] = t->data; + for (i = 0; i < session->op_handle_next; ++i) + session->op_handle[i]->compact_skip = false; - __wt_epoch(session, &start_time); + /* + * Perform an initial checkpoint (see this file's leading comment for + * details). + */ + WT_ERR(__compact_checkpoint(session)); /* - * We compact 10% of the file on each pass (but the overall size of the + * We compact 10% of a file on each pass (but the overall size of the * file is decreasing each time, so we're not compacting 10% of the * original file each time). Try 100 times (which is clearly more than - * we need); quit if we make no progress and check for a timeout each - * time through the loop. + * we need); quit if we make no progress. 
*/ - for (i = 0; i < 100; ++i) { - WT_ERR(__wt_txn_checkpoint(session, checkpoint_cfg)); - - session->compact_state = WT_COMPACT_RUNNING; - WT_WITH_DHANDLE(session, dhandle, - ret = __wt_compact(session, cfg)); - WT_ERR(ret); - if (session->compact_state != WT_COMPACT_SUCCESS) + for (loop = 0; loop < 100; ++loop) { + /* Step through the list of files being compacted. */ + for (didwork = false, i = 0; i < session->op_handle_next; ++i) { + /* Skip objects where there's no more work. */ + if (session->op_handle[i]->compact_skip) + continue; + + session->compact_state = WT_COMPACT_RUNNING; + WT_WITH_DHANDLE(session, + session->op_handle[i], ret = __wt_compact(session)); + WT_ERR(ret); + + /* If we did no work, skip this file in the future. */ + if (session->compact_state == WT_COMPACT_SUCCESS) + didwork = true; + else + session->op_handle[i]->compact_skip = true; + } + if (!didwork) break; - WT_ERR(__wt_txn_checkpoint(session, checkpoint_cfg)); - WT_ERR(__wt_txn_checkpoint(session, checkpoint_cfg)); - WT_ERR(__session_compact_check_timeout(session, start_time)); + /* + * Perform two checkpoints (see this file's leading comment for + * details). + */ + WT_ERR(__compact_checkpoint(session)); + WT_ERR(__compact_checkpoint(session)); } err: session->compact_state = WT_COMPACT_NONE; - __wt_scr_free(session, &t); return (ret); } @@ -256,8 +311,9 @@ int __wt_session_compact( WT_SESSION *wt_session, const char *uri, const char *config) { - WT_COMPACT compact; + WT_COMPACT_STATE compact; WT_CONFIG_ITEM cval; + WT_DATA_SOURCE *dsrc; WT_DECL_RET; WT_SESSION_IMPL *session; WT_TXN *txn; @@ -270,6 +326,17 @@ __wt_session_compact( if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY)) goto err; + /* + * Non-LSM object compaction requires checkpoints, which are impossible + * in transactional contexts. Disallow in all contexts (there's no + * reason for LSM to allow this, possible or not), and check now so the + * error message isn't confusing. 
+ */ + txn = &session->txn; + if (F_ISSET(txn, WT_TXN_RUNNING)) + WT_ERR_MSG(session, EINVAL, + "compaction not permitted in a transaction"); + /* Disallow objects in the WiredTiger name space. */ WT_ERR(__wt_str_name_check(session, uri)); @@ -277,18 +344,28 @@ __wt_session_compact( !WT_PREFIX_MATCH(uri, "file:") && !WT_PREFIX_MATCH(uri, "index:") && !WT_PREFIX_MATCH(uri, "lsm:") && - !WT_PREFIX_MATCH(uri, "table:")) - WT_ERR(__wt_bad_object_type(session, uri)); + !WT_PREFIX_MATCH(uri, "table:")) { + if ((dsrc = __wt_schema_get_source(session, uri)) != NULL) + ret = dsrc->compact == NULL ? + __wt_object_unsupported(session, uri) : + dsrc->compact( + dsrc, wt_session, uri, (WT_CONFIG_ARG *)cfg); + else + ret = __wt_bad_object_type(session, uri); + goto err; + } - /* Setup the structure in the session handle */ - memset(&compact, 0, sizeof(WT_COMPACT)); + /* Setup the session handle's compaction state structure. */ + memset(&compact, 0, sizeof(WT_COMPACT_STATE)); session->compact = &compact; + /* Compaction can be time-limited. */ WT_ERR(__wt_config_gets(session, cfg, "timeout", &cval)); session->compact->max_time = (uint64_t)cval.val; + __wt_epoch(session, &session->compact->begin); /* Find the types of data sources being compacted. */ - WT_WITH_SCHEMA_LOCK(session, ret, + WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker(session, uri, __compact_handle_append, __compact_uri_analyze, cfg, 0)); WT_ERR(ret); @@ -296,23 +373,8 @@ __wt_session_compact( if (session->compact->lsm_count != 0) WT_ERR(__wt_schema_worker( session, uri, NULL, __wt_lsm_compact, cfg, 0)); - if (session->compact->file_count != 0) { - /* - * File compaction requires checkpoints, which will fail in a - * transactional context. Check now so the error message isn't - * confusing. 
- */ - txn = &session->txn; - if (F_ISSET(txn, WT_TXN_RUNNING)) - WT_ERR_MSG(session, EINVAL, - " File compaction not permitted in a transaction"); - - for (i = 0; i < session->op_handle_next; ++i) { - WT_WITH_DHANDLE(session, session->op_handle[i], - ret = __compact_file(session, cfg)); - WT_ERR(ret); - } - } + if (session->compact->file_count != 0) + WT_ERR(__compact_worker(session)); err: session->compact = NULL; |