diff options
author | Keith Bostic <keith@wiredtiger.com> | 2016-03-21 15:32:09 -0400 |
---|---|---|
committer | Keith Bostic <keith@wiredtiger.com> | 2016-03-21 15:32:09 -0400 |
commit | fe0efb43cd749b214b4ff5e53c386b82479bb860 (patch) | |
tree | 06a4aa44e0a7db531edf8f33ade5af58edfa90e8 /src/txn/txn_ckpt.c | |
parent | 811487a4b7576140e2a406786d62caa3c8b3e6a5 (diff) | |
parent | 9f81190f9929323fef6f27a89c6a2c02fffcfae9 (diff) | |
download | mongo-fe0efb43cd749b214b4ff5e53c386b82479bb860.tar.gz |
Merge branch 'develop' into wt-2330
Diffstat (limited to 'src/txn/txn_ckpt.c')
-rw-r--r-- | src/txn/txn_ckpt.c | 311 |
1 files changed, 179 insertions, 132 deletions
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 1283deb7032..2c1fcfa8155 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -8,6 +8,10 @@ #include "wt_internal.h" +static int __checkpoint_lock_tree( + WT_SESSION_IMPL *, bool, bool, const char *[]); +static int __checkpoint_tree_helper(WT_SESSION_IMPL *, const char *[]); + /* * __wt_checkpoint_name_ok -- * Complain if the checkpoint name isn't acceptable. @@ -224,11 +228,11 @@ __checkpoint_data_source(WT_SESSION_IMPL *session, const char *cfg[]) } /* - * __wt_checkpoint_list -- + * __wt_checkpoint_get_handles -- * Get a list of handles to flush. */ int -__wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[]) +__wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) { WT_DECL_RET; const char *name; @@ -254,6 +258,13 @@ __wt_checkpoint_list(WT_SESSION_IMPL *session, const char *cfg[]) if ((ret = __wt_session_get_btree(session, name, NULL, NULL, 0)) != 0) return (ret == EBUSY ? 0 : ret); + WT_SAVE_DHANDLE(session, + ret = __checkpoint_lock_tree(session, true, true, cfg)); + if (ret != 0) { + WT_TRET(__wt_session_release_btree(session)); + return (ret); + } + session->ckpt_handle[session->ckpt_handle_next++] = session->dhandle; return (0); } @@ -267,7 +278,7 @@ __checkpoint_write_leaves(WT_SESSION_IMPL *session, const char *cfg[]) { WT_UNUSED(cfg); - return (__wt_cache_op(session, NULL, WT_SYNC_WRITE_LEAVES)); + return (__wt_cache_op(session, WT_SYNC_WRITE_LEAVES)); } /* @@ -371,15 +382,20 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) /* Configure logging only if doing a full checkpoint. */ logging = FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED); + /* Keep track of handles acquired for locking. */ + WT_ERR(__wt_meta_track_on(session)); + tracking = true; + /* * Get a list of handles we want to flush; this may pull closed objects * into the session cache, but we're going to do that eventually anyway. */ + WT_ASSERT(session, session->ckpt_handle_next == 0); WT_WITH_SCHEMA_LOCK(session, ret, WT_WITH_TABLE_LOCK(session, ret, WT_WITH_HANDLE_LIST_LOCK(session, ret = __checkpoint_apply_all( - session, cfg, __wt_checkpoint_list, NULL)))); + session, cfg, __wt_checkpoint_get_handles, NULL)))); WT_ERR(ret); /* @@ -410,10 +426,6 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) */ WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync)); - /* Start the checkpoint for real. */ - WT_ERR(__wt_meta_track_on(session)); - tracking = true; - /* Tell logging that we are about to start a database checkpoint. */ if (full && logging) WT_ERR(__wt_txn_checkpoint_log( @@ -426,6 +438,8 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__wt_epoch(session, &start)); /* + * Start the checkpoint for real. + * * Bump the global checkpoint generation, used to figure out whether * checkpoint has visited a tree. There is no need for this to be * atomic: it is only written while holding the checkpoint lock. @@ -489,7 +503,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__wt_txn_checkpoint_log( session, full, WT_TXN_LOG_CKPT_START, NULL)); - WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint)); + WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_tree_helper)); /* * Clear the dhandle so the visibility check doesn't get confused about @@ -752,14 +766,13 @@ __drop_to(WT_CKPT *ckptbase, const char *name, size_t len) } /* - * __checkpoint_worker -- - * Checkpoint a tree. + * __checkpoint_lock_tree -- + * Acquire the locks required to checkpoint a tree. */ static int -__checkpoint_worker(WT_SESSION_IMPL *session, - const char *cfg[], bool is_checkpoint, bool need_tracking) +__checkpoint_lock_tree(WT_SESSION_IMPL *session, + bool is_checkpoint, bool need_tracking, const char *cfg[]) { - WT_BM *bm; WT_BTREE *btree; WT_CKPT *ckpt, *ckptbase; WT_CONFIG dropconf; @@ -767,19 +780,15 @@ __checkpoint_worker(WT_SESSION_IMPL *session, WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle; WT_DECL_RET; - WT_LSN ckptlsn; - int deleted, was_modified; - bool fake_ckpt, force, hot_backup_locked; - const char *name; char *name_alloc; + const char *name; + bool hot_backup_locked; btree = S2BT(session); - bm = btree->bm; conn = S2C(session); ckpt = ckptbase = NULL; dhandle = session->dhandle; - was_modified = btree->modified; - fake_ckpt = hot_backup_locked = false; + hot_backup_locked = false; name_alloc = NULL; /* @@ -798,15 +807,6 @@ __checkpoint_worker(WT_SESSION_IMPL *session, WT_ASSERT(session, !need_tracking || WT_IS_METADATA(session, dhandle) || WT_META_TRACKING(session)); - /* - * Set the checkpoint LSN to the maximum LSN so that if logging is - * disabled, recovery will never roll old changes forward over the - * non-logged changes in this checkpoint. If logging is enabled, a - * real checkpoint LSN will be assigned later for this checkpoint and - * overwrite this. - */ - WT_MAX_LSN(&ckptlsn); - /* Get the list of checkpoints for this file. */ WT_RET(__wt_meta_ckptlist_get(session, dhandle->name, &ckptbase)); @@ -857,70 +857,6 @@ __checkpoint_worker(WT_SESSION_IMPL *session, /* Drop checkpoints with the same name as the one we're taking. */ __drop(ckptbase, name, strlen(name)); - /* - * Check for clean objects not requiring a checkpoint. - * - * If we're closing a handle, and the object is clean, we can skip the - * checkpoint, whatever checkpoints we have are sufficient. (We might - * not have any checkpoints if the object was never modified, and that's - * OK: the object creation code doesn't mark the tree modified so we can - * skip newly created trees here.) - * - * If the application repeatedly checkpoints an object (imagine hourly - * checkpoints using the same explicit or internal name), there's no - * reason to repeat the checkpoint for clean objects. The test is if - * the only checkpoint we're deleting is the last one in the list and - * it has the same name as the checkpoint we're about to take, skip the - * work. (We can't skip checkpoints that delete more than the last - * checkpoint because deleting those checkpoints might free up space in - * the file.) This means an application toggling between two (or more) - * checkpoint names will repeatedly take empty checkpoints, but that's - * not likely enough to make detection worthwhile. - * - * Checkpoint read-only objects otherwise: the application must be able - * to open the checkpoint in a cursor after taking any checkpoint, which - * means it must exist. - */ - force = false; - F_CLR(btree, WT_BTREE_SKIP_CKPT); - if (!btree->modified && cfg != NULL) { - ret = __wt_config_gets(session, cfg, "force", &cval); - if (ret != 0 && ret != WT_NOTFOUND) - WT_ERR(ret); - if (ret == 0 && cval.val != 0) - force = true; - } - if (!btree->modified && !force) { - if (!is_checkpoint) - goto nockpt; - - deleted = 0; - WT_CKPT_FOREACH(ckptbase, ckpt) - if (F_ISSET(ckpt, WT_CKPT_DELETE)) - ++deleted; - /* - * Complicated test: if the last checkpoint in the object has - * the same name as the checkpoint we're taking (correcting for - * internal checkpoint names with their generational suffix - * numbers), we can skip the checkpoint, there's nothing to do. - * The exception is if we're deleting two or more checkpoints: - * then we may save space. - */ - if (ckpt > ckptbase && - (strcmp(name, (ckpt - 1)->name) == 0 || - (WT_PREFIX_MATCH(name, WT_CHECKPOINT) && - WT_PREFIX_MATCH((ckpt - 1)->name, WT_CHECKPOINT))) && - deleted < 2) { -nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT); - WT_PUBLISH(btree->checkpoint_gen, - S2C(session)->txn_global.checkpoint_gen); - WT_STAT_FAST_DATA_SET(session, - btree_checkpoint_generation, - btree->checkpoint_gen); - goto done; - } - } - /* Add a new checkpoint entry at the end of the list. */ WT_CKPT_FOREACH(ckptbase, ckpt) ; @@ -1005,32 +941,119 @@ nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT); * copy instead of forcing checkpoints on clean objects to associate * names with checkpoints. */ - if (is_checkpoint) - switch (F_MASK(btree, WT_BTREE_SPECIAL_FLAGS)) { - case 0: - break; - case WT_BTREE_BULK: - /* - * The only checkpoints a bulk-loaded file should have - * are fake ones we created without the underlying block - * manager. I'm leaving this code here because it's a - * cheap test and a nasty race. - */ - WT_CKPT_FOREACH(ckptbase, ckpt) - if (!F_ISSET(ckpt, WT_CKPT_ADD | WT_CKPT_FAKE)) - WT_ERR_MSG(session, ret, - "block-manager checkpoint found " - "for a bulk-loaded file"); - fake_ckpt = true; - goto fake; - case WT_BTREE_REBALANCE: - case WT_BTREE_SALVAGE: - case WT_BTREE_UPGRADE: - case WT_BTREE_VERIFY: - WT_ERR_MSG(session, EINVAL, - "checkpoints are blocked during rebalance, " - "salvage, upgrade or verify operations"); + WT_ASSERT(session, + !is_checkpoint || !F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)); + + hot_backup_locked = false; + WT_ERR(__wt_readunlock(session, conn->hot_backup_lock)); + + WT_ASSERT(session, btree->ckpt == NULL); + btree->ckpt = ckptbase; + + return (0); + +err: if (hot_backup_locked) + WT_TRET(__wt_readunlock(session, conn->hot_backup_lock)); + + __wt_meta_ckptlist_free(session, ckptbase); + __wt_free(session, name_alloc); + + return (ret); +} + +/* + * __checkpoint_tree -- + * Checkpoint a single tree. + * Assumes all necessary locks have been acquired by the caller. + */ +static int +__checkpoint_tree( + WT_SESSION_IMPL *session, bool is_checkpoint, const char *cfg[]) +{ + WT_BM *bm; + WT_BTREE *btree; + WT_CKPT *ckpt, *ckptbase; + WT_CONFIG_ITEM cval; + WT_CONNECTION_IMPL *conn; + WT_DATA_HANDLE *dhandle; + WT_DECL_RET; + WT_LSN ckptlsn; + const char *name; + int deleted, was_modified; + bool fake_ckpt, force; + + btree = S2BT(session); + bm = btree->bm; + ckptbase = btree->ckpt; + conn = S2C(session); + dhandle = session->dhandle; + fake_ckpt = false; + was_modified = btree->modified; + + /* + * Check for clean objects not requiring a checkpoint. + * + * If we're closing a handle, and the object is clean, we can skip the + * checkpoint, whatever checkpoints we have are sufficient. (We might + * not have any checkpoints if the object was never modified, and that's + * OK: the object creation code doesn't mark the tree modified so we can + * skip newly created trees here.) + * + * If the application repeatedly checkpoints an object (imagine hourly + * checkpoints using the same explicit or internal name), there's no + * reason to repeat the checkpoint for clean objects. The test is if + * the only checkpoint we're deleting is the last one in the list and + * it has the same name as the checkpoint we're about to take, skip the + * work. (We can't skip checkpoints that delete more than the last + * checkpoint because deleting those checkpoints might free up space in + * the file.) This means an application toggling between two (or more) + * checkpoint names will repeatedly take empty checkpoints, but that's + * not likely enough to make detection worthwhile. + * + * Checkpoint read-only objects otherwise: the application must be able + * to open the checkpoint in a cursor after taking any checkpoint, which + * means it must exist. + */ + force = false; + F_CLR(btree, WT_BTREE_SKIP_CKPT); + if (!btree->modified && cfg != NULL) { + ret = __wt_config_gets(session, cfg, "force", &cval); + if (ret != 0 && ret != WT_NOTFOUND) + WT_ERR(ret); + if (ret == 0 && cval.val != 0) + force = true; + } + if (!btree->modified && !force) { + if (!is_checkpoint) + goto nockpt; + + deleted = 0; + WT_CKPT_FOREACH(ckptbase, ckpt) + if (F_ISSET(ckpt, WT_CKPT_DELETE)) + ++deleted; + /* + * Complicated test: if the tree is clean and last two + * checkpoints have the same name (correcting for internal + * checkpoint names with their generational suffix numbers), we + * can skip the checkpoint, there's nothing to do. The + * exception is if we're deleting two or more checkpoints: then + * we may save space. + */ + name = (ckpt - 1)->name; + if (ckpt > ckptbase + 1 && deleted < 2 && + (strcmp(name, (ckpt - 2)->name) == 0 || + (WT_PREFIX_MATCH(name, WT_CHECKPOINT) && + WT_PREFIX_MATCH((ckpt - 2)->name, WT_CHECKPOINT)))) { +nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT); + WT_PUBLISH(btree->checkpoint_gen, + S2C(session)->txn_global.checkpoint_gen); + WT_STAT_FAST_DATA_SET(session, + btree_checkpoint_generation, + btree->checkpoint_gen); + ret = 0; + goto err; } + } /* * If an object has never been used (in other words, if it could become @@ -1077,6 +1100,15 @@ nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT); btree->modified = 0; WT_FULL_BARRIER(); + /* + * Set the checkpoint LSN to the maximum LSN so that if logging is + * disabled, recovery will never roll old changes forward over the + * non-logged changes in this checkpoint. If logging is enabled, a + * real checkpoint LSN will be assigned for this checkpoint and + * overwrite this. + */ + WT_MAX_LSN(&ckptlsn); + /* Tell logging that a file checkpoint is starting. */ if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) WT_ERR(__wt_txn_checkpoint_log( @@ -1084,9 +1116,9 @@ nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT); /* Flush the file from the cache, creating the checkpoint. */ if (is_checkpoint) - WT_ERR(__wt_cache_op(session, ckptbase, WT_SYNC_CHECKPOINT)); + WT_ERR(__wt_cache_op(session, WT_SYNC_CHECKPOINT)); else - WT_ERR(__wt_cache_op(session, ckptbase, WT_SYNC_CLOSE)); + WT_ERR(__wt_cache_op(session, WT_SYNC_CLOSE)); /* * All blocks being written have been written; set the object's write @@ -1144,7 +1176,6 @@ fake: /* WT_ERR(__wt_txn_checkpoint_log( session, false, WT_TXN_LOG_CKPT_STOP, NULL)); -done: err: /* * If the checkpoint didn't complete successfully, make sure the * tree is marked dirty. @@ -1152,30 +1183,42 @@ err: /* if (ret != 0 && !btree->modified && was_modified) btree->modified = 1; - if (hot_backup_locked) - WT_TRET(__wt_readunlock(session, conn->hot_backup_lock)); - __wt_meta_ckptlist_free(session, ckptbase); - __wt_free(session, name_alloc); + btree->ckpt = NULL; return (ret); } /* + * __checkpoint_tree_helper -- + * Checkpoint a tree (suitable for use in *_apply functions). + */ +static int +__checkpoint_tree_helper(WT_SESSION_IMPL *session, const char *cfg[]) +{ + return (__checkpoint_tree(session, true, cfg)); +} + +/* * __wt_checkpoint -- * Checkpoint a file. */ int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) { + WT_DECL_RET; + /* Should not be called with a checkpoint handle. */ WT_ASSERT(session, session->dhandle->checkpoint == NULL); - /* Should be holding the schema lock. */ + /* We must hold the metadata lock if checkpointing the metadata. */ WT_ASSERT(session, !WT_IS_METADATA(session, session->dhandle) || F_ISSET(session, WT_SESSION_LOCKED_METADATA)); - return (__checkpoint_worker(session, cfg, true, true)); + WT_SAVE_DHANDLE(session, + ret = __checkpoint_lock_tree(session, true, true, cfg)); + WT_RET(ret); + return (__checkpoint_tree(session, true, cfg)); } /* @@ -1225,7 +1268,7 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) if (F_ISSET(btree, WT_BTREE_NO_CHECKPOINT)) F_SET(session->dhandle, WT_DHANDLE_DEAD); if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD)) - return (__wt_cache_op(session, NULL, WT_SYNC_DISCARD)); + return (__wt_cache_op(session, WT_SYNC_DISCARD)); /* * If closing an unmodified file, check that no update is required @@ -1234,7 +1277,7 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) if (!btree->modified && !bulk) { __wt_txn_update_oldest(session, true); return (__wt_txn_visible_all(session, btree->rec_max_txn) ? - __wt_cache_op(session, NULL, WT_SYNC_DISCARD) : EBUSY); + __wt_cache_op(session, WT_SYNC_DISCARD) : EBUSY); } /* @@ -1248,10 +1291,14 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) if (need_tracking) WT_RET(__wt_meta_track_on(session)); - WT_TRET(__checkpoint_worker(session, NULL, false, need_tracking)); + WT_SAVE_DHANDLE(session, + ret = __checkpoint_lock_tree(session, false, need_tracking, NULL)); + WT_ASSERT(session, ret == 0); + if (ret == 0) + ret = __checkpoint_tree(session, false, NULL); if (need_tracking) - WT_RET(__wt_meta_track_off(session, true, ret != 0)); + WT_TRET(__wt_meta_track_off(session, true, ret != 0)); return (ret); } |