diff options
author | Michael Cahill <michael.cahill@mongodb.com> | 2017-03-13 17:13:31 +1100 |
---|---|---|
committer | Alex Gorrod <alexander.gorrod@mongodb.com> | 2017-03-13 17:13:31 +1100 |
commit | f72c78b74d42c9e89bc98ad56ba184536e8efcae (patch) | |
tree | fce94d8bca734bbd4ec9155fe88ded6fb25cf096 | |
parent | 1e05438f426c0c54a603f660fb7831eb2b9a523e (diff) | |
download | mongo-f72c78b74d42c9e89bc98ad56ba184536e8efcae.tar.gz |
WT-3207 Fix a leak if a checkpoint fails. (#3329)
Also switch to holding the schema lock when completing a bulk load.
This avoids a race with checkpoints starting, so avoids a failure mode
that was added to checkpoint earlier in this ticket. Assert that we
don't hit that case instead.
-rw-r--r-- | src/btree/bt_vrfy.c | 2 | ||||
-rw-r--r-- | src/conn/conn_dhandle.c | 10 | ||||
-rw-r--r-- | src/include/extern.h | 2 | ||||
-rw-r--r-- | src/meta/meta_ckpt.c | 10 | ||||
-rw-r--r-- | src/meta/meta_ext.c | 2 | ||||
-rw-r--r-- | src/session/session_dhandle.c | 10 | ||||
-rw-r--r-- | src/session/session_salvage.c | 2 | ||||
-rw-r--r-- | src/txn/txn_ckpt.c | 75 |
8 files changed, 50 insertions, 63 deletions
diff --git a/src/btree/bt_vrfy.c b/src/btree/bt_vrfy.c index 3c90e580696..7475811adc5 100644 --- a/src/btree/bt_vrfy.c +++ b/src/btree/bt_vrfy.c @@ -274,7 +274,7 @@ err: /* Inform the underlying block manager we're done. */ /* Discard the list of checkpoints. */ if (ckptbase != NULL) - __wt_meta_ckptlist_free(session, ckptbase); + __wt_meta_ckptlist_free(session, &ckptbase); /* Free allocated memory. */ __wt_scr_free(session, &vs->max_key); diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index 25795a8d309..6c8d66d63f8 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -152,11 +152,11 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) WT_RET(__wt_evict_file_exclusive_on(session)); /* - * If we don't already have the schema lock, make it an error to try - * to acquire it. The problem is that we are holding an exclusive - * lock on the handle, and if we attempt to acquire the schema lock - * we might deadlock with a thread that has the schema lock and wants - * a handle lock (specifically, checkpoint). + * If we don't already have the schema lock, make it an error to try to + * acquire it. The problem is that we are holding an exclusive lock on + * the handle, and if we attempt to acquire the schema lock we might + * deadlock with a thread that has the schema lock and wants a handle + * lock. */ no_schema_lock = false; if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) { diff --git a/src/include/extern.h b/src/include/extern.h index d0c9655fafb..db718966426 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -462,7 +462,7 @@ extern int __wt_meta_checkpoint_last_name( WT_SESSION_IMPL *session, const char extern int __wt_meta_checkpoint_clear(WT_SESSION_IMPL *session, const char *fname) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_ckptlist_get( WT_SESSION_IMPL *session, const char *fname, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_meta_ckptlist_set(WT_SESSION_IMPL *session, const char *fname, WT_CKPT *ckptbase, WT_LSN *ckptlsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); -extern void __wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT *ckptbase) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); +extern void __wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern void __wt_meta_checkpoint_free(WT_SESSION_IMPL *session, WT_CKPT *ckpt) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_ext_metadata_insert(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); extern int __wt_ext_metadata_remove( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden"))); diff --git a/src/meta/meta_ckpt.c b/src/meta/meta_ckpt.c index b985104c2eb..151bbe0e081 100644 --- a/src/meta/meta_ckpt.c +++ b/src/meta/meta_ckpt.c @@ -297,7 +297,7 @@ __wt_meta_ckptlist_get( *ckptbasep = ckptbase; if (0) { -err: __wt_meta_ckptlist_free(session, ckptbase); +err: __wt_meta_ckptlist_free(session, &ckptbase); } __wt_free(session, config); __wt_scr_free(session, &buf); @@ -463,16 +463,16 @@ err: __wt_scr_free(session, &buf); * Discard the checkpoint array. */ void -__wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT *ckptbase) +__wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT **ckptbasep) { - WT_CKPT *ckpt; + WT_CKPT *ckpt, *ckptbase; - if (ckptbase == NULL) + if ((ckptbase = *ckptbasep) == NULL) return; WT_CKPT_FOREACH(ckptbase, ckpt) __wt_meta_checkpoint_free(session, ckpt); - __wt_free(session, ckptbase); + __wt_free(session, *ckptbasep); } /* diff --git a/src/meta/meta_ext.c b/src/meta/meta_ext.c index 50e7568fe77..aa1ea8b974d 100644 --- a/src/meta/meta_ext.c +++ b/src/meta/meta_ext.c @@ -102,5 +102,5 @@ void __wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase) WT_GCC_FUNC_ATTRIBUTE((visibility("default"))) { - __wt_meta_ckptlist_free((WT_SESSION_IMPL *)session, ckptbase); + __wt_meta_ckptlist_free((WT_SESSION_IMPL *)session, &ckptbase); } diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c index 7c96dd8b8a8..95fb6a6f90e 100644 --- a/src/session/session_dhandle.c +++ b/src/session/session_dhandle.c @@ -270,6 +270,16 @@ __wt_session_release_btree(WT_SESSION_IMPL *session) if (F_ISSET(dhandle, WT_DHANDLE_DISCARD_FORCE)) { ret = __wt_conn_btree_sync_and_close(session, false, true); F_CLR(dhandle, WT_DHANDLE_DISCARD_FORCE); + } else if (F_ISSET(btree, WT_BTREE_BULK)) { + WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) && + !F_ISSET(dhandle, WT_DHANDLE_DISCARD)); + /* + * Acquire the schema lock while completing a bulk load. This + * avoids racing with a checkpoint while it gathers a set + * of handles. + */ + WT_WITH_SCHEMA_LOCK(session, ret = + __wt_conn_btree_sync_and_close(session, false, false)); } else if (F_ISSET(dhandle, WT_DHANDLE_DISCARD) || F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)) { WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE)); diff --git a/src/session/session_salvage.c b/src/session/session_salvage.c index 983b28dd8ea..12ce71cdbb0 100644 --- a/src/session/session_salvage.c +++ b/src/session/session_salvage.c @@ -54,6 +54,6 @@ __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[]) WT_ERR(__wt_meta_ckptlist_set( session, dhandle->name, ckptbase, NULL)); -err: __wt_meta_ckptlist_free(session, ckptbase); +err: __wt_meta_ckptlist_free(session, &ckptbase); return (ret); } diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c index 3eb07089b87..748f4aa2473 100644 --- a/src/txn/txn_ckpt.c +++ b/src/txn/txn_ckpt.c @@ -267,10 +267,9 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) { WT_BTREE *btree; WT_CONFIG_ITEM cval; - WT_CURSOR *meta_cursor; WT_DECL_RET; const char *name; - bool force, metadata_race; + bool force; btree = S2BT(session); @@ -291,6 +290,7 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) if (F_ISSET(btree, WT_BTREE_NO_CHECKPOINT)) return (0); +#ifdef HAVE_DIAGNOSTIC /* * We may have raced between starting the checkpoint transaction and * some operation completing on the handle that updated the metadata @@ -298,10 +298,12 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) * exclusive access to the handle or hold the schema lock. We are now * holding the schema lock and have an open btree handle, so if we * can't update the metadata, then there has been some state change - * invisible to the checkpoint transaction. Skip checkpointing such - * files: they must have a recent durable point. + * invisible to the checkpoint transaction. */ if (!WT_IS_METADATA(session->dhandle)) { + WT_CURSOR *meta_cursor; + bool metadata_race; + WT_ASSERT(session, !F_ISSET(&session->txn, WT_TXN_ERROR)); WT_RET(__wt_metadata_cursor(session, &meta_cursor)); meta_cursor->set_key(meta_cursor, session->dhandle->name); @@ -313,26 +315,9 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) metadata_race = false; WT_TRET(__wt_metadata_cursor_release(session, &meta_cursor)); WT_RET(ret); - if (metadata_race) { - /* - * The conflict registers as a rollback error: that can - * safely be skipped here. - */ - F_CLR(&session->txn, WT_TXN_ERROR); - if (force) { - WT_RET(__wt_msg(session, - "forced or named checkpoint raced with " - "a metadata update")); - return (EBUSY); - } - __wt_verbose(session, WT_VERB_CHECKPOINT, - "skipped checkpoint of %s with metadata conflict", - session->dhandle->name); - F_SET(btree, WT_BTREE_SKIP_CKPT); - __checkpoint_update_generation(session); - return (0); - } + WT_ASSERT(session, !metadata_race); } +#endif /* * Decide whether the tree needs to be included in the checkpoint and @@ -342,6 +327,7 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) session, true, force, true, cfg)); WT_RET(ret); if (F_ISSET(btree, WT_BTREE_SKIP_CKPT)) { + WT_ASSERT(session, btree->ckpt == NULL); __checkpoint_update_generation(session); return (0); } @@ -567,8 +553,11 @@ __checkpoint_verbose_track(WT_SESSION_IMPL *session, static void __checkpoint_fail_reset(WT_SESSION_IMPL *session) { - S2BT(session)->modified = true; - S2BT(session)->ckpt = NULL; + WT_BTREE *btree; + + btree = S2BT(session); + btree->modified = true; + __wt_meta_ckptlist_free(session, &btree->ckpt); } /* @@ -600,6 +589,8 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, const char *cfg[]) */ WT_RET(__wt_txn_begin(session, txn_cfg)); + WT_DIAGNOSTIC_YIELD; + /* Ensure a transaction ID is allocated prior to sharing it globally */ WT_RET(__wt_txn_id_check(session)); @@ -1286,33 +1277,20 @@ __checkpoint_lock_dirty_tree(WT_SESSION_IMPL *session, } /* - * There are special files: those being bulk-loaded, salvaged, upgraded - * or verified during the checkpoint. We have to do something for those - * objects because a checkpoint is an external name the application can - * reference and the name must exist no matter what's happening during - * the checkpoint. For bulk-loaded files, we could block until the load - * completes, checkpoint the partial load, or magic up an empty-file - * checkpoint. The first is too slow, the second is insane, so do the - * third. - * Salvage, upgrade and verify don't currently require any work, all - * three hold the schema lock, blocking checkpoints. If we ever want to - * fix that (and I bet we eventually will, at least for verify), we can - * copy the last checkpoint the file has. That works if we guarantee - * salvage, upgrade and verify act on objects with previous checkpoints - * (true if handles are closed/re-opened between object creation and a - * subsequent salvage, upgrade or verify operation). Presumably, - * salvage and upgrade will discard all previous checkpoints when they - * complete, which is fine with us. This change will require reference - * counting checkpoints, and once that's done, we should use checkpoint - * copy instead of forcing checkpoints on clean objects to associate - * names with checkpoints. + * There are special tree: those being bulk-loaded, salvaged, upgraded + * or verified during the checkpoint. They should never be part of a + * checkpoint: we will fail to lock them because the operations have + * exclusive access to the handles. Named checkpoints will fail in that + * case, ordinary checkpoints will skip files that cannot be opened + * normally. */ WT_ASSERT(session, !is_checkpoint || !F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)); __wt_readunlock(session, &conn->hot_backup_lock); - WT_ASSERT(session, btree->ckpt == NULL); + WT_ASSERT(session, btree->ckpt == NULL && + !F_ISSET(btree, WT_BTREE_SKIP_CKPT)); btree->ckpt = ckptbase; return (0); @@ -1320,7 +1298,7 @@ __checkpoint_lock_dirty_tree(WT_SESSION_IMPL *session, err: if (hot_backup_locked) __wt_readunlock(session, &conn->hot_backup_lock); - __wt_meta_ckptlist_free(session, ckptbase); + __wt_meta_ckptlist_free(session, &ckptbase); __wt_free(session, name_alloc); return (ret); @@ -1554,8 +1532,7 @@ err: /* S2C(session)->modified = true; } - __wt_meta_ckptlist_free(session, ckptbase); - btree->ckpt = NULL; + __wt_meta_ckptlist_free(session, &btree->ckpt); return (ret); } |