summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Cahill <michael.cahill@mongodb.com>2017-03-13 17:13:31 +1100
committerAlex Gorrod <alexander.gorrod@mongodb.com>2017-03-13 17:13:31 +1100
commitf72c78b74d42c9e89bc98ad56ba184536e8efcae (patch)
treefce94d8bca734bbd4ec9155fe88ded6fb25cf096
parent1e05438f426c0c54a603f660fb7831eb2b9a523e (diff)
downloadmongo-f72c78b74d42c9e89bc98ad56ba184536e8efcae.tar.gz
WT-3207 Fix a leak if a checkpoint fails. (#3329)
Also switch to holding the schema lock when completing a bulk load. This avoids a race with checkpoints starting, so avoids a failure mode that was added to checkpoint earlier in this ticket. Assert that we don't hit that case instead.
-rw-r--r--src/btree/bt_vrfy.c2
-rw-r--r--src/conn/conn_dhandle.c10
-rw-r--r--src/include/extern.h2
-rw-r--r--src/meta/meta_ckpt.c10
-rw-r--r--src/meta/meta_ext.c2
-rw-r--r--src/session/session_dhandle.c10
-rw-r--r--src/session/session_salvage.c2
-rw-r--r--src/txn/txn_ckpt.c75
8 files changed, 50 insertions, 63 deletions
diff --git a/src/btree/bt_vrfy.c b/src/btree/bt_vrfy.c
index 3c90e580696..7475811adc5 100644
--- a/src/btree/bt_vrfy.c
+++ b/src/btree/bt_vrfy.c
@@ -274,7 +274,7 @@ err: /* Inform the underlying block manager we're done. */
/* Discard the list of checkpoints. */
if (ckptbase != NULL)
- __wt_meta_ckptlist_free(session, ckptbase);
+ __wt_meta_ckptlist_free(session, &ckptbase);
/* Free allocated memory. */
__wt_scr_free(session, &vs->max_key);
diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c
index 25795a8d309..6c8d66d63f8 100644
--- a/src/conn/conn_dhandle.c
+++ b/src/conn/conn_dhandle.c
@@ -152,11 +152,11 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force)
WT_RET(__wt_evict_file_exclusive_on(session));
/*
- * If we don't already have the schema lock, make it an error to try
- * to acquire it. The problem is that we are holding an exclusive
- * lock on the handle, and if we attempt to acquire the schema lock
- * we might deadlock with a thread that has the schema lock and wants
- * a handle lock (specifically, checkpoint).
+ * If we don't already have the schema lock, make it an error to try to
+ * acquire it. The problem is that we are holding an exclusive lock on
+ * the handle, and if we attempt to acquire the schema lock we might
+ * deadlock with a thread that has the schema lock and wants a handle
+ * lock.
*/
no_schema_lock = false;
if (!F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)) {
diff --git a/src/include/extern.h b/src/include/extern.h
index d0c9655fafb..db718966426 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -462,7 +462,7 @@ extern int __wt_meta_checkpoint_last_name( WT_SESSION_IMPL *session, const char
extern int __wt_meta_checkpoint_clear(WT_SESSION_IMPL *session, const char *fname) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern int __wt_meta_ckptlist_get( WT_SESSION_IMPL *session, const char *fname, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern int __wt_meta_ckptlist_set(WT_SESSION_IMPL *session, const char *fname, WT_CKPT *ckptbase, WT_LSN *ckptlsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
-extern void __wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT *ckptbase) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
+extern void __wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT **ckptbasep) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern void __wt_meta_checkpoint_free(WT_SESSION_IMPL *session, WT_CKPT *ckpt) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern int __wt_ext_metadata_insert(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
extern int __wt_ext_metadata_remove( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("hidden")));
diff --git a/src/meta/meta_ckpt.c b/src/meta/meta_ckpt.c
index b985104c2eb..151bbe0e081 100644
--- a/src/meta/meta_ckpt.c
+++ b/src/meta/meta_ckpt.c
@@ -297,7 +297,7 @@ __wt_meta_ckptlist_get(
*ckptbasep = ckptbase;
if (0) {
-err: __wt_meta_ckptlist_free(session, ckptbase);
+err: __wt_meta_ckptlist_free(session, &ckptbase);
}
__wt_free(session, config);
__wt_scr_free(session, &buf);
@@ -463,16 +463,16 @@ err: __wt_scr_free(session, &buf);
* Discard the checkpoint array.
*/
void
-__wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT *ckptbase)
+__wt_meta_ckptlist_free(WT_SESSION_IMPL *session, WT_CKPT **ckptbasep)
{
- WT_CKPT *ckpt;
+ WT_CKPT *ckpt, *ckptbase;
- if (ckptbase == NULL)
+ if ((ckptbase = *ckptbasep) == NULL)
return;
WT_CKPT_FOREACH(ckptbase, ckpt)
__wt_meta_checkpoint_free(session, ckpt);
- __wt_free(session, ckptbase);
+ __wt_free(session, *ckptbasep);
}
/*
diff --git a/src/meta/meta_ext.c b/src/meta/meta_ext.c
index 50e7568fe77..aa1ea8b974d 100644
--- a/src/meta/meta_ext.c
+++ b/src/meta/meta_ext.c
@@ -102,5 +102,5 @@ void
__wt_metadata_free_ckptlist(WT_SESSION *session, WT_CKPT *ckptbase)
WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
{
- __wt_meta_ckptlist_free((WT_SESSION_IMPL *)session, ckptbase);
+ __wt_meta_ckptlist_free((WT_SESSION_IMPL *)session, &ckptbase);
}
diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c
index 7c96dd8b8a8..95fb6a6f90e 100644
--- a/src/session/session_dhandle.c
+++ b/src/session/session_dhandle.c
@@ -270,6 +270,16 @@ __wt_session_release_btree(WT_SESSION_IMPL *session)
if (F_ISSET(dhandle, WT_DHANDLE_DISCARD_FORCE)) {
ret = __wt_conn_btree_sync_and_close(session, false, true);
F_CLR(dhandle, WT_DHANDLE_DISCARD_FORCE);
+ } else if (F_ISSET(btree, WT_BTREE_BULK)) {
+ WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE) &&
+ !F_ISSET(dhandle, WT_DHANDLE_DISCARD));
+ /*
+ * Acquire the schema lock while completing a bulk load. This
+ * avoids racing with a checkpoint while it gathers a set
+ * of handles.
+ */
+ WT_WITH_SCHEMA_LOCK(session, ret =
+ __wt_conn_btree_sync_and_close(session, false, false));
} else if (F_ISSET(dhandle, WT_DHANDLE_DISCARD) ||
F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS)) {
WT_ASSERT(session, F_ISSET(dhandle, WT_DHANDLE_EXCLUSIVE));
diff --git a/src/session/session_salvage.c b/src/session/session_salvage.c
index 983b28dd8ea..12ce71cdbb0 100644
--- a/src/session/session_salvage.c
+++ b/src/session/session_salvage.c
@@ -54,6 +54,6 @@ __wt_salvage(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_meta_ckptlist_set(
session, dhandle->name, ckptbase, NULL));
-err: __wt_meta_ckptlist_free(session, ckptbase);
+err: __wt_meta_ckptlist_free(session, &ckptbase);
return (ret);
}
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index 3eb07089b87..748f4aa2473 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -267,10 +267,9 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[])
{
WT_BTREE *btree;
WT_CONFIG_ITEM cval;
- WT_CURSOR *meta_cursor;
WT_DECL_RET;
const char *name;
- bool force, metadata_race;
+ bool force;
btree = S2BT(session);
@@ -291,6 +290,7 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[])
if (F_ISSET(btree, WT_BTREE_NO_CHECKPOINT))
return (0);
+#ifdef HAVE_DIAGNOSTIC
/*
* We may have raced between starting the checkpoint transaction and
* some operation completing on the handle that updated the metadata
@@ -298,10 +298,12 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[])
* exclusive access to the handle or hold the schema lock. We are now
* holding the schema lock and have an open btree handle, so if we
* can't update the metadata, then there has been some state change
- * invisible to the checkpoint transaction. Skip checkpointing such
- * files: they must have a recent durable point.
+ * invisible to the checkpoint transaction.
*/
if (!WT_IS_METADATA(session->dhandle)) {
+ WT_CURSOR *meta_cursor;
+ bool metadata_race;
+
WT_ASSERT(session, !F_ISSET(&session->txn, WT_TXN_ERROR));
WT_RET(__wt_metadata_cursor(session, &meta_cursor));
meta_cursor->set_key(meta_cursor, session->dhandle->name);
@@ -313,26 +315,9 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[])
metadata_race = false;
WT_TRET(__wt_metadata_cursor_release(session, &meta_cursor));
WT_RET(ret);
- if (metadata_race) {
- /*
- * The conflict registers as a rollback error: that can
- * safely be skipped here.
- */
- F_CLR(&session->txn, WT_TXN_ERROR);
- if (force) {
- WT_RET(__wt_msg(session,
- "forced or named checkpoint raced with "
- "a metadata update"));
- return (EBUSY);
- }
- __wt_verbose(session, WT_VERB_CHECKPOINT,
- "skipped checkpoint of %s with metadata conflict",
- session->dhandle->name);
- F_SET(btree, WT_BTREE_SKIP_CKPT);
- __checkpoint_update_generation(session);
- return (0);
- }
+ WT_ASSERT(session, !metadata_race);
}
+#endif
/*
* Decide whether the tree needs to be included in the checkpoint and
@@ -342,6 +327,7 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[])
session, true, force, true, cfg));
WT_RET(ret);
if (F_ISSET(btree, WT_BTREE_SKIP_CKPT)) {
+ WT_ASSERT(session, btree->ckpt == NULL);
__checkpoint_update_generation(session);
return (0);
}
@@ -567,8 +553,11 @@ __checkpoint_verbose_track(WT_SESSION_IMPL *session,
static void
__checkpoint_fail_reset(WT_SESSION_IMPL *session)
{
- S2BT(session)->modified = true;
- S2BT(session)->ckpt = NULL;
+ WT_BTREE *btree;
+
+ btree = S2BT(session);
+ btree->modified = true;
+ __wt_meta_ckptlist_free(session, &btree->ckpt);
}
/*
@@ -600,6 +589,8 @@ __checkpoint_prepare(WT_SESSION_IMPL *session, const char *cfg[])
*/
WT_RET(__wt_txn_begin(session, txn_cfg));
+ WT_DIAGNOSTIC_YIELD;
+
/* Ensure a transaction ID is allocated prior to sharing it globally */
WT_RET(__wt_txn_id_check(session));
@@ -1286,33 +1277,20 @@ __checkpoint_lock_dirty_tree(WT_SESSION_IMPL *session,
}
/*
- * There are special files: those being bulk-loaded, salvaged, upgraded
- * or verified during the checkpoint. We have to do something for those
- * objects because a checkpoint is an external name the application can
- * reference and the name must exist no matter what's happening during
- * the checkpoint. For bulk-loaded files, we could block until the load
- * completes, checkpoint the partial load, or magic up an empty-file
- * checkpoint. The first is too slow, the second is insane, so do the
- * third.
- * Salvage, upgrade and verify don't currently require any work, all
- * three hold the schema lock, blocking checkpoints. If we ever want to
- * fix that (and I bet we eventually will, at least for verify), we can
- * copy the last checkpoint the file has. That works if we guarantee
- * salvage, upgrade and verify act on objects with previous checkpoints
- * (true if handles are closed/re-opened between object creation and a
- * subsequent salvage, upgrade or verify operation). Presumably,
- * salvage and upgrade will discard all previous checkpoints when they
- * complete, which is fine with us. This change will require reference
- * counting checkpoints, and once that's done, we should use checkpoint
- * copy instead of forcing checkpoints on clean objects to associate
- * names with checkpoints.
+ * There are special tree: those being bulk-loaded, salvaged, upgraded
+ * or verified during the checkpoint. They should never be part of a
+ * checkpoint: we will fail to lock them because the operations have
+ * exclusive access to the handles. Named checkpoints will fail in that
+ * case, ordinary checkpoints will skip files that cannot be opened
+ * normally.
*/
WT_ASSERT(session,
!is_checkpoint || !F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS));
__wt_readunlock(session, &conn->hot_backup_lock);
- WT_ASSERT(session, btree->ckpt == NULL);
+ WT_ASSERT(session, btree->ckpt == NULL &&
+ !F_ISSET(btree, WT_BTREE_SKIP_CKPT));
btree->ckpt = ckptbase;
return (0);
@@ -1320,7 +1298,7 @@ __checkpoint_lock_dirty_tree(WT_SESSION_IMPL *session,
err: if (hot_backup_locked)
__wt_readunlock(session, &conn->hot_backup_lock);
- __wt_meta_ckptlist_free(session, ckptbase);
+ __wt_meta_ckptlist_free(session, &ckptbase);
__wt_free(session, name_alloc);
return (ret);
@@ -1554,8 +1532,7 @@ err: /*
S2C(session)->modified = true;
}
- __wt_meta_ckptlist_free(session, ckptbase);
- btree->ckpt = NULL;
+ __wt_meta_ckptlist_free(session, &btree->ckpt);
return (ret);
}