summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Cahill <michael.cahill@mongodb.com> 2016-01-11 14:26:52 -0500
committer: Michael Cahill <michael.cahill@mongodb.com> 2016-01-11 14:26:52 -0500
commit: cae5fcf57a7b13d742f687f3f064574a004d0537 (patch)
tree: 9c264d99c79268f79d81dc560fa2aafad200822a
parent: 2182e6291977a8bf6b9ad21561f86700b3117bad (diff)
parent: 547de638fd2abb752b510629bf61c8a23c6bf6e5 (diff)
download: mongodb-3.1.7.tar.gz
WT-2193 Backport 3.0. Handle read-committed metadata checkpoints during snapshot transactions
-rw-r--r-- src/btree/bt_sync.c 27
-rw-r--r-- src/conn/conn_open.c 6
-rw-r--r-- src/include/connection.h 2
-rw-r--r-- src/include/extern.h 2
-rw-r--r-- src/meta/meta_track.c 64
5 files changed, 95 insertions, 6 deletions
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index 939d8f808b1..5d5ad491145 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -22,16 +22,17 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
WT_PAGE_MODIFY *mod;
WT_REF *walk;
WT_TXN *txn;
- uint64_t internal_bytes, leaf_bytes;
- uint64_t internal_pages, leaf_pages;
+ uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages;
+ uint64_t saved_snap_min;
uint32_t flags;
bool evict_reset;
btree = S2BT(session);
- flags = WT_READ_CACHE | WT_READ_NO_GEN;
walk = NULL;
txn = &session->txn;
+ saved_snap_min = WT_SESSION_TXN_STATE(session)->snap_min;
+ flags = WT_READ_CACHE | WT_READ_NO_GEN;
internal_bytes = leaf_bytes = 0;
internal_pages = leaf_pages = 0;
@@ -80,6 +81,19 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
break;
case WT_SYNC_CHECKPOINT:
/*
+ * If we are flushing a file at read-committed isolation, which
+ * is of particular interest for flushing the metadata to make
+ * schema-changing operations durable, get a transactional
+ * snapshot now.
+ *
+ * All changes committed up to this point should be included.
+ * We don't update the snapshot in between pages because (a)
+ * the metadata shouldn't be that big, and (b) if we do ever
+ */
+ if (txn->isolation == WT_ISO_READ_COMMITTED)
+ __wt_txn_get_snapshot(session);
+
+ /*
* We cannot check the tree modified flag in the case of a
* checkpoint, the checkpoint code has already cleared it.
*
@@ -185,7 +199,12 @@ err: /* On error, clear any left-over tree walk. */
if (walk != NULL)
WT_TRET(__wt_page_release(session, walk, flags));
- if (txn->isolation == WT_ISO_READ_COMMITTED && session->ncursors == 0)
+ /*
+ * If we got a snapshot in order to write pages, and there was no
+ * snapshot active when we started, release it.
+ */
+ if (txn->isolation == WT_ISO_READ_COMMITTED &&
+ saved_snap_min == WT_TXN_NONE)
__wt_txn_release_snapshot(session);
if (btree->checkpointing) {
diff --git a/src/conn/conn_open.c b/src/conn/conn_open.c
index c86a6e039eb..826226a1975 100644
--- a/src/conn/conn_open.c
+++ b/src/conn/conn_open.c
@@ -122,6 +122,9 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
/* Close open data handles. */
WT_TRET(__wt_conn_dhandle_discard(session));
+ /* Shut down metadata tracking. */
+ WT_TRET(__wt_meta_track_destroy(session));
+
/*
* Now that all data handles are closed, tell logging that a checkpoint
* has completed then shut down the log manager (only after closing
@@ -254,6 +257,9 @@ __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[])
*/
WT_RET(__wt_logmgr_open(session));
+ /* Initialize metadata tracking, required before creating tables. */
+ WT_RET(__wt_meta_track_init(session));
+
/* Start the optional checkpoint thread. */
WT_RET(__wt_checkpoint_server_create(session, cfg));
diff --git a/src/include/connection.h b/src/include/connection.h
index 6870fcb1640..691cca436dc 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -334,6 +334,8 @@ struct __wt_connection_impl {
uint32_t log_prealloc; /* Log file pre-allocation */
uint32_t txn_logsync; /* Log sync configuration */
+ WT_SESSION_IMPL *meta_ckpt_session;/* Metadata checkpoint session */
+
WT_SESSION_IMPL *sweep_session; /* Handle sweep session */
wt_thread_t sweep_tid; /* Handle sweep thread */
int sweep_tid_set; /* Handle sweep thread set */
diff --git a/src/include/extern.h b/src/include/extern.h
index d30bb916e12..00f97c3d818 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -443,6 +443,8 @@ extern int __wt_meta_track_update(WT_SESSION_IMPL *session, const char *key);
extern int __wt_meta_track_fileop( WT_SESSION_IMPL *session, const char *olduri, const char *newuri);
extern int __wt_meta_track_drop( WT_SESSION_IMPL *session, const char *filename);
extern int __wt_meta_track_handle_lock(WT_SESSION_IMPL *session, bool created);
+extern int __wt_meta_track_init(WT_SESSION_IMPL *session);
+extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session);
extern int __wt_turtle_init(WT_SESSION_IMPL *session);
extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep);
extern int __wt_turtle_update( WT_SESSION_IMPL *session, const char *key, const char *value);
diff --git a/src/meta/meta_track.c b/src/meta/meta_track.c
index ff1173585b4..b8d7aa930ea 100644
--- a/src/meta/meta_track.c
+++ b/src/meta/meta_track.c
@@ -261,6 +261,7 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
{
WT_DECL_RET;
WT_META_TRACK *trk, *trk_orig;
+ WT_SESSION_IMPL *ckpt_session;
WT_ASSERT(session,
WT_META_TRACKING(session) && session->meta_track_nest > 0);
@@ -304,8 +305,18 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
WT_RET(ret);
} else {
- WT_WITH_DHANDLE(session, session->meta_dhandle,
- ret = __wt_checkpoint(session, NULL));
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ ckpt_session = S2C(session)->meta_ckpt_session;
+ /*
+ * If this operation is part of a running transaction, that
+ * should be included in the checkpoint.
+ */
+ ckpt_session->txn.id = session->txn.id;
+ F_SET(ckpt_session, WT_SESSION_LOCKED_SCHEMA);
+ WT_WITH_DHANDLE(ckpt_session, session->meta_dhandle, ret =
+ __wt_checkpoint(ckpt_session, NULL));
+ F_CLR(ckpt_session, WT_SESSION_LOCKED_SCHEMA);
+ ckpt_session->txn.id = WT_TXN_NONE;
WT_RET(ret);
WT_WITH_DHANDLE(session, session->meta_dhandle,
ret = __wt_checkpoint_sync(session, NULL));
@@ -473,3 +484,52 @@ __wt_meta_track_handle_lock(WT_SESSION_IMPL *session, bool created)
trk->created = created;
return (0);
}
+
+/*
+ * __wt_meta_track_init --
+ * Initialize metadata tracking.
+ */
+int
+__wt_meta_track_init(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) {
+ WT_RET(__wt_open_internal_session(conn,
+ "metadata-ckpt", false, false,
+ &conn->meta_ckpt_session));
+
+ /*
+ * Sessions default to read-committed isolation, we rely on
+ * that for the correctness of metadata checkpoints.
+ */
+ WT_ASSERT(session, conn->meta_ckpt_session->txn.isolation ==
+ WT_ISO_READ_COMMITTED);
+ }
+
+ return (0);
+}
+
+/*
+ * __wt_meta_track_destroy --
+ * Release resources allocated for metadata tracking.
+ */
+int
+__wt_meta_track_destroy(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+
+ conn = S2C(session);
+
+ /* Close the session used for metadata checkpoints. */
+ if (conn->meta_ckpt_session != NULL) {
+ wt_session = &conn->meta_ckpt_session->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ conn->meta_ckpt_session = NULL;
+ }
+
+ return (ret);
+}