summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Ramon Fernandez <ramon@mongodb.com> 2016-01-12 11:51:53 -0500
committer Ramon Fernandez <ramon@mongodb.com> 2016-01-12 11:59:22 -0500
commit f23290ec159ca606a65da76654703f1210118726 (patch)
tree bb826c9b77516ee3a67c9f9965569c1014ebfc56
parent 28fe476b7e7e8d43461f20384b7072cd83ad30b7 (diff)
download mongo-f23290ec159ca606a65da76654703f1210118726.tar.gz
Import wiredtiger-wiredtiger-mongodb-3.0.8-15-gcae5fcf.tar.gz from wiredtiger branch mongodb-3.0
ref: a1ddc5e..cae5fcf

WT-2193 Handle read-committed checkpoints during snapshot transactions
WT-2196 Fix size only statistics cursors with LSM
WT-2253 eviction could prioritize any page with a WT_READGEN_OLDEST generation
-rwxr-xr-x src/third_party/wiredtiger/dist/s_copyright 3
-rw-r--r-- src/third_party/wiredtiger/src/block/block_open.c 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_sync.c 27
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_open.c 6
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_lru.c 5
-rw-r--r-- src/third_party/wiredtiger/src/include/connection.h 2
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h 4
-rw-r--r-- src/third_party/wiredtiger/src/lsm/lsm_tree.c 2
-rw-r--r-- src/third_party/wiredtiger/src/meta/meta_track.c 64
-rw-r--r-- src/third_party/wiredtiger/src/os_posix/os_filesize.c 12
-rw-r--r-- src/third_party/wiredtiger/src/os_win/os_filesize.c 17
-rw-r--r-- src/third_party/wiredtiger/src/schema/schema_stat.c 26
12 files changed, 142 insertions, 28 deletions
diff --git a/src/third_party/wiredtiger/dist/s_copyright b/src/third_party/wiredtiger/dist/s_copyright
index 73f45ffc4aa..3408784820d 100755
--- a/src/third_party/wiredtiger/dist/s_copyright
+++ b/src/third_party/wiredtiger/dist/s_copyright
@@ -1,5 +1,8 @@
#! /bin/sh
+# Only run when building a release
+test -z "$WT_RELEASE_BUILD" && exit 0
+
# Check the copyrights.
c1=__wt.1$$
diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c
index 4f4b7c57279..5493d9a2a4c 100644
--- a/src/third_party/wiredtiger/src/block/block_open.c
+++ b/src/third_party/wiredtiger/src/block/block_open.c
@@ -426,7 +426,7 @@ __wt_block_manager_size(
{
wt_off_t filesize;
- WT_RET(__wt_filesize_name(session, filename, &filesize));
+ WT_RET(__wt_filesize_name(session, filename, false, &filesize));
WT_STAT_SET(stats, block_size, filesize);
return (0);
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index 939d8f808b1..5d5ad491145 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -22,16 +22,17 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
WT_PAGE_MODIFY *mod;
WT_REF *walk;
WT_TXN *txn;
- uint64_t internal_bytes, leaf_bytes;
- uint64_t internal_pages, leaf_pages;
+ uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages;
+ uint64_t saved_snap_min;
uint32_t flags;
bool evict_reset;
btree = S2BT(session);
- flags = WT_READ_CACHE | WT_READ_NO_GEN;
walk = NULL;
txn = &session->txn;
+ saved_snap_min = WT_SESSION_TXN_STATE(session)->snap_min;
+ flags = WT_READ_CACHE | WT_READ_NO_GEN;
internal_bytes = leaf_bytes = 0;
internal_pages = leaf_pages = 0;
@@ -80,6 +81,19 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
break;
case WT_SYNC_CHECKPOINT:
/*
+ * If we are flushing a file at read-committed isolation, which
+ * is of particular interest for flushing the metadata to make
+ * schema-changing operations durable, get a transactional
+ * snapshot now.
+ *
+ * All changes committed up to this point should be included.
+ * We don't update the snapshot in between pages because the
+ * metadata shouldn't be that big.
+ */
+ if (txn->isolation == WT_ISO_READ_COMMITTED)
+ __wt_txn_get_snapshot(session);
+
+ /*
* We cannot check the tree modified flag in the case of a
* checkpoint, the checkpoint code has already cleared it.
*
@@ -185,7 +199,12 @@ err: /* On error, clear any left-over tree walk. */
if (walk != NULL)
WT_TRET(__wt_page_release(session, walk, flags));
- if (txn->isolation == WT_ISO_READ_COMMITTED && session->ncursors == 0)
+ /*
+ * If we got a snapshot in order to write pages, and there was no
+ * snapshot active when we started, release it.
+ */
+ if (txn->isolation == WT_ISO_READ_COMMITTED &&
+ saved_snap_min == WT_TXN_NONE)
__wt_txn_release_snapshot(session);
if (btree->checkpointing) {
diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c
index c86a6e039eb..826226a1975 100644
--- a/src/third_party/wiredtiger/src/conn/conn_open.c
+++ b/src/third_party/wiredtiger/src/conn/conn_open.c
@@ -122,6 +122,9 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
/* Close open data handles. */
WT_TRET(__wt_conn_dhandle_discard(session));
+ /* Shut down metadata tracking, required before creating tables. */
+ WT_TRET(__wt_meta_track_destroy(session));
+
/*
* Now that all data handles are closed, tell logging that a checkpoint
* has completed then shut down the log manager (only after closing
@@ -254,6 +257,9 @@ __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[])
*/
WT_RET(__wt_logmgr_open(session));
+ /* Initialize metadata tracking, required before creating tables. */
+ WT_RET(__wt_meta_track_init(session));
+
/* Start the optional checkpoint thread. */
WT_RET(__wt_checkpoint_server_create(session, cfg));
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index eec7c3d2f0d..2ae3dd6b9a5 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -36,6 +36,10 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry)
page = entry->ref->page;
+ /* Any page set to the oldest generation should be discarded. */
+ if (page->read_gen == WT_READGEN_OLDEST)
+ return (WT_READGEN_OLDEST);
+
/* Any empty page (leaf or internal), is a good choice. */
if (__wt_page_is_empty(page))
return (WT_READGEN_OLDEST);
@@ -1221,6 +1225,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags)
* eviction, skip anything that isn't marked.
*/
if (LF_ISSET(WT_EVICT_PASS_WOULD_BLOCK) &&
+ page->memory_footprint < btree->maxmempage &&
page->read_gen != WT_READGEN_OLDEST)
continue;
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index 6870fcb1640..691cca436dc 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -334,6 +334,8 @@ struct __wt_connection_impl {
uint32_t log_prealloc; /* Log file pre-allocation */
uint32_t txn_logsync; /* Log sync configuration */
+ WT_SESSION_IMPL *meta_ckpt_session;/* Metadata checkpoint session */
+
WT_SESSION_IMPL *sweep_session; /* Handle sweep session */
wt_thread_t sweep_tid; /* Handle sweep thread */
int sweep_tid_set; /* Handle sweep thread set */
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 845102ca428..00f97c3d818 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -443,6 +443,8 @@ extern int __wt_meta_track_update(WT_SESSION_IMPL *session, const char *key);
extern int __wt_meta_track_fileop( WT_SESSION_IMPL *session, const char *olduri, const char *newuri);
extern int __wt_meta_track_drop( WT_SESSION_IMPL *session, const char *filename);
extern int __wt_meta_track_handle_lock(WT_SESSION_IMPL *session, bool created);
+extern int __wt_meta_track_init(WT_SESSION_IMPL *session);
+extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session);
extern int __wt_turtle_init(WT_SESSION_IMPL *session);
extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep);
extern int __wt_turtle_update( WT_SESSION_IMPL *session, const char *key, const char *value);
@@ -463,7 +465,7 @@ extern int __wt_exist(WT_SESSION_IMPL *session, const char *filename, bool *exis
extern void __wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh);
extern int __wt_fallocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len);
extern int __wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep);
-extern int __wt_filesize_name( WT_SESSION_IMPL *session, const char *filename, wt_off_t *sizep);
+extern int __wt_filesize_name(WT_SESSION_IMPL *session, const char *filename, bool silent, wt_off_t *sizep);
extern int __wt_bytelock(WT_FH *fhp, wt_off_t byte, bool lock);
extern int __wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh);
extern int __wt_directory_sync(WT_SESSION_IMPL *session, char *path);
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
index d3979da0da1..4beb5f11f83 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
@@ -213,7 +213,7 @@ __wt_lsm_tree_set_chunk_size(
if (!WT_PREFIX_SKIP(filename, "file:"))
WT_RET_MSG(session, EINVAL,
"Expected a 'file:' URI: %s", chunk->uri);
- WT_RET(__wt_filesize_name(session, filename, &size));
+ WT_RET(__wt_filesize_name(session, filename, false, &size));
chunk->size = (uint64_t)size;
diff --git a/src/third_party/wiredtiger/src/meta/meta_track.c b/src/third_party/wiredtiger/src/meta/meta_track.c
index ff1173585b4..b8d7aa930ea 100644
--- a/src/third_party/wiredtiger/src/meta/meta_track.c
+++ b/src/third_party/wiredtiger/src/meta/meta_track.c
@@ -261,6 +261,7 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
{
WT_DECL_RET;
WT_META_TRACK *trk, *trk_orig;
+ WT_SESSION_IMPL *ckpt_session;
WT_ASSERT(session,
WT_META_TRACKING(session) && session->meta_track_nest > 0);
@@ -304,8 +305,18 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll)
session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
WT_RET(ret);
} else {
- WT_WITH_DHANDLE(session, session->meta_dhandle,
- ret = __wt_checkpoint(session, NULL));
+ WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
+ ckpt_session = S2C(session)->meta_ckpt_session;
+ /*
+ * If this operation is part of a running transaction, that
+ * should be included in the checkpoint.
+ */
+ ckpt_session->txn.id = session->txn.id;
+ F_SET(ckpt_session, WT_SESSION_LOCKED_SCHEMA);
+ WT_WITH_DHANDLE(ckpt_session, session->meta_dhandle, ret =
+ __wt_checkpoint(ckpt_session, NULL));
+ F_CLR(ckpt_session, WT_SESSION_LOCKED_SCHEMA);
+ ckpt_session->txn.id = WT_TXN_NONE;
WT_RET(ret);
WT_WITH_DHANDLE(session, session->meta_dhandle,
ret = __wt_checkpoint_sync(session, NULL));
@@ -473,3 +484,52 @@ __wt_meta_track_handle_lock(WT_SESSION_IMPL *session, bool created)
trk->created = created;
return (0);
}
+
+/*
+ * __wt_meta_track_init --
+ * Initialize metadata tracking.
+ */
+int
+__wt_meta_track_init(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+
+ conn = S2C(session);
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) {
+ WT_RET(__wt_open_internal_session(conn,
+ "metadata-ckpt", false, false,
+ &conn->meta_ckpt_session));
+
+ /*
+ * Sessions default to read-committed isolation, we rely on
+ * that for the correctness of metadata checkpoints.
+ */
+ WT_ASSERT(session, conn->meta_ckpt_session->txn.isolation ==
+ WT_ISO_READ_COMMITTED);
+ }
+
+ return (0);
+}
+
+/*
+ * __wt_meta_track_destroy --
+ * Release resources allocated for metadata tracking.
+ */
+int
+__wt_meta_track_destroy(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+ WT_SESSION *wt_session;
+
+ conn = S2C(session);
+
+ /* Close the session used for metadata checkpoints. */
+ if (conn->meta_ckpt_session != NULL) {
+ wt_session = &conn->meta_ckpt_session->iface;
+ WT_TRET(wt_session->close(wt_session, NULL));
+ conn->meta_ckpt_session = NULL;
+ }
+
+ return (ret);
+}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_filesize.c b/src/third_party/wiredtiger/src/os_posix/os_filesize.c
index b01fc91514b..c58f73b0665 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_filesize.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_filesize.c
@@ -34,8 +34,8 @@ __wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
* Return the size of a file in bytes, given a file name.
*/
int
-__wt_filesize_name(
- WT_SESSION_IMPL *session, const char *filename, wt_off_t *sizep)
+__wt_filesize_name(WT_SESSION_IMPL *session,
+ const char *filename, bool silent, wt_off_t *sizep)
{
struct stat sb;
WT_DECL_RET;
@@ -52,5 +52,11 @@ __wt_filesize_name(
return (0);
}
- WT_RET_MSG(session, ret, "%s: fstat", filename);
+ /*
+ * Some callers of this function expect failure if the file doesn't
+ * exist, and don't want an error message logged.
+ */
+ if (!silent)
+ WT_RET_MSG(session, ret, "%s: fstat", filename);
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/os_win/os_filesize.c b/src/third_party/wiredtiger/src/os_win/os_filesize.c
index dfeadc31fc4..7f231b5ba9a 100644
--- a/src/third_party/wiredtiger/src/os_win/os_filesize.c
+++ b/src/third_party/wiredtiger/src/os_win/os_filesize.c
@@ -15,8 +15,8 @@
int
__wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
{
- WT_DECL_RET;
LARGE_INTEGER size;
+ WT_DECL_RET;
WT_RET(__wt_verbose(
session, WT_VERB_FILEOPS, "%s: GetFileSizeEx", fh->name));
@@ -34,11 +34,11 @@ __wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep)
* Return the size of a file in bytes, given a file name.
*/
int
-__wt_filesize_name(
- WT_SESSION_IMPL *session, const char *filename, wt_off_t *sizep)
+__wt_filesize_name(WT_SESSION_IMPL *session,
+ const char *filename, bool silent, wt_off_t *sizep)
{
- WT_DECL_RET;
WIN32_FILE_ATTRIBUTE_DATA data;
+ WT_DECL_RET;
char *path;
WT_RET(__wt_filename(session, filename, &path));
@@ -53,5 +53,12 @@ __wt_filesize_name(
return (0);
}
- WT_RET_MSG(session, __wt_errno(), "%s: GetFileAttributesEx", filename);
+ /*
+ * Some callers of this function expect failure if the file doesn't
+ * exist, and don't want an error message logged.
+ */
+ ret = __wt_errno();
+ if (!silent)
+ WT_RET_MSG(session, ret, "%s: GetFileAttributesEx", filename);
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/schema/schema_stat.c b/src/third_party/wiredtiger/src/schema/schema_stat.c
index dba1dfe5f55..0b8c2c2951a 100644
--- a/src/third_party/wiredtiger/src/schema/schema_stat.c
+++ b/src/third_party/wiredtiger/src/schema/schema_stat.c
@@ -89,19 +89,23 @@ __curstat_size_only(WT_SESSION_IMPL *session,
/* Build up the file name from the table URI. */
WT_ERR(__wt_buf_fmt(
session, &namebuf, "%s.wt", uri + strlen("table:")));
+
/*
- * Get the size of the underlying file. There is nothing stopping a
- * race with schema level table operations (for example drop) if there
- * is a race there will be an error message generated.
+ * Get the size of the underlying file. This will fail for anything
+ * other than simple tables (LSM for example) and will fail if there
+ * are concurrent schema level operations (for example drop). That is
+ * fine - failing here results in falling back to the slow path of
+ * opening the handle.
+ * !!! Deliberately discard the return code from a failed call - the
+ * error is flagged by not setting was_fast to true.
*/
- WT_ERR(__wt_filesize_name(session, namebuf.data, &filesize));
-
- /* Setup and populate the statistics structure */
- __wt_stat_init_dsrc_stats(&cst->u.dsrc_stats);
- WT_STAT_SET(&cst->u.dsrc_stats, block_size, filesize);
- __wt_curstat_dsrc_final(cst);
-
- *was_fast = true;
+ if (__wt_filesize_name(session, namebuf.data, true, &filesize) == 0) {
+ /* Setup and populate the statistics structure */
+ __wt_stat_init_dsrc_stats(&cst->u.dsrc_stats);
+ WT_STAT_SET(&cst->u.dsrc_stats, block_size, filesize);
+ __wt_curstat_dsrc_final(cst);
+ *was_fast = true;
+ }
err: __wt_free(session, tableconf);
__wt_buf_free(session, &namebuf);