diff options
author | Ramon Fernandez <ramon@mongodb.com> | 2016-01-12 11:51:53 -0500 |
---|---|---|
committer | Ramon Fernandez <ramon@mongodb.com> | 2016-01-12 11:59:22 -0500 |
commit | f23290ec159ca606a65da76654703f1210118726 (patch) | |
tree | bb826c9b77516ee3a67c9f9965569c1014ebfc56 | |
parent | 28fe476b7e7e8d43461f20384b7072cd83ad30b7 (diff) | |
download | mongo-f23290ec159ca606a65da76654703f1210118726.tar.gz |
Import wiredtiger-wiredtiger-mongodb-3.0.8-15-gcae5fcf.tar.gz from wiredtiger branch mongodb-3.0
ref: a1ddc5e..cae5fcf
WT-2193 Handle read-committed checkpoints during snapshot transactions
WT-2196 Fix size only statistics cursors with LSM
WT-2253 eviction could prioritize any page with a WT_READGEN_OLDEST generation
-rwxr-xr-x | src/third_party/wiredtiger/dist/s_copyright | 3 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/block/block_open.c | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_sync.c | 27 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/conn/conn_open.c | 6 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/evict/evict_lru.c | 5 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/connection.h | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/extern.h | 4 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/lsm/lsm_tree.c | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/meta/meta_track.c | 64 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/os_posix/os_filesize.c | 12 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/os_win/os_filesize.c | 17 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/schema/schema_stat.c | 26 |
12 files changed, 142 insertions, 28 deletions
diff --git a/src/third_party/wiredtiger/dist/s_copyright b/src/third_party/wiredtiger/dist/s_copyright index 73f45ffc4aa..3408784820d 100755 --- a/src/third_party/wiredtiger/dist/s_copyright +++ b/src/third_party/wiredtiger/dist/s_copyright @@ -1,5 +1,8 @@ #! /bin/sh +# Only run when building a release +test -z "$WT_RELEASE_BUILD" && exit 0 + # Check the copyrights. c1=__wt.1$$ diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c index 4f4b7c57279..5493d9a2a4c 100644 --- a/src/third_party/wiredtiger/src/block/block_open.c +++ b/src/third_party/wiredtiger/src/block/block_open.c @@ -426,7 +426,7 @@ __wt_block_manager_size( { wt_off_t filesize; - WT_RET(__wt_filesize_name(session, filename, &filesize)); + WT_RET(__wt_filesize_name(session, filename, false, &filesize)); WT_STAT_SET(stats, block_size, filesize); return (0); diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c index 939d8f808b1..5d5ad491145 100644 --- a/src/third_party/wiredtiger/src/btree/bt_sync.c +++ b/src/third_party/wiredtiger/src/btree/bt_sync.c @@ -22,16 +22,17 @@ __sync_file(WT_SESSION_IMPL *session, int syncop) WT_PAGE_MODIFY *mod; WT_REF *walk; WT_TXN *txn; - uint64_t internal_bytes, leaf_bytes; - uint64_t internal_pages, leaf_pages; + uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages; + uint64_t saved_snap_min; uint32_t flags; bool evict_reset; btree = S2BT(session); - flags = WT_READ_CACHE | WT_READ_NO_GEN; walk = NULL; txn = &session->txn; + saved_snap_min = WT_SESSION_TXN_STATE(session)->snap_min; + flags = WT_READ_CACHE | WT_READ_NO_GEN; internal_bytes = leaf_bytes = 0; internal_pages = leaf_pages = 0; @@ -80,6 +81,19 @@ __sync_file(WT_SESSION_IMPL *session, int syncop) break; case WT_SYNC_CHECKPOINT: /* + * If we are flushing a file at read-committed isolation, which + * is of particular interest for flushing the metadata to make + * schema-changing operation 
durable, get a transactional + * snapshot now. + * + * All changes committed up to this point should be included. + * We don't update the snapshot in between pages because (a) + * the metadata shouldn't be that big, and (b) if we do ever + */ + if (txn->isolation == WT_ISO_READ_COMMITTED) + __wt_txn_get_snapshot(session); + + /* * We cannot check the tree modified flag in the case of a * checkpoint, the checkpoint code has already cleared it. * @@ -185,7 +199,12 @@ err: /* On error, clear any left-over tree walk. */ if (walk != NULL) WT_TRET(__wt_page_release(session, walk, flags)); - if (txn->isolation == WT_ISO_READ_COMMITTED && session->ncursors == 0) + /* + * If we got a snapshot in order to write pages, and there was no + * snapshot active when we started, release it. + */ + if (txn->isolation == WT_ISO_READ_COMMITTED && + saved_snap_min == WT_TXN_NONE) __wt_txn_release_snapshot(session); if (btree->checkpointing) { diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c index c86a6e039eb..826226a1975 100644 --- a/src/third_party/wiredtiger/src/conn/conn_open.c +++ b/src/third_party/wiredtiger/src/conn/conn_open.c @@ -122,6 +122,9 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn) /* Close open data handles. */ WT_TRET(__wt_conn_dhandle_discard(session)); + /* Shut down metadata tracking, required before creating tables. */ + WT_TRET(__wt_meta_track_destroy(session)); + /* * Now that all data handles are closed, tell logging that a checkpoint * has completed then shut down the log manager (only after closing @@ -254,6 +257,9 @@ __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[]) */ WT_RET(__wt_logmgr_open(session)); + /* Initialize metadata tracking, required before creating tables. */ + WT_RET(__wt_meta_track_init(session)); + /* Start the optional checkpoint thread. 
*/ WT_RET(__wt_checkpoint_server_create(session, cfg)); diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c index eec7c3d2f0d..2ae3dd6b9a5 100644 --- a/src/third_party/wiredtiger/src/evict/evict_lru.c +++ b/src/third_party/wiredtiger/src/evict/evict_lru.c @@ -36,6 +36,10 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry) page = entry->ref->page; + /* Any page set to the oldest generation should be discarded. */ + if (page->read_gen == WT_READGEN_OLDEST) + return (WT_READGEN_OLDEST); + /* Any empty page (leaf or internal), is a good choice. */ if (__wt_page_is_empty(page)) return (WT_READGEN_OLDEST); @@ -1221,6 +1225,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags) * eviction, skip anything that isn't marked. */ if (LF_ISSET(WT_EVICT_PASS_WOULD_BLOCK) && + page->memory_footprint < btree->maxmempage && page->read_gen != WT_READGEN_OLDEST) continue; diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h index 6870fcb1640..691cca436dc 100644 --- a/src/third_party/wiredtiger/src/include/connection.h +++ b/src/third_party/wiredtiger/src/include/connection.h @@ -334,6 +334,8 @@ struct __wt_connection_impl { uint32_t log_prealloc; /* Log file pre-allocation */ uint32_t txn_logsync; /* Log sync configuration */ + WT_SESSION_IMPL *meta_ckpt_session;/* Metadata checkpoint session */ + WT_SESSION_IMPL *sweep_session; /* Handle sweep session */ wt_thread_t sweep_tid; /* Handle sweep thread */ int sweep_tid_set; /* Handle sweep thread set */ diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index 845102ca428..00f97c3d818 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -443,6 +443,8 @@ extern int __wt_meta_track_update(WT_SESSION_IMPL *session, const char *key); extern int __wt_meta_track_fileop( 
WT_SESSION_IMPL *session, const char *olduri, const char *newuri); extern int __wt_meta_track_drop( WT_SESSION_IMPL *session, const char *filename); extern int __wt_meta_track_handle_lock(WT_SESSION_IMPL *session, bool created); +extern int __wt_meta_track_init(WT_SESSION_IMPL *session); +extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session); extern int __wt_turtle_init(WT_SESSION_IMPL *session); extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep); extern int __wt_turtle_update( WT_SESSION_IMPL *session, const char *key, const char *value); @@ -463,7 +465,7 @@ extern int __wt_exist(WT_SESSION_IMPL *session, const char *filename, bool *exis extern void __wt_fallocate_config(WT_SESSION_IMPL *session, WT_FH *fh); extern int __wt_fallocate( WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t offset, wt_off_t len); extern int __wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep); -extern int __wt_filesize_name( WT_SESSION_IMPL *session, const char *filename, wt_off_t *sizep); +extern int __wt_filesize_name(WT_SESSION_IMPL *session, const char *filename, bool silent, wt_off_t *sizep); extern int __wt_bytelock(WT_FH *fhp, wt_off_t byte, bool lock); extern int __wt_directory_sync_fh(WT_SESSION_IMPL *session, WT_FH *fh); extern int __wt_directory_sync(WT_SESSION_IMPL *session, char *path); diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c index d3979da0da1..4beb5f11f83 100644 --- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c +++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c @@ -213,7 +213,7 @@ __wt_lsm_tree_set_chunk_size( if (!WT_PREFIX_SKIP(filename, "file:")) WT_RET_MSG(session, EINVAL, "Expected a 'file:' URI: %s", chunk->uri); - WT_RET(__wt_filesize_name(session, filename, &size)); + WT_RET(__wt_filesize_name(session, filename, false, &size)); chunk->size = (uint64_t)size; diff --git a/src/third_party/wiredtiger/src/meta/meta_track.c 
b/src/third_party/wiredtiger/src/meta/meta_track.c index ff1173585b4..b8d7aa930ea 100644 --- a/src/third_party/wiredtiger/src/meta/meta_track.c +++ b/src/third_party/wiredtiger/src/meta/meta_track.c @@ -261,6 +261,7 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll) { WT_DECL_RET; WT_META_TRACK *trk, *trk_orig; + WT_SESSION_IMPL *ckpt_session; WT_ASSERT(session, WT_META_TRACKING(session) && session->meta_track_nest > 0); @@ -304,8 +305,18 @@ __wt_meta_track_off(WT_SESSION_IMPL *session, bool need_sync, bool unroll) session, false, WT_TXN_LOG_CKPT_SYNC, NULL)); WT_RET(ret); } else { - WT_WITH_DHANDLE(session, session->meta_dhandle, - ret = __wt_checkpoint(session, NULL)); + WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA)); + ckpt_session = S2C(session)->meta_ckpt_session; + /* + * If this operation is part of a running transaction, that + * should be included in the checkpoint. + */ + ckpt_session->txn.id = session->txn.id; + F_SET(ckpt_session, WT_SESSION_LOCKED_SCHEMA); + WT_WITH_DHANDLE(ckpt_session, session->meta_dhandle, ret = + __wt_checkpoint(ckpt_session, NULL)); + F_CLR(ckpt_session, WT_SESSION_LOCKED_SCHEMA); + ckpt_session->txn.id = WT_TXN_NONE; WT_RET(ret); WT_WITH_DHANDLE(session, session->meta_dhandle, ret = __wt_checkpoint_sync(session, NULL)); @@ -473,3 +484,52 @@ __wt_meta_track_handle_lock(WT_SESSION_IMPL *session, bool created) trk->created = created; return (0); } + +/* + * __wt_meta_track_init -- + * Initialize metadata tracking. + */ +int +__wt_meta_track_init(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + + conn = S2C(session); + if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) { + WT_RET(__wt_open_internal_session(conn, + "metadata-ckpt", false, false, + &conn->meta_ckpt_session)); + + /* + * Sessions default to read-committed isolation, we rely on + * that for the correctness of metadata checkpoints. 
+ */ + WT_ASSERT(session, conn->meta_ckpt_session->txn.isolation == + WT_ISO_READ_COMMITTED); + } + + return (0); +} + +/* + * __wt_meta_track_destroy -- + * Release resources allocated for metadata tracking. + */ +int +__wt_meta_track_destroy(WT_SESSION_IMPL *session) +{ + WT_CONNECTION_IMPL *conn; + WT_DECL_RET; + WT_SESSION *wt_session; + + conn = S2C(session); + + /* Close the session used for metadata checkpoints. */ + if (conn->meta_ckpt_session != NULL) { + wt_session = &conn->meta_ckpt_session->iface; + WT_TRET(wt_session->close(wt_session, NULL)); + conn->meta_ckpt_session = NULL; + } + + return (ret); +} diff --git a/src/third_party/wiredtiger/src/os_posix/os_filesize.c b/src/third_party/wiredtiger/src/os_posix/os_filesize.c index b01fc91514b..c58f73b0665 100644 --- a/src/third_party/wiredtiger/src/os_posix/os_filesize.c +++ b/src/third_party/wiredtiger/src/os_posix/os_filesize.c @@ -34,8 +34,8 @@ __wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) * Return the size of a file in bytes, given a file name. */ int -__wt_filesize_name( - WT_SESSION_IMPL *session, const char *filename, wt_off_t *sizep) +__wt_filesize_name(WT_SESSION_IMPL *session, + const char *filename, bool silent, wt_off_t *sizep) { struct stat sb; WT_DECL_RET; @@ -52,5 +52,11 @@ __wt_filesize_name( return (0); } - WT_RET_MSG(session, ret, "%s: fstat", filename); + /* + * Some callers of this function expect failure if the file doesn't + * exist, and don't want an error message logged. 
+ */ + if (!silent) + WT_RET_MSG(session, ret, "%s: fstat", filename); + return (ret); } diff --git a/src/third_party/wiredtiger/src/os_win/os_filesize.c b/src/third_party/wiredtiger/src/os_win/os_filesize.c index dfeadc31fc4..7f231b5ba9a 100644 --- a/src/third_party/wiredtiger/src/os_win/os_filesize.c +++ b/src/third_party/wiredtiger/src/os_win/os_filesize.c @@ -15,8 +15,8 @@ int __wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) { - WT_DECL_RET; LARGE_INTEGER size; + WT_DECL_RET; WT_RET(__wt_verbose( session, WT_VERB_FILEOPS, "%s: GetFileSizeEx", fh->name)); @@ -34,11 +34,11 @@ __wt_filesize(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t *sizep) * Return the size of a file in bytes, given a file name. */ int -__wt_filesize_name( - WT_SESSION_IMPL *session, const char *filename, wt_off_t *sizep) +__wt_filesize_name(WT_SESSION_IMPL *session, + const char *filename, bool silent, wt_off_t *sizep) { - WT_DECL_RET; WIN32_FILE_ATTRIBUTE_DATA data; + WT_DECL_RET; char *path; WT_RET(__wt_filename(session, filename, &path)); @@ -53,5 +53,12 @@ __wt_filesize_name( return (0); } - WT_RET_MSG(session, __wt_errno(), "%s: GetFileAttributesEx", filename); + /* + * Some callers of this function expect failure if the file doesn't + * exist, and don't want an error message logged. + */ + ret = __wt_errno(); + if (!silent) + WT_RET_MSG(session, ret, "%s: GetFileAttributesEx", filename); + return (ret); } diff --git a/src/third_party/wiredtiger/src/schema/schema_stat.c b/src/third_party/wiredtiger/src/schema/schema_stat.c index dba1dfe5f55..0b8c2c2951a 100644 --- a/src/third_party/wiredtiger/src/schema/schema_stat.c +++ b/src/third_party/wiredtiger/src/schema/schema_stat.c @@ -89,19 +89,23 @@ __curstat_size_only(WT_SESSION_IMPL *session, /* Build up the file name from the table URI. */ WT_ERR(__wt_buf_fmt( session, &namebuf, "%s.wt", uri + strlen("table:"))); + /* - * Get the size of the underlying file. 
There is nothing stopping a - * race with schema level table operations (for example drop) if there - * is a race there will be an error message generated. + * Get the size of the underlying file. This will fail for anything + * other than simple tables (LSM for example) and will fail if there + * are concurrent schema level operations (for example drop). That is + * fine - failing here results in falling back to the slow path of + * opening the handle. + * !!! Deliberately discard the return code from a failed call - the + * error is flagged by not setting fast to true. */ - WT_ERR(__wt_filesize_name(session, namebuf.data, &filesize)); - - /* Setup and populate the statistics structure */ - __wt_stat_init_dsrc_stats(&cst->u.dsrc_stats); - WT_STAT_SET(&cst->u.dsrc_stats, block_size, filesize); - __wt_curstat_dsrc_final(cst); - - *was_fast = true; + if (__wt_filesize_name(session, namebuf.data, true, &filesize) == 0) { + /* Setup and populate the statistics structure */ + __wt_stat_init_dsrc_stats(&cst->u.dsrc_stats); + WT_STAT_SET(&cst->u.dsrc_stats, block_size, filesize); + __wt_curstat_dsrc_final(cst); + *was_fast = true; + } err: __wt_free(session, tableconf); __wt_buf_free(session, &namebuf); |