summary refs log tree commit diff
path: root/src/third_party/wiredtiger/src
diff options
context:
space:
mode:
author Michael Cahill <michael.cahill@mongodb.com> 2015-07-17 22:40:41 +1000
committer Michael Cahill <michael.cahill@mongodb.com> 2015-07-17 22:40:41 +1000
commit d7e9b92a8117edab8869c132a4f7bfae3d3ff2ff (patch)
tree 662253d1f3793c5119e5a9fdb608a3aa520621b6 /src/third_party/wiredtiger/src
parent 5170a33c1a3632cff838c4b9291938cc3a4ad41c (diff)
download mongo-d7e9b92a8117edab8869c132a4f7bfae3d3ff2ff.tar.gz
Import wiredtiger-wiredtiger-2.6.1-284-g42823c9.tar.gz from wiredtiger branch mongodb-3.2
Diffstat (limited to 'src/third_party/wiredtiger/src')
-rw-r--r-- src/third_party/wiredtiger/src/block/block_open.c 14
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_debug.c 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_page.c 16
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_split.c 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_sync.c 23
-rw-r--r-- src/third_party/wiredtiger/src/config/config_def.c 2
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_cache_pool.c 122
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_handle.c 5
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_log.c 18
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_sweep.c 15
-rw-r--r-- src/third_party/wiredtiger/src/cursor/cur_backup.c 8
-rw-r--r-- src/third_party/wiredtiger/src/cursor/cur_metadata.c 75
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_lru.c 4
-rw-r--r-- src/third_party/wiredtiger/src/evict/evict_page.c 3
-rw-r--r-- src/third_party/wiredtiger/src/include/btmem.h 5
-rw-r--r-- src/third_party/wiredtiger/src/include/cache.h 12
-rw-r--r-- src/third_party/wiredtiger/src/include/connection.h 2
-rw-r--r-- src/third_party/wiredtiger/src/include/cursor.h 3
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h 3
-rw-r--r-- src/third_party/wiredtiger/src/include/intpack.i 1
-rw-r--r-- src/third_party/wiredtiger/src/include/log.h 15
-rw-r--r-- src/third_party/wiredtiger/src/include/packing.i 23
-rw-r--r-- src/third_party/wiredtiger/src/include/stat.h 1
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.h 5
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.i 31
-rw-r--r-- src/third_party/wiredtiger/src/include/wiredtiger.in 160
-rw-r--r-- src/third_party/wiredtiger/src/log/log.c 24
-rw-r--r-- src/third_party/wiredtiger/src/log/log_slot.c 101
-rw-r--r-- src/third_party/wiredtiger/src/meta/meta_track.c 63
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c 86
-rw-r--r-- src/third_party/wiredtiger/src/schema/schema_create.c 16
-rw-r--r-- src/third_party/wiredtiger/src/session/session_api.c 21
-rw-r--r-- src/third_party/wiredtiger/src/support/stat.c 2
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn.c 58
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_ckpt.c 95
-rw-r--r-- src/third_party/wiredtiger/src/utilities/util_dump.c 123
36 files changed, 646 insertions, 513 deletions
diff --git a/src/third_party/wiredtiger/src/block/block_open.c b/src/third_party/wiredtiger/src/block/block_open.c
index 8e45ec85a97..df42a14816f 100644
--- a/src/third_party/wiredtiger/src/block/block_open.c
+++ b/src/third_party/wiredtiger/src/block/block_open.c
@@ -100,11 +100,10 @@ __wt_block_manager_create(
WT_TRET(__wt_close(session, &fh));
/*
- * If checkpoint syncing is enabled, some filesystems require that we
- * sync the directory to be confident that the file will appear.
+ * Some filesystems require that we sync the directory to be confident
+ * that the file will appear.
*/
- if (ret == 0 && F_ISSET(S2C(session), WT_CONN_CKPT_SYNC) &&
- (ret = __wt_filename(session, filename, &path)) == 0) {
+ if (ret == 0 && (ret = __wt_filename(session, filename, &path)) == 0) {
ret = __wt_directory_sync(session, path);
__wt_free(session, path);
}
@@ -180,10 +179,10 @@ __wt_block_open(WT_SESSION_IMPL *session,
WT_DECL_RET;
uint64_t bucket, hash;
- WT_TRET(__wt_verbose(session, WT_VERB_BLOCK, "open: %s", filename));
+ WT_RET(__wt_verbose(session, WT_VERB_BLOCK, "open: %s", filename));
conn = S2C(session);
- *blockp = NULL;
+ *blockp = block = NULL;
hash = __wt_hash_city64(filename, strlen(filename));
bucket = hash % WT_HASH_ARRAY_SIZE;
__wt_spin_lock(session, &conn->block_lock);
@@ -264,7 +263,8 @@ __wt_block_open(WT_SESSION_IMPL *session,
__wt_spin_unlock(session, &conn->block_lock);
return (0);
-err: WT_TRET(__block_destroy(session, block));
+err: if (block != NULL)
+ WT_TRET(__block_destroy(session, block));
__wt_spin_unlock(session, &conn->block_lock);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index 4625865fbf7..77d80cdb3a2 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -334,6 +334,8 @@ __wt_debug_disk(
if (F_ISSET(dsk, WT_PAGE_COMPRESSED))
__dmsg(ds, ", compressed");
+ if (F_ISSET(dsk, WT_PAGE_ENCRYPTED))
+ __dmsg(ds, ", encrypted");
if (F_ISSET(dsk, WT_PAGE_EMPTY_V_ALL))
__dmsg(ds, ", empty-all");
if (F_ISSET(dsk, WT_PAGE_EMPTY_V_NONE))
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index f08909a4b85..86edd992b28 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -21,7 +21,7 @@ static int __inmem_row_leaf_entries(
* Check if a page matches the criteria for forced eviction.
*/
static int
-__evict_force_check(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
+__evict_force_check(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_BTREE *btree;
@@ -35,10 +35,6 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
if (WT_PAGE_IS_INTERNAL(page))
return (0);
- /* Eviction may be turned off. */
- if (LF_ISSET(WT_READ_NO_EVICT) || F_ISSET(btree, WT_BTREE_NO_EVICTION))
- return (0);
-
/*
* It's hard to imagine a page with a huge memory footprint that has
* never been modified, but check to be sure.
@@ -68,11 +64,14 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
#endif
)
{
+ WT_BTREE *btree;
WT_DECL_RET;
WT_PAGE *page;
u_int sleep_cnt, wait_cnt;
int busy, cache_work, force_attempts, oldgen;
+ btree = S2BT(session);
+
for (force_attempts = oldgen = 0, wait_cnt = 0;;) {
switch (ref->state) {
case WT_REF_DISK:
@@ -115,7 +114,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
* be evicting if no hazard pointer is required, we're
* done.
*/
- if (F_ISSET(S2BT(session), WT_BTREE_IN_MEMORY))
+ if (F_ISSET(btree, WT_BTREE_IN_MEMORY))
goto skip_evict;
/*
@@ -140,7 +139,8 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
* the page's generation number. If eviction isn't being
* done on this file, we're done.
*/
- if (F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION))
+ if (LF_ISSET(WT_READ_NO_EVICT) ||
+ F_ISSET(btree, WT_BTREE_NO_EVICTION))
goto skip_evict;
/*
@@ -148,7 +148,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
*/
page = ref->page;
if (force_attempts < 10 &&
- __evict_force_check(session, page, flags)) {
+ __evict_force_check(session, page)) {
++force_attempts;
ret = __wt_page_release_evict(session, ref);
/* If forced eviction fails, stall. */
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 6f27e077109..dbd4042129d 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -753,7 +753,7 @@ __split_multi_inmem(
/*
* We modified the page above, which will have set the first dirty
- * transaction to the last transaction current running. However, the
+ * transaction to the last transaction currently running. However, the
* updates we installed may be older than that. Set the first dirty
* transaction to an impossibly old value so this page is never skipped
* in a checkpoint.
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index 6c5b1fb98e8..838d778dadf 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -109,17 +109,6 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
/* Write all dirty in-cache pages. */
flags |= WT_READ_NO_EVICT;
for (walk = NULL;;) {
- /*
- * If we have a page, and it was ever modified, track
- * the highest transaction ID in the tree. We do this
- * here because we want the value after reconciling
- * dirty pages.
- */
- if (walk != NULL && walk->page != NULL &&
- (mod = walk->page->modify) != NULL &&
- WT_TXNID_LT(btree->rec_max_txn, mod->rec_max_txn))
- btree->rec_max_txn = mod->rec_max_txn;
-
WT_ERR(__wt_tree_walk(session, &walk, NULL, flags));
if (walk == NULL)
break;
@@ -190,6 +179,18 @@ err: /* On error, clear any left-over tree walk. */
if (btree->checkpointing) {
/*
+ * Update the checkpoint generation for this handle so visible
+ * updates newer than the checkpoint can be evicted.
+ *
+ * This has to be published before eviction is enabled again,
+ * so that eviction knows that the checkpoint has completed.
+ */
+ WT_PUBLISH(btree->checkpoint_gen,
+ S2C(session)->txn_global.checkpoint_gen);
+ WT_STAT_FAST_DATA_SET(session,
+ btree_checkpoint_generation, btree->checkpoint_gen);
+
+ /*
* Clear the checkpoint flag and push the change; not required,
* but publishing the change means stalled eviction gets moving
* as soon as possible.
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index f7b65a8f73d..73837c46ee8 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -57,7 +57,7 @@ static const WT_CONFIG_CHECK
confchk_wiredtiger_open_file_manager_subconfigs[] = {
{ "close_handle_minimum", "int", NULL, "min=0", NULL, 0 },
{ "close_idle_time", "int",
- NULL, "min=1,max=100000",
+ NULL, "min=0,max=100000",
NULL, 0 },
{ "close_scan_interval", "int",
NULL, "min=1,max=100000",
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
index de7e9e3486f..fdc95a32387 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
@@ -11,15 +11,29 @@
/*
* Tuning constants.
*/
-/* Threshold when a connection is allocated more cache */
-#define WT_CACHE_POOL_BUMP_THRESHOLD 6
-/* Threshold when a connection is allocated less cache */
-#define WT_CACHE_POOL_REDUCE_THRESHOLD 2
+/*
+ * Threshold when a connection is allocated more cache, as a percentage of
+ * the amount of pressure the busiest participant has.
+ */
+#define WT_CACHE_POOL_BUMP_THRESHOLD 60
+/*
+ * Threshold when a connection is allocated less cache, as a percentage of
+ * the amount of pressure the busiest participant has.
+ */
+#define WT_CACHE_POOL_REDUCE_THRESHOLD 20
/* Balancing passes after a bump before a connection is a candidate. */
#define WT_CACHE_POOL_BUMP_SKIPS 10
/* Balancing passes after a reduction before a connection is a candidate. */
#define WT_CACHE_POOL_REDUCE_SKIPS 5
+/*
+ * Constants that control how much influence different metrics have on
+ * the pressure calculation.
+ */
+#define WT_CACHE_POOL_APP_EVICT_MULTIPLIER 10
+#define WT_CACHE_POOL_APP_WAIT_MULTIPLIER 50
+#define WT_CACHE_POOL_READ_MULTIPLIER 1
+
static int __cache_pool_adjust(WT_SESSION_IMPL *, uint64_t, uint64_t, int *);
static int __cache_pool_assess(WT_SESSION_IMPL *, uint64_t *);
static int __cache_pool_balance(WT_SESSION_IMPL *);
@@ -441,10 +455,12 @@ __cache_pool_assess(WT_SESSION_IMPL *session, uint64_t *phighest)
WT_CACHE_POOL *cp;
WT_CACHE *cache;
WT_CONNECTION_IMPL *entry;
- uint64_t entries, highest, new;
+ uint64_t app_evicts, app_waits, reads;
+ uint64_t entries, highest, tmp;
cp = __wt_process.cache_pool;
- entries = highest = 0;
+ entries = 0;
+ highest = 1; /* Avoid divide by zero */
/* Generate read pressure information. */
TAILQ_FOREACH(entry, &cp->cache_pool_qh, cpq) {
@@ -453,22 +469,54 @@ __cache_pool_assess(WT_SESSION_IMPL *session, uint64_t *phighest)
continue;
cache = entry->cache;
++entries;
- new = cache->bytes_read;
- /* Handle wrapping of eviction requests. */
- if (new >= cache->cp_saved_read)
- cache->cp_current_read = new - cache->cp_saved_read;
+
+ /*
+ * Figure out a delta since the last time we did an assessment
+ * for each metric we are tracking. Watch out for wrapping
+ * of values.
+ */
+ tmp = cache->bytes_read;
+ if (tmp >= cache->cp_saved_read)
+ reads = tmp - cache->cp_saved_read;
else
- cache->cp_current_read = new;
- cache->cp_saved_read = new;
- if (cache->cp_current_read > highest)
- highest = cache->cp_current_read;
+ reads = (UINT64_MAX - cache->cp_saved_read) + tmp;
+ cache->cp_saved_read = tmp;
+
+ /* Update the application eviction count information */
+ tmp = cache->app_evicts;
+ if (tmp >= cache->cp_saved_app_evicts)
+ app_evicts = tmp - cache->cp_saved_app_evicts;
+ else
+ app_evicts =
+ (UINT64_MAX - cache->cp_saved_app_evicts) + tmp;
+ cache->cp_saved_app_evicts = tmp;
+
+ /* Update the eviction wait information */
+ tmp = cache->app_waits;
+ if (tmp >= cache->cp_saved_app_waits)
+ app_waits = tmp - cache->cp_saved_app_waits;
+ else
+ app_waits =
+ (UINT64_MAX - cache->cp_saved_app_waits) + tmp;
+ cache->cp_saved_app_waits = tmp;
+
+ /* Calculate the weighted pressure for this member */
+ cache->cp_pass_pressure =
+ (app_evicts * WT_CACHE_POOL_APP_EVICT_MULTIPLIER) +
+ (app_waits * WT_CACHE_POOL_APP_WAIT_MULTIPLIER) +
+ (reads * WT_CACHE_POOL_READ_MULTIPLIER);
+
+ if (cache->cp_pass_pressure > highest)
+ highest = cache->cp_pass_pressure;
+
+ WT_RET(__wt_verbose(session, WT_VERB_SHARED_CACHE,
+ "Assess entry. reads: %" PRIu64 ", app evicts: %" PRIu64
+ ", app waits: %" PRIu64 ", pressure: %" PRIu64,
+ reads, app_evicts, app_waits, cache->cp_pass_pressure));
}
WT_RET(__wt_verbose(session, WT_VERB_SHARED_CACHE,
"Highest eviction count: %" PRIu64 ", entries: %" PRIu64,
highest, entries));
- /* Normalize eviction information across connections. */
- highest = highest / (entries + 1);
- ++highest; /* Avoid divide by zero. */
*phighest = highest;
return (0);
@@ -487,18 +535,21 @@ __cache_pool_adjust(WT_SESSION_IMPL *session,
WT_CACHE_POOL *cp;
WT_CACHE *cache;
WT_CONNECTION_IMPL *entry;
- uint64_t adjusted, reserved, read_pressure;
+ uint64_t adjusted, highest_percentile, pressure, reserved;
int force, grew;
*adjustedp = 0;
cp = __wt_process.cache_pool;
force = (cp->currently_used > cp->size);
grew = 0;
+ /* Highest as a percentage, avoid 0 */
+ highest_percentile = (highest / 100) + 1;
+
if (WT_VERBOSE_ISSET(session, WT_VERB_SHARED_CACHE)) {
WT_RET(__wt_verbose(session,
WT_VERB_SHARED_CACHE, "Cache pool distribution: "));
WT_RET(__wt_verbose(session, WT_VERB_SHARED_CACHE,
- "\t" "cache_size, read_pressure, skips: "));
+ "\t" "cache_size, pressure, skips: "));
}
TAILQ_FOREACH(entry, &cp->cache_pool_qh, cpq) {
@@ -506,10 +557,17 @@ __cache_pool_adjust(WT_SESSION_IMPL *session,
reserved = cache->cp_reserved;
adjusted = 0;
- read_pressure = cache->cp_current_read / highest;
+ /*
+ * The read pressure is calculated as a percentage of how
+ * much read pressure there is on this participant compared
+ * to the participant with the most activity. The closer we
+ * are to the most active the more cache we should get
+ * assigned.
+ */
+ pressure = cache->cp_pass_pressure / highest_percentile;
WT_RET(__wt_verbose(session, WT_VERB_SHARED_CACHE,
"\t%" PRIu64 ", %" PRIu64 ", %" PRIu32,
- entry->cache_size, read_pressure, cache->cp_skip_count));
+ entry->cache_size, pressure, cache->cp_skip_count));
/* Allow to stabilize after changes. */
if (cache->cp_skip_count > 0 && --cache->cp_skip_count > 0)
@@ -523,6 +581,7 @@ __cache_pool_adjust(WT_SESSION_IMPL *session,
if (entry->cache_size < reserved) {
grew = 1;
adjusted = reserved - entry->cache_size;
+
/*
* Conditions for reducing the amount of resources for an
* entry:
@@ -534,9 +593,9 @@ __cache_pool_adjust(WT_SESSION_IMPL *session,
* space in the pool.
*/
} else if ((force && entry->cache_size > reserved) ||
- (read_pressure < WT_CACHE_POOL_REDUCE_THRESHOLD &&
- highest > 1 && entry->cache_size > reserved &&
- cp->currently_used >= cp->size)) {
+ (pressure < WT_CACHE_POOL_REDUCE_THRESHOLD &&
+ highest > 1 && entry->cache_size > reserved &&
+ cp->currently_used >= cp->size)) {
grew = 0;
/*
* Shrink by a chunk size if that doesn't drop us
@@ -553,14 +612,15 @@ __cache_pool_adjust(WT_SESSION_IMPL *session,
* - This entry is using less than the entire cache pool
* - The connection is using enough cache to require eviction
* - There is space available in the pool
- * - Additional cache would benefit the connection
+ * - Additional cache would benefit the connection OR
+ * - The pool is less than half distributed
*/
- } else if (highest > 1 &&
- entry->cache_size < cp->size &&
- cache->bytes_inmem >=
- (entry->cache_size * cache->eviction_target) / 100 &&
- cp->currently_used < cp->size &&
- read_pressure > bump_threshold) {
+ } else if (entry->cache_size < cp->size &&
+ __wt_cache_bytes_inuse(cache) >=
+ (entry->cache_size * cache->eviction_target) / 100 &&
+ ((cp->currently_used < cp->size &&
+ pressure > bump_threshold) ||
+ cp->currently_used < cp->size * 0.5)) {
grew = 1;
adjusted = WT_MIN(cp->chunk,
cp->size - cp->currently_used);
diff --git a/src/third_party/wiredtiger/src/conn/conn_handle.c b/src/third_party/wiredtiger/src/conn/conn_handle.c
index 0e7ab0a03d8..94e69897c1d 100644
--- a/src/third_party/wiredtiger/src/conn/conn_handle.c
+++ b/src/third_party/wiredtiger/src/conn/conn_handle.c
@@ -53,7 +53,8 @@ __wt_connection_init(WT_CONNECTION_IMPL *conn)
WT_RET(__wt_spin_init(session, &conn->dhandle_lock, "data handle"));
WT_RET(__wt_spin_init(session, &conn->encryptor_lock, "encryptor"));
WT_RET(__wt_spin_init(session, &conn->fh_lock, "file list"));
- WT_RET(__wt_spin_init(session, &conn->hot_backup_lock, "hot backup"));
+ WT_RET(__wt_rwlock_alloc(session,
+ &conn->hot_backup_lock, "hot backup"));
WT_RET(__wt_spin_init(session, &conn->reconfig_lock, "reconfigure"));
WT_RET(__wt_spin_init(session, &conn->schema_lock, "schema"));
WT_RET(__wt_spin_init(session, &conn->table_lock, "table creation"));
@@ -136,7 +137,7 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn)
__wt_spin_destroy(session, &conn->dhandle_lock);
__wt_spin_destroy(session, &conn->encryptor_lock);
__wt_spin_destroy(session, &conn->fh_lock);
- __wt_spin_destroy(session, &conn->hot_backup_lock);
+ WT_TRET(__wt_rwlock_destroy(session, &conn->hot_backup_lock));
__wt_spin_destroy(session, &conn->reconfig_lock);
__wt_spin_destroy(session, &conn->schema_lock);
__wt_spin_destroy(session, &conn->table_lock);
diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c
index bc80152f6bf..be7ce2e9344 100644
--- a/src/third_party/wiredtiger/src/conn/conn_log.c
+++ b/src/third_party/wiredtiger/src/conn/conn_log.c
@@ -139,7 +139,7 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
* We can only archive files if a hot backup is not in progress or
* if we are the backup.
*/
- __wt_spin_lock(session, &conn->hot_backup_lock);
+ WT_RET(__wt_readlock(session, conn->hot_backup_lock));
locked = 1;
if (conn->hot_backup == 0 || backup_file != 0) {
for (i = 0; i < logcount; i++) {
@@ -151,7 +151,7 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
}
}
}
- __wt_spin_unlock(session, &conn->hot_backup_lock);
+ WT_ERR(__wt_readunlock(session, conn->hot_backup_lock));
locked = 0;
__wt_log_files_free(session, logfiles, logcount);
logfiles = NULL;
@@ -167,7 +167,7 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
if (0)
err: __wt_err(session, ret, "log archive server error");
if (locked)
- __wt_spin_unlock(session, &conn->hot_backup_lock);
+ WT_TRET(__wt_readunlock(session, conn->hot_backup_lock));
if (logfiles != NULL)
__wt_log_files_free(session, logfiles, logcount);
return (ret);
@@ -207,9 +207,8 @@ __log_prealloc_once(WT_SESSION_IMPL *session)
if (log->prep_missed > 0) {
conn->log_prealloc += log->prep_missed;
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
- "Now pre-allocating up to %" PRIu32,
- conn->log_prealloc));
- log->prep_missed = 0;
+ "Missed %" PRIu32 ". Now pre-allocating up to %" PRIu32,
+ log->prep_missed, conn->log_prealloc));
}
WT_STAT_FAST_CONN_SET(session,
log_prealloc_max, conn->log_prealloc);
@@ -221,6 +220,13 @@ __log_prealloc_once(WT_SESSION_IMPL *session)
session, ++log->prep_fileid, WT_LOG_PREPNAME, 1));
WT_STAT_FAST_CONN_INCR(session, log_prealloc_files);
}
+ /*
+ * Reset the missed count now. If we missed during pre-allocating
+ * the log files, it means the allocation is not keeping up, not that
+ * we didn't allocate enough. So we don't just want to keep adding
+ * in more.
+ */
+ log->prep_missed = 0;
if (0)
err: __wt_err(session, ret, "log pre-alloc server error");
diff --git a/src/third_party/wiredtiger/src/conn/conn_sweep.c b/src/third_party/wiredtiger/src/conn/conn_sweep.c
index 08137c9c9ff..ec6f628a02e 100644
--- a/src/third_party/wiredtiger/src/conn/conn_sweep.c
+++ b/src/third_party/wiredtiger/src/conn/conn_sweep.c
@@ -35,7 +35,8 @@ __sweep_mark(WT_SESSION_IMPL *session, int *dead_handlesp)
continue;
}
if (dhandle->session_inuse != 0 ||
- now <= dhandle->timeofdeath + conn->sweep_idle_time)
+ now <= dhandle->timeofdeath + conn->sweep_idle_time ||
+ conn->sweep_idle_time == 0)
continue;
if (dhandle->timeofdeath == 0) {
dhandle->timeofdeath = now;
@@ -121,6 +122,10 @@ __sweep_expire(WT_SESSION_IMPL *session)
conn = S2C(session);
+ /* If sweep_idle_time is 0, then we won't expire any cursors */
+ if (conn->sweep_idle_time == 0)
+ return (0);
+
/* Don't discard handles that have been open recently. */
WT_RET(__wt_seconds(session, &now));
@@ -265,8 +270,14 @@ __sweep_server(void *arg)
*/
WT_ERR(__sweep_mark(session, &dead_handles));
+ /*
+ * We only want to flush and expire if there are no dead handles
+ * and if either the sweep_idle_time is not 0, or if we have
+ * reached the configured limit of handles.
+ */
if (dead_handles == 0 &&
- conn->open_file_count < conn->sweep_handles_min)
+ (conn->open_file_count < conn->sweep_handles_min ||
+ conn->sweep_idle_time != 0))
continue;
/* Close handles if we have reached the configured limit */
diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c
index 8f43e98e2f7..60d94697189 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_backup.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c
@@ -217,9 +217,9 @@ __backup_start(
* could start a hot backup that would race with an already-started
* checkpoint.
*/
- __wt_spin_lock(session, &conn->hot_backup_lock);
+ WT_RET(__wt_writelock(session, conn->hot_backup_lock));
conn->hot_backup = 1;
- __wt_spin_unlock(session, &conn->hot_backup_lock);
+ WT_ERR(__wt_writeunlock(session, conn->hot_backup_lock));
/* Create the hot backup file. */
WT_ERR(__backup_file_create(session, cb, 0));
@@ -318,9 +318,9 @@ __backup_stop(WT_SESSION_IMPL *session)
ret = __wt_backup_file_remove(session);
/* Checkpoint deletion can proceed, as can the next hot backup. */
- __wt_spin_lock(session, &conn->hot_backup_lock);
+ WT_TRET(__wt_writelock(session, conn->hot_backup_lock));
conn->hot_backup = 0;
- __wt_spin_unlock(session, &conn->hot_backup_lock);
+ WT_TRET(__wt_writeunlock(session, conn->hot_backup_lock));
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/cursor/cur_metadata.c b/src/third_party/wiredtiger/src/cursor/cur_metadata.c
index 9860eb65a55..460c46c0d29 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_metadata.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_metadata.c
@@ -30,15 +30,42 @@
WT_CURSTD_VALUE_EXT); \
} while (0)
-#define WT_MD_SET_KEY_VALUE(c, mc, fc) do { \
- (c)->key.data = (fc)->key.data; \
- (c)->key.size = (fc)->key.size; \
- (c)->value.data = (fc)->value.data; \
- (c)->value.size = (fc)->value.size; \
- F_SET((c), WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT); \
- F_CLR((mc), WT_MDC_ONMETADATA); \
- F_SET((mc), WT_MDC_POSITIONED); \
-} while (0)
+/*
+ * __curmetadata_setkv --
+ * Copy key/value into the public cursor, stripping internal metadata for
+ * "create-only" cursors.
+ */
+static int
+__curmetadata_setkv(WT_CURSOR_METADATA *mdc, WT_CURSOR *fc)
+{
+ WT_CURSOR *c;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+ char *value;
+
+ c = &mdc->iface;
+ session = (WT_SESSION_IMPL *)c->session;
+
+ c->key.data = fc->key.data;
+ c->key.size = fc->key.size;
+ if (F_ISSET(mdc, WT_MDC_CREATEONLY)) {
+ WT_RET(__wt_schema_create_strip(
+ session, fc->value.data, NULL, &value));
+ ret = __wt_buf_set(
+ session, &c->value, value, strlen(value) + 1);
+ __wt_free(session, value);
+ WT_RET(ret);
+ } else {
+ c->value.data = fc->value.data;
+ c->value.size = fc->value.size;
+ }
+
+ F_SET(c, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT);
+ F_CLR(mdc, WT_MDC_ONMETADATA);
+ F_SET(mdc, WT_MDC_POSITIONED);
+
+ return (0);
+}
/*
* Check if a key matches the metadata. The public value is "metadata:",
@@ -57,17 +84,21 @@ __curmetadata_metadata_search(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
{
WT_CURSOR_METADATA *mdc;
WT_DECL_RET;
- char *value;
+ char *value, *stripped;
mdc = (WT_CURSOR_METADATA *)cursor;
/* The metadata search interface allocates a new string in value. */
WT_RET(__wt_metadata_search(session, WT_METAFILE_URI, &value));
- /*
- * Copy the value to the underlying btree cursor's tmp item which will
- * be freed when the cursor is closed.
- */
+ if (F_ISSET(mdc, WT_MDC_CREATEONLY)) {
+ ret = __wt_schema_create_strip(
+ session, value, NULL, &stripped);
+ __wt_free(session, value);
+ WT_RET(ret);
+ value = stripped;
+ }
+
ret = __wt_buf_setstr(session, &cursor->value, value);
__wt_free(session, value);
WT_RET(ret);
@@ -141,7 +172,7 @@ __curmetadata_next(WT_CURSOR *cursor)
WT_ERR(__curmetadata_metadata_search(session, cursor));
else {
WT_ERR(file_cursor->next(mdc->file_cursor));
- WT_MD_SET_KEY_VALUE(cursor, mdc, file_cursor);
+ WT_ERR(__curmetadata_setkv(mdc, file_cursor));
}
err: if (ret != 0) {
@@ -174,9 +205,9 @@ __curmetadata_prev(WT_CURSOR *cursor)
}
ret = file_cursor->prev(file_cursor);
- if (ret == 0) {
- WT_MD_SET_KEY_VALUE(cursor, mdc, file_cursor);
- } else if (ret == WT_NOTFOUND)
+ if (ret == 0)
+ WT_ERR(__curmetadata_setkv(mdc, file_cursor));
+ else if (ret == WT_NOTFOUND)
WT_ERR(__curmetadata_metadata_search(session, cursor));
err: if (ret != 0) {
@@ -234,7 +265,7 @@ __curmetadata_search(WT_CURSOR *cursor)
WT_ERR(__curmetadata_metadata_search(session, cursor));
else {
WT_ERR(file_cursor->search(file_cursor));
- WT_MD_SET_KEY_VALUE(cursor, mdc, file_cursor);
+ WT_ERR(__curmetadata_setkv(mdc, file_cursor));
}
err: if (ret != 0) {
@@ -268,7 +299,7 @@ __curmetadata_search_near(WT_CURSOR *cursor, int *exact)
*exact = 1;
} else {
WT_ERR(file_cursor->search_near(file_cursor, exact));
- WT_MD_SET_KEY_VALUE(cursor, mdc, file_cursor);
+ WT_ERR(__curmetadata_setkv(mdc, file_cursor));
}
err: if (ret != 0) {
@@ -438,6 +469,10 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session,
WT_ERR(__wt_cursor_init(cursor, uri, owner, cfg, cursorp));
+ /* If we are only returning create config, strip internal metadata. */
+ if (WT_STREQ(uri, "metadata:create"))
+ F_SET(mdc, WT_MDC_CREATEONLY);
+
/*
* Metadata cursors default to readonly; if not set to not-readonly,
* they are permanently readonly and cannot be reconfigured.
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 1bf62fc7130..513da401ae6 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -1475,7 +1475,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, int busy, int pct_full)
* to make sure there is free space in the cache.
*/
txn_global = &conn->txn_global;
- txn_state = &txn_global->states[session->id];
+ txn_state = WT_SESSION_TXN_STATE(session);
txn_busy = txn_state->id != WT_TXN_NONE ||
session->nhazard > 0 ||
(txn_state->snap_min != WT_TXN_NONE &&
@@ -1512,6 +1512,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, int busy, int pct_full)
q_found = 0;
switch (ret = __evict_page(session, 0)) {
case 0:
+ cache->app_evicts++;
if (--count == 0)
return (0);
@@ -1550,6 +1551,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, int busy, int pct_full)
WT_RET(
__wt_cond_wait(session, cache->evict_waiter_cond, 100000));
+ cache->app_waits++;
/* Check if things have changed so that we are busy. */
if (!busy && txn_state->snap_min != WT_TXN_NONE &&
txn_global->current != txn_global->oldest_id)
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 8680a644421..1e5faf45de2 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -59,6 +59,9 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int closing)
conn = S2C(session);
+ /* Checkpoints should never do eviction. */
+ WT_ASSERT(session, !WT_SESSION_IS_CHECKPOINT(session));
+
page = ref->page;
forced_eviction = page->read_gen == WT_READGEN_OLDEST;
inmem_split = 0;
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 4809d257e7e..f13504d66ca 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -195,6 +195,11 @@ struct __wt_page_modify {
/* The largest update transaction ID (approximate). */
uint64_t update_txn;
+#ifdef HAVE_DIAGNOSTIC
+ /* Check that transaction time moves forward. */
+ uint64_t last_oldest_id;
+#endif
+
/* Dirty bytes added to the cache. */
size_t bytes_dirty;
diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h
index cb7e66d2bbd..ed93f82538c 100644
--- a/src/third_party/wiredtiger/src/include/cache.h
+++ b/src/third_party/wiredtiger/src/include/cache.h
@@ -65,6 +65,9 @@ struct __wt_cache {
uint64_t pages_dirty;
uint64_t bytes_read; /* Bytes read into memory */
+ uint64_t app_evicts; /* Pages evicted by user threads */
+ uint64_t app_waits; /* User threads waited for cache */
+
uint64_t evict_max_page_size; /* Largest page seen at eviction */
/*
@@ -105,12 +108,15 @@ struct __wt_cache {
/*
* Cache pool information.
*/
- uint64_t cp_saved_read; /* Read count from last pass */
- uint64_t cp_current_read; /* Read count from current pass */
- uint32_t cp_skip_count; /* Post change stabilization */
+ uint64_t cp_pass_pressure; /* Calculated pressure from this pass */
uint64_t cp_reserved; /* Base size for this cache */
WT_SESSION_IMPL *cp_session; /* May be used for cache management */
+ uint32_t cp_skip_count; /* Post change stabilization */
wt_thread_t cp_tid; /* Thread ID for cache pool manager */
+ /* State seen at the last pass of the shared cache manager */
+ uint64_t cp_saved_app_evicts; /* User eviction count at last review */
+ uint64_t cp_saved_app_waits; /* User wait count at last review */
+ uint64_t cp_saved_read; /* Read count at last review */
/*
* Flags.
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index d6a2bb0b17a..cd55aadfc07 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -266,7 +266,7 @@ struct __wt_connection_impl {
WT_TXN_GLOBAL txn_global; /* Global transaction state */
- WT_SPINLOCK hot_backup_lock; /* Hot backup serialization */
+ WT_RWLOCK *hot_backup_lock; /* Hot backup serialization */
int hot_backup;
WT_SESSION_IMPL *ckpt_session; /* Checkpoint thread session */
diff --git a/src/third_party/wiredtiger/src/include/cursor.h b/src/third_party/wiredtiger/src/include/cursor.h
index 9b61318aacc..36f36f2c46c 100644
--- a/src/third_party/wiredtiger/src/include/cursor.h
+++ b/src/third_party/wiredtiger/src/include/cursor.h
@@ -291,8 +291,9 @@ struct __wt_cursor_metadata {
WT_CURSOR *file_cursor; /* Queries of regular metadata */
-#define WT_MDC_POSITIONED 0x01
+#define WT_MDC_CREATEONLY 0x01
#define WT_MDC_ONMETADATA 0x02
+#define WT_MDC_POSITIONED 0x04
uint32_t flags;
};
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index e17b309cf5d..87099ac839f 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -361,7 +361,6 @@ extern int __wt_log_slot_notify(WT_SESSION_IMPL *session, WT_LOGSLOT *slot);
extern int __wt_log_slot_wait(WT_SESSION_IMPL *session, WT_LOGSLOT *slot);
extern int64_t __wt_log_slot_release(WT_LOGSLOT *slot, uint64_t size);
extern int __wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot);
-extern int __wt_log_slot_grow_buffers(WT_SESSION_IMPL *session, size_t newsize);
extern int __wt_clsm_request_switch(WT_CURSOR_LSM *clsm);
extern int __wt_clsm_await_switch(WT_CURSOR_LSM *clsm);
extern int __wt_clsm_init_merge( WT_CURSOR *cursor, u_int start_chunk, uint32_t start_id, u_int nchunks);
@@ -533,6 +532,7 @@ extern int __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk);
extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk);
extern int __wt_bulk_insert_fix(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk);
extern int __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk);
+extern int __wt_schema_create_strip(WT_SESSION_IMPL *session, const char *v1, const char *v2, char **value_ret);
extern int __wt_direct_io_size_check(WT_SESSION_IMPL *session, const char **cfg, const char *config_name, uint32_t *allocsizep);
extern int __wt_schema_colgroup_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, const char *config, WT_ITEM *buf);
extern int __wt_schema_index_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, const char *config, WT_ITEM *buf);
@@ -575,7 +575,6 @@ extern int __wt_schema_worker(WT_SESSION_IMPL *session, const char *uri, int (*f
extern int __wt_session_reset_cursors(WT_SESSION_IMPL *session, int free_buffers);
extern int __wt_session_copy_values(WT_SESSION_IMPL *session);
extern int __wt_open_cursor(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp);
-extern int __wt_session_create_strip(WT_SESSION *wt_session, const char *v1, const char *v2, char **value_ret);
extern int __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name, int uses_dhandles, int open_metadata, WT_SESSION_IMPL **sessionp);
extern int __wt_open_session(WT_CONNECTION_IMPL *conn, WT_EVENT_HANDLER *event_handler, const char *config, int open_metadata, WT_SESSION_IMPL **sessionp);
extern int __wt_compact_uri_analyze(WT_SESSION_IMPL *session, const char *uri, int *skip);
diff --git a/src/third_party/wiredtiger/src/include/intpack.i b/src/third_party/wiredtiger/src/include/intpack.i
index e1bcdb42ebd..d3fdfeaf1a6 100644
--- a/src/third_party/wiredtiger/src/include/intpack.i
+++ b/src/third_party/wiredtiger/src/include/intpack.i
@@ -300,7 +300,6 @@ __wt_vunpack_int(const uint8_t **pp, size_t maxlen, int64_t *xp)
*xp = (int64_t)(GET_BITS(*p++, 5, 0) << 8);
*xp |= *p++;
*xp += NEG_2BYTE_MIN;
- p += 2;
break;
case NEG_1BYTE_MARKER:
case NEG_1BYTE_MARKER | 0x10:
diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h
index 3de72b8b9a6..051f9fb262e 100644
--- a/src/third_party/wiredtiger/src/include/log.h
+++ b/src/third_party/wiredtiger/src/include/log.h
@@ -12,7 +12,7 @@
/* Logging subsystem declarations. */
#define WT_LOG_ALIGN 128
-#define WT_LOG_SLOT_BUF_INIT_SIZE 64 * 1024
+#define WT_LOG_SLOT_BUF_SIZE 256 * 1024
#define WT_INIT_LSN(l) do { \
(l)->file = 1; \
@@ -91,11 +91,10 @@ typedef WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) struct {
WT_ITEM slot_buf; /* Buffer for grouped writes */
int32_t slot_churn; /* Active slots are scarce. */
-#define WT_SLOT_BUF_GROW 0x01 /* Grow buffer on release */
-#define WT_SLOT_BUFFERED 0x02 /* Buffer writes */
-#define WT_SLOT_CLOSEFH 0x04 /* Close old fh on release */
-#define WT_SLOT_SYNC 0x08 /* Needs sync on release */
-#define WT_SLOT_SYNC_DIR 0x10 /* Directory sync on release */
+#define WT_SLOT_BUFFERED 0x01 /* Buffer writes */
+#define WT_SLOT_CLOSEFH 0x02 /* Close old fh on release */
+#define WT_SLOT_SYNC 0x04 /* Needs sync on release */
+#define WT_SLOT_SYNC_DIR 0x08 /* Directory sync on release */
uint32_t flags; /* Flags */
} WT_LOGSLOT;
@@ -117,6 +116,7 @@ typedef struct {
*/
uint32_t fileid; /* Current log file number */
uint32_t prep_fileid; /* Pre-allocated file number */
+ uint32_t tmp_fileid; /* Temporary file number */
uint32_t prep_missed; /* Pre-allocated file misses */
WT_FH *log_fh; /* Logging file handle */
WT_FH *log_close_fh; /* Logging file handle to close */
@@ -157,10 +157,11 @@ typedef struct {
* slot count of one.
*/
#define WT_SLOT_ACTIVE 1
-#define WT_SLOT_POOL 16
+#define WT_SLOT_POOL 128
uint32_t pool_index; /* Global pool index */
WT_LOGSLOT *slot_array[WT_SLOT_ACTIVE]; /* Active slots */
WT_LOGSLOT slot_pool[WT_SLOT_POOL]; /* Pool of all slots */
+ uint32_t slot_buf_size; /* Buffer size for slots */
#define WT_LOG_FORCE_CONSOLIDATE 0x01 /* Disable direct writes */
uint32_t flags;
diff --git a/src/third_party/wiredtiger/src/include/packing.i b/src/third_party/wiredtiger/src/include/packing.i
index b97b3a322ce..bf6b5abce67 100644
--- a/src/third_party/wiredtiger/src/include/packing.i
+++ b/src/third_party/wiredtiger/src/include/packing.i
@@ -181,6 +181,7 @@ next: if (pack->cur == pack->end)
/* Integral types repeat <size> times. */
if (pv->size == 0)
goto next;
+ pv->havesize = 0;
pack->repeats = pv->size - 1;
pack->lastv = *pv;
return (0);
@@ -322,18 +323,19 @@ __pack_write(
*pp += pv->size;
break;
case 's':
+ WT_SIZE_CHECK(pv->size, maxlen);
+ memcpy(*pp, pv->u.s, pv->size);
+ *pp += pv->size;
+ break;
case 'S':
- /*
- * XXX if pv->havesize, only want to know if there is a
- * '\0' in the first pv->size characters.
- */
s = strlen(pv->u.s);
- if ((pv->type == 's' || pv->havesize) && pv->size < s) {
- s = pv->size;
- pad = 0;
- } else if (pv->havesize)
- pad = pv->size - s;
- else
+ if (pv->havesize) {
+ if (pv->size < s) {
+ s = pv->size;
+ pad = 0;
+ } else
+ pad = pv->size - s;
+ } else
pad = 1;
WT_SIZE_CHECK(s + pad, maxlen);
if (s > 0)
@@ -665,6 +667,7 @@ __wt_struct_unpackv(WT_SESSION_IMPL *session,
if (fmt[0] != '\0' && fmt[1] == '\0') {
pv.type = fmt[0];
+ pv.size = 1;
if ((ret = __unpack_read(session, &pv, &p, size)) == 0)
WT_UNPACK_PUT(session, pv, ap);
return (0);
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 728c8c9fe8e..f05d87c058b 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -203,7 +203,6 @@ struct __wt_connection_stats {
WT_STATS dh_session_handles;
WT_STATS dh_session_sweeps;
WT_STATS file_open;
- WT_STATS log_buffer_grow;
WT_STATS log_buffer_size;
WT_STATS log_bytes_payload;
WT_STATS log_bytes_written;
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 85c11e19685..7a67f713244 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -25,6 +25,9 @@
#define WT_SESSION_TXN_STATE(s) (&S2C(s)->txn_global.states[(s)->id])
+#define WT_SESSION_IS_CHECKPOINT(s) \
+ ((s)->id != 0 && (s)->id == S2C(s)->txn_global.checkpoint_id)
+
struct __wt_named_snapshot {
const char *name;
@@ -64,7 +67,7 @@ struct __wt_txn_global {
*/
volatile uint32_t checkpoint_id; /* Checkpoint's session ID */
volatile uint64_t checkpoint_gen;
- volatile uint64_t checkpoint_snap_min;
+ volatile uint64_t checkpoint_pinned;
/* Named snapshot state. */
WT_RWLOCK *nsnap_rwlock;
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 1e3afbd4df3..a9b54d26e47 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -105,19 +105,20 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
WT_TXN_GLOBAL *txn_global;
- uint64_t checkpoint_snap_min, oldest_id;
- uint32_t checkpoint_id;
+ uint64_t checkpoint_gen, checkpoint_pinned, oldest_id;
txn_global = &S2C(session)->txn_global;
btree = S2BT_SAFE(session);
/*
* Take a local copy of these IDs in case they are updated while we are
- * checking visibility.
+ * checking visibility. Only the generation needs to be carefully
+ * ordered: if a checkpoint is starting and the generation is bumped,
+ * we take the minimum of the other two IDs, which is what we want.
*/
- checkpoint_id = txn_global->checkpoint_id;
- checkpoint_snap_min = txn_global->checkpoint_snap_min;
oldest_id = txn_global->oldest_id;
+ WT_ORDERED_READ(checkpoint_gen, txn_global->checkpoint_gen);
+ checkpoint_pinned = txn_global->checkpoint_pinned;
/*
* Checkpoint transactions often fall behind ordinary application
@@ -129,17 +130,13 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session)
* checkpoint, or this handle is up to date with the active checkpoint
* then it's safe to ignore the checkpoint ID in the visibility check.
*/
- if (checkpoint_snap_min != WT_TXN_NONE &&
- checkpoint_id != session->id && (btree == NULL ||
- btree->checkpoint_gen != txn_global->checkpoint_gen) &&
- WT_TXNID_LT(checkpoint_snap_min, oldest_id))
- /*
- * Use the checkpoint ID for the visibility check if it is the
- * oldest ID in the system.
- */
- oldest_id = checkpoint_snap_min;
+ if (checkpoint_pinned == WT_TXN_NONE ||
+ WT_TXNID_LT(oldest_id, checkpoint_pinned) ||
+ WT_SESSION_IS_CHECKPOINT(session) ||
+ (btree != NULL && btree->checkpoint_gen == checkpoint_gen))
+ return (oldest_id);
- return (oldest_id);
+ return (checkpoint_pinned);
}
/*
@@ -355,7 +352,7 @@ __wt_txn_id_check(WT_SESSION_IMPL *session)
if (!F_ISSET(txn, WT_TXN_HAS_ID)) {
conn = S2C(session);
txn_global = &conn->txn_global;
- txn_state = &txn_global->states[session->id];
+ txn_state = WT_SESSION_TXN_STATE(session);
WT_ASSERT(session, txn_state->id == WT_TXN_NONE);
@@ -447,7 +444,7 @@ __wt_txn_cursor_op(WT_SESSION_IMPL *session)
txn = &session->txn;
txn_global = &S2C(session)->txn_global;
- txn_state = &txn_global->states[session->id];
+ txn_state = WT_SESSION_TXN_STATE(session);
/*
* If there is no transaction running (so we don't have an ID), and no
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 77cccfcf9d3..096fea3eeb3 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -1721,8 +1721,9 @@ struct __wt_connection {
* handles open before the file manager will look for handles to close.,
* an integer greater than or equal to 0; default \c 250.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;close_idle_time, amount of time in
- * seconds a file handle needs to be idle before attempting to close
- * it., an integer between 1 and 100000; default \c 30.}
+ * seconds a file handle needs to be idle before attempting to close it.
+ * A setting of 0 means that idle handles are not closed., an integer
+ * between 0 and 100000; default \c 30.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;close_scan_interval, interval in
* seconds at which to check for files that are inactive and close
* them., an integer between 1 and 100000; default \c 10.}
@@ -2152,11 +2153,12 @@ struct __wt_connection {
* before the file manager will look for handles to close., an integer greater
* than or equal to 0; default \c 250.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;close_idle_time, amount of time in seconds a
- * file handle needs to be idle before attempting to close it., an integer
- * between 1 and 100000; default \c 30.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;close_scan_interval, interval in seconds at
- * which to check for files that are inactive and close them., an integer
- * between 1 and 100000; default \c 10.}
+ * file handle needs to be idle before attempting to close it. A setting of 0
+ * means that idle handles are not closed., an integer between 0 and 100000;
+ * default \c 30.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;close_scan_interval, interval
+ * in seconds at which to check for files that are inactive and close them., an
+ * integer between 1 and 100000; default \c 10.}
* @config{ ),,}
* @config{hazard_max, maximum number of simultaneous hazard pointers per
* session handle., an integer greater than or equal to 15; default \c 1000.}
@@ -3682,150 +3684,148 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_DH_SESSION_SWEEPS 1069
/*! connection: files currently open */
#define WT_STAT_CONN_FILE_OPEN 1070
-/*! log: log buffer size increases */
-#define WT_STAT_CONN_LOG_BUFFER_GROW 1071
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1072
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1071
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1073
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1072
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1074
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1073
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1075
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1074
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1076
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1075
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1077
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1076
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1078
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1077
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1079
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1078
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1080
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1079
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1081
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1080
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1082
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1081
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1083
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1082
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1084
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1083
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1085
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1084
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1086
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1085
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1087
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1086
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1088
+#define WT_STAT_CONN_LOG_SCANS 1087
/*! log: consolidated slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1089
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1088
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1090
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1089
/*! log: consolidated slot joins */
-#define WT_STAT_CONN_LOG_SLOT_JOINS 1091
+#define WT_STAT_CONN_LOG_SLOT_JOINS 1090
/*! log: consolidated slot join races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1092
+#define WT_STAT_CONN_LOG_SLOT_RACES 1091
/*! log: slots selected for switching that were unavailable */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1093
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1092
/*! log: record size exceeded maximum */
-#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1094
+#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1093
/*! log: failed to find a slot large enough for record */
-#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1095
+#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1094
/*! log: consolidated slot join transitions */
-#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1096
+#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1095
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1097
+#define WT_STAT_CONN_LOG_SYNC 1096
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1098
+#define WT_STAT_CONN_LOG_SYNC_DIR 1097
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1099
+#define WT_STAT_CONN_LOG_WRITE_LSN 1098
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1100
+#define WT_STAT_CONN_LOG_WRITES 1099
/*! LSM: sleep for LSM checkpoint throttle */
-#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1101
+#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1100
/*! LSM: sleep for LSM merge throttle */
-#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1102
+#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1101
/*! LSM: rows merged in an LSM tree */
-#define WT_STAT_CONN_LSM_ROWS_MERGED 1103
+#define WT_STAT_CONN_LSM_ROWS_MERGED 1102
/*! LSM: application work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1104
+#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1103
/*! LSM: merge work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1105
+#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1104
/*! LSM: tree queue hit maximum */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1106
+#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1105
/*! LSM: switch work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1107
+#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1106
/*! LSM: tree maintenance operations scheduled */
-#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1108
+#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1107
/*! LSM: tree maintenance operations discarded */
-#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1109
+#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1108
/*! LSM: tree maintenance operations executed */
-#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1110
+#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1109
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1111
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1110
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1112
+#define WT_STAT_CONN_MEMORY_FREE 1111
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1113
+#define WT_STAT_CONN_MEMORY_GROW 1112
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1114
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1113
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1115
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1114
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1116
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1115
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1117
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1116
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1118
+#define WT_STAT_CONN_PAGE_SLEEP 1117
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1119
+#define WT_STAT_CONN_READ_IO 1118
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1120
+#define WT_STAT_CONN_REC_PAGES 1119
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1121
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1120
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1122
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1121
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1123
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1122
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1124
+#define WT_STAT_CONN_RWLOCK_READ 1123
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1125
+#define WT_STAT_CONN_RWLOCK_WRITE 1124
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1126
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1125
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1127
+#define WT_STAT_CONN_SESSION_OPEN 1126
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1128
+#define WT_STAT_CONN_TXN_BEGIN 1127
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1129
+#define WT_STAT_CONN_TXN_CHECKPOINT 1128
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1130
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1129
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1131
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1130
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1132
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1131
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1133
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1132
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1134
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1133
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1135
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1134
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1136
+#define WT_STAT_CONN_TXN_COMMIT 1135
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1137
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1136
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1138
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1137
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1139
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1138
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1140
+#define WT_STAT_CONN_TXN_ROLLBACK 1139
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1141
+#define WT_STAT_CONN_TXN_SYNC 1140
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1142
+#define WT_STAT_CONN_WRITE_IO 1141
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index 7776b68e3d2..77ae0383cbe 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -577,7 +577,6 @@ __log_file_header(
tmp.slot_fh = fh;
} else {
WT_ASSERT(session, fh == NULL);
- log->prep_missed++;
WT_ERR(__log_acquire(session, logrec->len, &tmp));
}
WT_ERR(__log_fill(session, &myslot, 1, buf, NULL));
@@ -777,25 +776,28 @@ __wt_log_allocfile(
WT_DECL_RET;
WT_FH *log_fh;
WT_LOG *log;
+ uint32_t tmp_id;
conn = S2C(session);
log = conn->log;
log_fh = NULL;
+
/*
* Preparing a log file entails creating a temporary file:
* - Writing the header.
* - Truncating to the offset of the first record.
* - Pre-allocating the file if needed.
- * - Renaming it to the pre-allocated file name.
+ * - Renaming it to the desired file name.
*/
WT_RET(__wt_scr_alloc(session, 0, &from_path));
WT_ERR(__wt_scr_alloc(session, 0, &to_path));
- WT_ERR(__log_filename(session, lognum, WT_LOG_TMPNAME, from_path));
+ tmp_id = WT_ATOMIC_ADD4(log->tmp_fileid, 1);
+ WT_ERR(__log_filename(session, tmp_id, WT_LOG_TMPNAME, from_path));
WT_ERR(__log_filename(session, lognum, dest, to_path));
/*
* Set up the temporary file.
*/
- WT_ERR(__log_openfile(session, 1, &log_fh, WT_LOG_TMPNAME, lognum));
+ WT_ERR(__log_openfile(session, 1, &log_fh, WT_LOG_TMPNAME, tmp_id));
WT_ERR(__log_file_header(session, log_fh, NULL, 1));
WT_ERR(__wt_ftruncate(session, log_fh, WT_LOG_FIRST_RECORD));
if (prealloc)
@@ -1245,9 +1247,12 @@ __wt_log_newfile(WT_SESSION_IMPL *session, int conn_create, int *created)
/*
* If we need to create the log file, do so now.
*/
- if (create_log && (ret = __wt_log_allocfile(
- session, log->fileid, WT_LOG_FILENAME, 0)) != 0)
- return (ret);
+ if (create_log) {
+ log->prep_missed++;
+ if ((ret = __wt_log_allocfile(
+ session, log->fileid, WT_LOG_FILENAME, 0)) != 0)
+ return (ret);
+ }
WT_RET(__log_openfile(session,
0, &log->log_fh, WT_LOG_FILENAME, log->fileid));
/*
@@ -1811,11 +1816,6 @@ __log_write_internal(WT_SESSION_IMPL *session, WT_ITEM *record, WT_LSN *lsnp,
session, record, lsnp, flags)) == EAGAIN)
;
WT_ERR(ret);
- /*
- * Increase the buffer size of any slots we can get access
- * to, so future consolidations are likely to succeed.
- */
- WT_ERR(__wt_log_slot_grow_buffers(session, 4 * rdup_len));
return (0);
}
WT_ERR(ret);
diff --git a/src/third_party/wiredtiger/src/log/log_slot.c b/src/third_party/wiredtiger/src/log/log_slot.c
index 741a8caf108..8723d492e13 100644
--- a/src/third_party/wiredtiger/src/log/log_slot.c
+++ b/src/third_party/wiredtiger/src/log/log_slot.c
@@ -54,13 +54,18 @@ __wt_log_slot_init(WT_SESSION_IMPL *session)
* Allocate memory for buffers now that the arrays are setup. Split
* this out to make error handling simpler.
*/
+ /*
+ * Cap the slot buffer to the log file size.
+ */
+ log->slot_buf_size = (uint32_t)WT_MIN(
+ conn->log_file_max, WT_LOG_SLOT_BUF_SIZE);
for (i = 0; i < WT_SLOT_POOL; i++) {
WT_ERR(__wt_buf_init(session,
- &log->slot_pool[i].slot_buf, WT_LOG_SLOT_BUF_INIT_SIZE));
+ &log->slot_pool[i].slot_buf, (size_t)log->slot_buf_size));
F_SET(&log->slot_pool[i], WT_SLOT_INIT_FLAGS);
}
WT_STAT_FAST_CONN_INCRV(session,
- log_buffer_size, WT_LOG_SLOT_BUF_INIT_SIZE * WT_SLOT_POOL);
+ log_buffer_size, log->slot_buf_size * WT_SLOT_POOL);
if (0) {
err: while (--i >= 0)
__wt_buf_free(session, &log->slot_pool[i].slot_buf);
@@ -101,12 +106,16 @@ __wt_log_slot_join(WT_SESSION_IMPL *session, uint64_t mysize,
WT_LOG *log;
WT_LOGSLOT *slot;
int64_t new_state, old_state;
- uint32_t allocated_slot, slot_grow_attempts;
+ uint32_t allocated_slot, slot_attempts;
conn = S2C(session);
log = conn->log;
- slot_grow_attempts = 0;
+ slot_attempts = 0;
+ if (mysize >= (uint64_t)log->slot_buf_size) {
+ WT_STAT_FAST_CONN_INCR(session, log_slot_toobig);
+ return (ENOMEM);
+ }
find_slot:
#if WT_SLOT_ACTIVE == 1
allocated_slot = 0;
@@ -146,12 +155,11 @@ join_slot:
goto find_slot;
}
/*
- * If the slot buffer isn't big enough to hold this update, mark
- * the slot for a buffer size increase and find another slot.
+ * If the slot buffer isn't big enough to hold this update, try
+ * to find another slot.
*/
if (new_state > (int64_t)slot->slot_buf.memsize) {
- F_SET(slot, WT_SLOT_BUF_GROW);
- if (++slot_grow_attempts > 5) {
+ if (++slot_attempts > 5) {
WT_STAT_FAST_CONN_INCR(session, log_slot_toosmall);
return (ENOMEM);
}
@@ -310,24 +318,8 @@ __wt_log_slot_release(WT_LOGSLOT *slot, uint64_t size)
int
__wt_log_slot_free(WT_SESSION_IMPL *session, WT_LOGSLOT *slot)
{
- WT_DECL_RET;
- ret = 0;
- /*
- * Grow the buffer if needed before returning it to the pool.
- */
- if (F_ISSET(slot, WT_SLOT_BUF_GROW)) {
- WT_STAT_FAST_CONN_INCR(session, log_buffer_grow);
- WT_STAT_FAST_CONN_INCRV(session,
- log_buffer_size, slot->slot_buf.memsize);
- WT_ERR(__wt_buf_grow(session,
- &slot->slot_buf, slot->slot_buf.memsize * 2));
- }
-err:
- /*
- * No matter if there is an error, we always want to free
- * the slot back to the pool.
- */
+ WT_UNUSED(session);
/*
* Make sure flags don't get retained between uses.
* We have to reset them here because multiple threads may
@@ -335,62 +327,5 @@ err:
*/
slot->flags = WT_SLOT_INIT_FLAGS;
slot->slot_state = WT_LOG_SLOT_FREE;
- return (ret);
-}
-
-/*
- * __wt_log_slot_grow_buffers --
- * Increase the buffer size of all available slots in the buffer pool.
- * Go to some lengths to include active (but unused) slots to handle
- * the case where all log write record sizes exceed the size of the
- * active buffer.
- */
-int
-__wt_log_slot_grow_buffers(WT_SESSION_IMPL *session, size_t newsize)
-{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
- WT_LOG *log;
- WT_LOGSLOT *slot;
- int64_t orig_state;
- uint64_t old_size, total_growth;
- int i;
-
- conn = S2C(session);
- log = conn->log;
- total_growth = 0;
- WT_STAT_FAST_CONN_INCR(session, log_buffer_grow);
- /*
- * Take the log slot lock to prevent other threads growing buffers
- * at the same time. Could tighten the scope of this lock, or have
- * a separate lock if there is contention.
- */
- __wt_spin_lock(session, &log->log_slot_lock);
- for (i = 0; i < WT_SLOT_POOL; i++) {
- slot = &log->slot_pool[i];
-
- /* Don't keep growing unrelated buffers. */
- if (slot->slot_buf.memsize > (10 * newsize) &&
- !F_ISSET(slot, WT_SLOT_BUF_GROW))
- continue;
-
- /* Avoid atomic operations if they won't succeed. */
- orig_state = slot->slot_state;
- if ((orig_state != WT_LOG_SLOT_FREE &&
- orig_state != WT_LOG_SLOT_READY) ||
- !WT_ATOMIC_CAS8(
- slot->slot_state, orig_state, WT_LOG_SLOT_PENDING))
- continue;
-
- /* We have a slot - now go ahead and grow the buffer. */
- old_size = slot->slot_buf.memsize;
- F_CLR(slot, WT_SLOT_BUF_GROW);
- WT_ERR(__wt_buf_grow(session, &slot->slot_buf,
- WT_MAX(slot->slot_buf.memsize * 2, newsize)));
- slot->slot_state = orig_state;
- total_growth += slot->slot_buf.memsize - old_size;
- }
-err: __wt_spin_unlock(session, &log->log_slot_lock);
- WT_STAT_FAST_CONN_INCRV(session, log_buffer_size, total_growth);
- return (ret);
+ return (0);
}
diff --git a/src/third_party/wiredtiger/src/meta/meta_track.c b/src/third_party/wiredtiger/src/meta/meta_track.c
index 5946f81290d..c887af58540 100644
--- a/src/third_party/wiredtiger/src/meta/meta_track.c
+++ b/src/third_party/wiredtiger/src/meta/meta_track.c
@@ -15,7 +15,7 @@
*/
typedef struct __wt_meta_track {
enum {
- WT_ST_EMPTY, /* Unused slot */
+ WT_ST_EMPTY = 0, /* Unused slot */
WT_ST_CHECKPOINT, /* Complete a checkpoint */
WT_ST_DROP_COMMIT, /* Drop post commit */
WT_ST_FILEOP, /* File operation */
@@ -67,6 +67,35 @@ __meta_track_next(WT_SESSION_IMPL *session, WT_META_TRACK **trkp)
}
/*
+ * __meta_track_clear --
+ * Clear the structure.
+ */
+static void
+__meta_track_clear(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
+{
+ __wt_free(session, trk->a);
+ __wt_free(session, trk->b);
+ memset(trk, 0, sizeof(WT_META_TRACK));
+}
+
+/*
+ * __meta_track_err --
+ * Drop the last operation off the end of the list, something went wrong
+ * during initialization.
+ */
+static void
+__meta_track_err(WT_SESSION_IMPL *session)
+{
+ WT_META_TRACK *trk;
+
+ trk = session->meta_track_next;
+ --trk;
+ __meta_track_clear(session, trk);
+
+ session->meta_track_next = trk;
+}
+
+/*
* __wt_meta_track_discard --
* Cleanup metadata tracking when closing a session.
*/
@@ -185,10 +214,7 @@ __meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk, int unroll)
WT_ILLEGAL_VALUE(session);
}
-free: trk->op = WT_ST_EMPTY;
- __wt_free(session, trk->a);
- __wt_free(session, trk->b);
- trk->dhandle = NULL;
+free: __meta_track_clear(session, trk);
return (ret);
}
@@ -346,14 +372,17 @@ __wt_meta_track_checkpoint(WT_SESSION_IMPL *session)
int
__wt_meta_track_insert(WT_SESSION_IMPL *session, const char *key)
{
+ WT_DECL_RET;
WT_META_TRACK *trk;
WT_RET(__meta_track_next(session, &trk));
trk->op = WT_ST_REMOVE;
- WT_RET(__wt_strdup(session, key, &trk->a));
-
+ WT_ERR(__wt_strdup(session, key, &trk->a));
return (0);
+
+err: __meta_track_err(session);
+ return (ret);
}
/*
@@ -369,7 +398,7 @@ __wt_meta_track_update(WT_SESSION_IMPL *session, const char *key)
WT_RET(__meta_track_next(session, &trk));
trk->op = WT_ST_SET;
- WT_RET(__wt_strdup(session, key, &trk->a));
+ WT_ERR(__wt_strdup(session, key, &trk->a));
/*
* If there was a previous value, keep it around -- if not, then this
@@ -380,6 +409,10 @@ __wt_meta_track_update(WT_SESSION_IMPL *session, const char *key)
trk->op = WT_ST_REMOVE;
ret = 0;
}
+ WT_ERR(ret);
+ return (0);
+
+err: __meta_track_err(session);
return (ret);
}
@@ -391,14 +424,18 @@ int
__wt_meta_track_fileop(
WT_SESSION_IMPL *session, const char *olduri, const char *newuri)
{
+ WT_DECL_RET;
WT_META_TRACK *trk;
WT_RET(__meta_track_next(session, &trk));
trk->op = WT_ST_FILEOP;
- WT_RET(__wt_strdup(session, olduri, &trk->a));
- WT_RET(__wt_strdup(session, newuri, &trk->b));
+ WT_ERR(__wt_strdup(session, olduri, &trk->a));
+ WT_ERR(__wt_strdup(session, newuri, &trk->b));
return (0);
+
+err: __meta_track_err(session);
+ return (ret);
}
/*
@@ -409,13 +446,17 @@ int
__wt_meta_track_drop(
WT_SESSION_IMPL *session, const char *filename)
{
+ WT_DECL_RET;
WT_META_TRACK *trk;
WT_RET(__meta_track_next(session, &trk));
trk->op = WT_ST_DROP_COMMIT;
- WT_RET(__wt_strdup(session, filename, &trk->a));
+ WT_ERR(__wt_strdup(session, filename, &trk->a));
return (0);
+
+err: __meta_track_err(session);
+ return (ret);
}
/*
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index c69344cb6b6..703bebb1597 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -29,7 +29,7 @@ typedef struct {
/* Track whether all changes to the page are written. */
uint64_t max_txn;
- uint64_t skipped_txn;
+ uint64_t first_dirty_txn;
uint32_t orig_write_gen;
/*
@@ -162,7 +162,7 @@ typedef struct {
* be evicted as new, in-memory pages, restoring the updates on
* those pages.
*/
- WT_UPD_SKIPPED *skip; /* Skipped updates */
+ WT_UPD_SKIPPED *skip; /* Skipped updates */
uint32_t skip_next;
size_t skip_allocated;
@@ -363,6 +363,19 @@ __wt_reconcile(WT_SESSION_IMPL *session,
WT_STAT_FAST_DATA_INCR(session, rec_pages_eviction);
}
+#ifdef HAVE_DIAGNOSTIC
+ {
+ /*
+ * Check that transaction time always moves forward for a given page.
+ * If this check fails, reconciliation can free something that a future
+ * reconciliation will need.
+ */
+ uint64_t oldest_id = __wt_txn_oldest_id(session);
+ WT_ASSERT(session, WT_TXNID_LE(mod->last_oldest_id, oldest_id));
+ mod->last_oldest_id = oldest_id;
+ }
+#endif
+
/* Record the most recent transaction ID we will *not* write. */
mod->disk_snap_min = session->txn.snap_min;
@@ -689,7 +702,7 @@ __rec_write_init(WT_SESSION_IMPL *session,
* Running transactions may update the page after we write it, so
* this is the highest ID we can be confident we will see.
*/
- r->skipped_txn = S2C(session)->txn_global.last_running;
+ r->first_dirty_txn = S2C(session)->txn_global.last_running;
return (0);
}
@@ -838,6 +851,7 @@ static inline int
__rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
WT_INSERT *ins, WT_ROW *rip, WT_CELL_UNPACK *vpack, WT_UPDATE **updp)
{
+ WT_DECL_RET;
WT_ITEM ovfl;
WT_PAGE *page;
WT_UPDATE *upd, *upd_list, *upd_ovfl;
@@ -850,12 +864,17 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
page = r->page;
/*
- * If we're called with an WT_INSERT reference, use its WT_UPDATE
- * list, else is an on-page row-store WT_UPDATE list.
+ * If called with a WT_INSERT item, use its WT_UPDATE list (which must
+ * exist), otherwise check for an on-page row-store WT_UPDATE list
+ * (which may not exist). Return immediately if the item has no updates.
*/
- upd_list = ins == NULL ? WT_ROW_UPDATE(page, rip) : ins->upd;
- skipped = 0;
+ if (ins == NULL) {
+ if ((upd_list = WT_ROW_UPDATE(page, rip)) == NULL)
+ return (0);
+ } else
+ upd_list = ins->upd;
+ skipped = 0;
for (max_txn = WT_TXN_NONE, min_txn = UINT64_MAX, upd = upd_list;
upd != NULL; upd = upd->next) {
if ((txnid = upd->txnid) == WT_TXN_ABORTED)
@@ -866,9 +885,9 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
max_txn = txnid;
if (WT_TXNID_LT(txnid, min_txn))
min_txn = txnid;
- if (WT_TXNID_LT(txnid, r->skipped_txn) &&
+ if (WT_TXNID_LT(txnid, r->first_dirty_txn) &&
!__wt_txn_visible_all(session, txnid))
- r->skipped_txn = txnid;
+ r->first_dirty_txn = txnid;
/*
* Record whether any updates were skipped on the way to finding
@@ -898,15 +917,15 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
r->max_txn = max_txn;
/*
- * If all updates are globally visible and no updates were skipped, the
+ * If no updates were skipped and all updates are globally visible, the
* page can be marked clean and we're done, regardless of whether we're
* evicting or checkpointing.
*
- * The oldest transaction ID may have moved while we were scanning the
- * page, so it is possible to skip an update but then find that by the
- * end of the scan, all updates are stable.
+ * We have to check both: the oldest transaction ID may have moved while
+ * we were scanning the update list, so it is possible to skip an update
+ * but then find that by the end of the scan, all updates are stable.
*/
- if (__wt_txn_visible_all(session, max_txn) && !skipped)
+ if (!skipped && __wt_txn_visible_all(session, max_txn))
return (0);
/*
@@ -976,8 +995,11 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
*/
if (vpack != NULL && vpack->raw == WT_CELL_VALUE_OVFL_RM &&
!__wt_txn_visible_all(session, min_txn)) {
- WT_RET(__wt_ovfl_txnc_search(
- page, vpack->data, vpack->size, &ovfl));
+ if ((ret = __wt_ovfl_txnc_search(
+ page, vpack->data, vpack->size, &ovfl)) != 0)
+ WT_PANIC_RET(session, ret,
+ "cached overflow item discarded early");
+
/*
* Create an update structure with an impossibly low transaction
* ID and append it to the update list we're about to save.
@@ -5064,23 +5086,37 @@ err: __wt_scr_free(session, &tkey);
* be set before a subsequent checkpoint reads it, and because the
* current checkpoint is waiting on this reconciliation to complete,
* there's no risk of that happening).
- *
- * Otherwise, if no updates were skipped, we have a new maximum
- * transaction written for the page (used to decide if a clean page can
- * be evicted). The page only might be clean; if the write generation
- * is unchanged since reconciliation started, clear it and update cache
- * dirty statistics, if the write generation changed, then the page has
- * been written since we started reconciliation, it cannot be
- * discarded.
*/
if (r->leave_dirty) {
- mod->first_dirty_txn = r->skipped_txn;
+ mod->first_dirty_txn = r->first_dirty_txn;
btree->modified = 1;
WT_FULL_BARRIER();
} else {
+ /*
+ * If no updates were skipped, we have a new maximum transaction
+ * written for the page (used to decide if a clean page can be
+ * evicted). Set the highest transaction ID for the page.
+ *
+ * Track the highest transaction ID for the tree (used to decide
+ * if it's safe to discard all of the pages in the tree without
+ * further checking). Reconciliation in the service of eviction
+ * is multi-threaded, only update the tree's maximum transaction
+ * ID when doing a checkpoint. That's sufficient, we only care
+ * about the highest transaction ID of any update currently in
+ * the tree, and checkpoint visits every dirty page in the tree.
+ */
mod->rec_max_txn = r->max_txn;
+ if (!F_ISSET(r, WT_EVICTING) &&
+ !WT_TXNID_LT(btree->rec_max_txn, r->max_txn))
+ btree->rec_max_txn = r->max_txn;
+ /*
+ * The page only might be clean; if the write generation is
+ * unchanged since reconciliation started, it's clean. If the
+ * write generation changed, the page has been written since
+ * we started reconciliation and remains dirty.
+ */
if (WT_ATOMIC_CAS4(mod->write_gen, r->orig_write_gen, 0))
__wt_cache_dirty_decr(session, page);
}
diff --git a/src/third_party/wiredtiger/src/schema/schema_create.c b/src/third_party/wiredtiger/src/schema/schema_create.c
index c7c47a88f3c..4041a1d7b9f 100644
--- a/src/third_party/wiredtiger/src/schema/schema_create.c
+++ b/src/third_party/wiredtiger/src/schema/schema_create.c
@@ -9,6 +9,22 @@
#include "wt_internal.h"
/*
+ * __wt_schema_create_strip --
+ * Discard any configuration information from a schema entry that is not
+ * applicable to a session.create call, here for the wt dump command utility,
+ * which only wants to dump the schema information needed for load.
+ */
+int
+__wt_schema_create_strip(WT_SESSION_IMPL *session,
+ const char *v1, const char *v2, char **value_ret)
+{
+ const char *cfg[] =
+ { WT_CONFIG_BASE(session, WT_SESSION_create), v1, v2, NULL };
+
+ return (__wt_config_collapse(session, cfg, value_ret));
+}
+
+/*
* __wt_direct_io_size_check --
* Return a size from the configuration, complaining if it's insufficient
* for direct I/O.
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index 4f698806511..ef9735a8b98 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -314,8 +314,10 @@ __wt_open_cursor(WT_SESSION_IMPL *session,
* copied.
*/
if ((*cursorp)->uri == NULL &&
- (ret = __wt_strdup(session, uri, &(*cursorp)->uri)) != 0)
+ (ret = __wt_strdup(session, uri, &(*cursorp)->uri)) != 0) {
WT_TRET((*cursorp)->close(*cursorp));
+ *cursorp = NULL;
+ }
return (ret);
}
@@ -381,23 +383,6 @@ err: if (cursor != NULL)
}
/*
- * __wt_session_create_strip --
- * Discard any configuration information from a schema entry that is not
- * applicable to an session.create call, here for the wt dump command utility,
- * which only wants to dump the schema information needed for load.
- */
-int
-__wt_session_create_strip(WT_SESSION *wt_session,
- const char *v1, const char *v2, char **value_ret)
-{
- WT_SESSION_IMPL *session = (WT_SESSION_IMPL *)wt_session;
- const char *cfg[] =
- { WT_CONFIG_BASE(session, WT_SESSION_create), v1, v2, NULL };
-
- return (__wt_config_collapse(session, cfg, value_ret));
-}
-
-/*
* __session_create --
* WT_SESSION->create method.
*/
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 44c2daa3802..0310fdc207c 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -458,7 +458,6 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats)
stats->log_slot_joins.desc = "log: consolidated slot joins";
stats->log_slot_toosmall.desc =
"log: failed to find a slot large enough for record";
- stats->log_buffer_grow.desc = "log: log buffer size increases";
stats->log_bytes_payload.desc = "log: log bytes of payload data";
stats->log_bytes_written.desc = "log: log bytes written";
stats->log_compress_writes.desc = "log: log records compressed";
@@ -631,7 +630,6 @@ __wt_stat_refresh_connection_stats(void *stats_arg)
stats->log_slot_transitions.v = 0;
stats->log_slot_joins.v = 0;
stats->log_slot_toosmall.v = 0;
- stats->log_buffer_grow.v = 0;
stats->log_bytes_payload.v = 0;
stats->log_bytes_written.v = 0;
stats->log_compress_writes.v = 0;
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 432746186fc..c9924056e91 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -98,7 +98,6 @@ __wt_txn_release_snapshot(WT_SESSION_IMPL *session)
WT_ASSERT(session,
txn_state->snap_min == WT_TXN_NONE ||
session->txn.isolation == WT_ISO_READ_UNCOMMITTED ||
- session->id == S2C(session)->txn_global.checkpoint_id ||
!__wt_txn_visible_all(session, txn_state->snap_min));
txn_state->snap_min = WT_TXN_NONE;
@@ -118,13 +117,13 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
WT_TXN_STATE *s, *txn_state;
uint64_t current_id, id;
uint64_t prev_oldest_id, snap_min;
- uint32_t ckpt_id, i, n, session_cnt;
+ uint32_t i, n, session_cnt;
int32_t count;
conn = S2C(session);
txn = &session->txn;
txn_global = &conn->txn_global;
- txn_state = &txn_global->states[session->id];
+ txn_state = WT_SESSION_TXN_STATE(session);
current_id = snap_min = txn_global->current;
prev_oldest_id = txn_global->oldest_id;
@@ -157,12 +156,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
/* Walk the array of concurrent transactions. */
WT_ORDERED_READ(session_cnt, conn->session_cnt);
- ckpt_id = txn_global->checkpoint_id;
for (i = n = 0, s = txn_global->states; i < session_cnt; i++, s++) {
- /* Skip the checkpoint transaction; it is never read from. */
- if (i == ckpt_id)
- continue;
-
/*
* Build our snapshot of any concurrent transaction IDs.
*
@@ -221,7 +215,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force)
WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *s;
uint64_t current_id, id, oldest_id, prev_oldest_id, snap_min;
- uint32_t ckpt_id, i, session_cnt;
+ uint32_t i, session_cnt;
int32_t count;
int last_running_moved;
@@ -257,12 +251,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force)
/* Walk the array of concurrent transactions. */
WT_ORDERED_READ(session_cnt, conn->session_cnt);
- ckpt_id = txn_global->checkpoint_id;
for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
- /* Skip the checkpoint transaction; it is never read from. */
- if (i == ckpt_id)
- continue;
-
/*
* Update the oldest ID.
*
@@ -310,15 +299,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, int force)
if (WT_TXNID_LT(prev_oldest_id, oldest_id) &&
WT_ATOMIC_CAS4(txn_global->scan_count, 1, -1)) {
WT_ORDERED_READ(session_cnt, conn->session_cnt);
- ckpt_id = txn_global->checkpoint_id;
for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
- /*
- * Skip the checkpoint transaction; it is never read
- * from.
- */
- if (i == ckpt_id)
- continue;
-
if ((id = s->id) != WT_TXN_NONE &&
WT_TXNID_LT(id, oldest_id))
oldest_id = id;
@@ -408,19 +389,31 @@ __wt_txn_release(WT_SESSION_IMPL *session)
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *txn_state;
+ int was_oldest;
txn = &session->txn;
WT_ASSERT(session, txn->mod_count == 0);
txn->notify = NULL;
txn_global = &S2C(session)->txn_global;
- txn_state = &txn_global->states[session->id];
+ txn_state = WT_SESSION_TXN_STATE(session);
+ was_oldest = 0;
/* Clear the transaction's ID from the global table. */
- if (F_ISSET(txn, WT_TXN_HAS_ID)) {
+ if (WT_SESSION_IS_CHECKPOINT(session)) {
+ WT_ASSERT(session, txn_state->id == WT_TXN_NONE);
+ txn->id = WT_TXN_NONE;
+
+ /* Clear the global checkpoint transaction IDs. */
+ txn_global->checkpoint_id = 0;
+ txn_global->checkpoint_pinned = WT_TXN_NONE;
+ } else if (F_ISSET(txn, WT_TXN_HAS_ID)) {
WT_ASSERT(session, txn_state->id != WT_TXN_NONE &&
txn->id != WT_TXN_NONE);
WT_PUBLISH(txn_state->id, WT_TXN_NONE);
+
+ /* Quick check for the oldest transaction. */
+ was_oldest = (txn->id == txn_global->last_running);
txn->id = WT_TXN_NONE;
}
@@ -439,6 +432,14 @@ __wt_txn_release(WT_SESSION_IMPL *session)
txn->isolation = session->isolation;
/* Ensure the transaction flags are cleared on exit */
txn->flags = 0;
+
+ /*
+ * When the oldest transaction in the system completes, bump the oldest
+ * ID. This is racy and so not guaranteed, but in practice it keeps
+ * the oldest ID from falling too far behind.
+ */
+ if (was_oldest)
+ __wt_txn_update_oldest(session, 1);
}
/*
@@ -518,6 +519,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
*/
__wt_txn_release_snapshot(session);
ret = __wt_txn_log_commit(session, cfg);
+ WT_ASSERT(session, ret == 0);
}
/*
@@ -648,19 +650,19 @@ __wt_txn_stats_update(WT_SESSION_IMPL *session)
WT_TXN_GLOBAL *txn_global;
WT_CONNECTION_IMPL *conn;
WT_CONNECTION_STATS *stats;
- uint64_t checkpoint_snap_min;
+ uint64_t checkpoint_pinned;
conn = S2C(session);
txn_global = &conn->txn_global;
stats = &conn->stats;
- checkpoint_snap_min = txn_global->checkpoint_snap_min;
+ checkpoint_pinned = txn_global->checkpoint_pinned;
WT_STAT_SET(stats, txn_pinned_range,
txn_global->current - txn_global->oldest_id);
WT_STAT_SET(stats, txn_pinned_checkpoint_range,
- checkpoint_snap_min == WT_TXN_NONE ?
- 0 : txn_global->current - checkpoint_snap_min);
+ checkpoint_pinned == WT_TXN_NONE ?
+ 0 : txn_global->current - checkpoint_pinned);
}
/*
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index cfc993418c5..f317a3dc697 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -349,6 +349,7 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_TXN *txn;
WT_TXN_GLOBAL *txn_global;
WT_TXN_ISOLATION saved_isolation;
+ WT_TXN_STATE *txn_state;
void *saved_meta_next;
u_int i;
int full, fullckpt_logging, idle, tracking;
@@ -358,6 +359,7 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
conn = S2C(session);
txn = &session->txn;
txn_global = &conn->txn_global;
+ txn_state = WT_SESSION_TXN_STATE(session);
saved_isolation = session->isolation;
full = fullckpt_logging = idle = tracking = 0;
@@ -429,6 +431,22 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__checkpoint_verbose_track(session,
"starting transaction", &verb_timer));
+ if (full)
+ WT_ERR(__wt_epoch(session, &start));
+
+ /*
+ * Bump the global checkpoint generation, used to figure out whether
+ * checkpoint has visited a tree. There is no need for this to be
+ * atomic: it is only written while holding the checkpoint lock.
+ *
+ * We do need to update it before clearing the checkpoint's entry out
+ * of the transaction table, or a thread evicting in a tree could
+ * ignore the checkpoint's transaction.
+ */
+ ++txn_global->checkpoint_gen;
+ WT_STAT_FAST_CONN_SET(session,
+ txn_checkpoint_generation, txn_global->checkpoint_gen);
+
/*
* Start a snapshot transaction for the checkpoint.
*
@@ -436,30 +454,44 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* side effects on cursors, which applications can hold open across
* calls to checkpoint.
*/
- if (full)
- WT_ERR(__wt_epoch(session, &start));
WT_ERR(__wt_txn_begin(session, txn_cfg));
/* Ensure a transaction ID is allocated prior to sharing it globally */
WT_ERR(__wt_txn_id_check(session));
/*
- * Save a copy of the checkpoint session ID so that refresh can skip
- * the checkpoint transactions. We never do checkpoints in the default
- * session with id zero. Save a copy of the snap min so that visibility
- * checks for the checkpoint use the right ID.
+ * Save the checkpoint session ID. We never do checkpoints in the
+ * default session (with id zero).
*/
- WT_ASSERT(session, session->id != 0);
+ WT_ASSERT(session, session->id != 0 && txn_global->checkpoint_id == 0);
txn_global->checkpoint_id = session->id;
- txn_global->checkpoint_snap_min = session->txn.snap_min;
+
+ txn_global->checkpoint_pinned =
+ WT_MIN(txn_state->id, txn_state->snap_min);
/*
- * No need for this to be atomic it is only written while holding the
- * checkpoint lock.
+ * We're about to clear the checkpoint transaction from the global
+ * state table so the oldest ID can move forward. Make sure everything
+ * we've done above is scheduled.
*/
- txn_global->checkpoint_gen += 1;
- WT_STAT_FAST_CONN_SET(session,
- txn_checkpoint_generation, txn_global->checkpoint_gen);
+ WT_FULL_BARRIER();
+
+ /*
+ * Sanity check that the oldest ID hasn't moved on before we have
+ * cleared our entry.
+ */
+ WT_ASSERT(session,
+ WT_TXNID_LE(txn_global->oldest_id, txn_state->id) &&
+ WT_TXNID_LE(txn_global->oldest_id, txn_state->snap_min));
+
+ /*
+ * Clear our entry from the global transaction session table. Any
+ * operation that needs to know about the ID for this checkpoint will
+ * consider the checkpoint ID in the global structure. Most operations
+ * can safely ignore the checkpoint ID (see the visible all check for
+ * details).
+ */
+ txn_state->id = txn_state->snap_min = WT_TXN_NONE;
/* Tell logging that we have started a database checkpoint. */
if (fullckpt_logging)
@@ -478,10 +510,6 @@ __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
/* Release the snapshot so we aren't pinning pages in cache. */
__wt_txn_release_snapshot(session);
- /* Clear the global checkpoint transaction IDs */
- txn_global->checkpoint_id = 0;
- txn_global->checkpoint_snap_min = WT_TXN_NONE;
-
WT_ERR(__checkpoint_verbose_track(session,
"committing transaction", &verb_timer));
@@ -558,10 +586,6 @@ err: /*
WT_TRET(__wt_txn_rollback(session, NULL));
}
- /* Ensure the checkpoint IDs are cleared on the error path. */
- txn_global->checkpoint_id = 0;
- txn_global->checkpoint_snap_min = WT_TXN_NONE;
-
/*
* Tell logging that we have finished a database checkpoint. Do not
* write a log record if the database was idle.
@@ -813,10 +837,8 @@ __checkpoint_worker(
force = 1;
}
if (!btree->modified && !force) {
- if (!is_checkpoint) {
- F_SET(btree, WT_BTREE_SKIP_CKPT);
- goto done;
- }
+ if (!is_checkpoint)
+ goto nockpt;
deleted = 0;
WT_CKPT_FOREACH(ckptbase, ckpt)
@@ -835,7 +857,12 @@ __checkpoint_worker(
(WT_PREFIX_MATCH(name, WT_CHECKPOINT) &&
WT_PREFIX_MATCH((ckpt - 1)->name, WT_CHECKPOINT))) &&
deleted < 2) {
- F_SET(btree, WT_BTREE_SKIP_CKPT);
+nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT);
+ WT_PUBLISH(btree->checkpoint_gen,
+ S2C(session)->txn_global.checkpoint_gen);
+ WT_STAT_FAST_DATA_SET(session,
+ btree_checkpoint_generation,
+ btree->checkpoint_gen);
goto done;
}
}
@@ -853,7 +880,7 @@ __checkpoint_worker(
* Hold the lock until we're done (blocking hot backups from starting),
* we don't want to race with a future hot backup.
*/
- __wt_spin_lock(session, &conn->hot_backup_lock);
+ WT_ERR(__wt_readlock(session, conn->hot_backup_lock));
hot_backup_locked = 1;
if (conn->hot_backup)
WT_CKPT_FOREACH(ckptbase, ckpt) {
@@ -1063,16 +1090,8 @@ fake: /*
WT_ERR(__wt_txn_checkpoint_log(
session, 0, WT_TXN_LOG_CKPT_STOP, NULL));
- /*
- * Update the checkpoint generation for this handle so visible
- * updates newer than the checkpoint can be evicted.
- */
-done: btree->checkpoint_gen = conn->txn_global.checkpoint_gen;
- WT_STAT_FAST_DATA_SET(session,
- btree_checkpoint_generation, btree->checkpoint_gen);
-
-err:
- /*
+done:
+err: /*
* If the checkpoint didn't complete successfully, make sure the
* tree is marked dirty.
*/
@@ -1080,7 +1099,7 @@ err:
btree->modified = 1;
if (hot_backup_locked)
- __wt_spin_unlock(session, &conn->hot_backup_lock);
+ WT_TRET(__wt_readunlock(session, conn->hot_backup_lock));
__wt_meta_ckptlist_free(session, ckptbase);
__wt_free(session, name_alloc);
diff --git a/src/third_party/wiredtiger/src/utilities/util_dump.c b/src/third_party/wiredtiger/src/utilities/util_dump.c
index 0ae201ea21e..28d0309242d 100644
--- a/src/third_party/wiredtiger/src/utilities/util_dump.c
+++ b/src/third_party/wiredtiger/src/utilities/util_dump.c
@@ -149,9 +149,9 @@ dump_config(WT_SESSION *session, const char *uri, int hex)
/* Open a metadata cursor. */
if ((ret = session->open_cursor(
- session, WT_METADATA_URI, NULL, NULL, &cursor)) != 0) {
- fprintf(stderr, "%s: %s: session.open_cursor: %s\n",
- progname, WT_METADATA_URI, session->strerror(session, ret));
+ session, "metadata:create", NULL, NULL, &cursor)) != 0) {
+ fprintf(stderr, "%s: %s: session.open_cursor: %s\n", progname,
+ "metadata:create", session->strerror(session, ret));
return (1);
}
/*
@@ -225,7 +225,7 @@ dump_json_table_begin(
{
WT_DECL_RET;
const char *name;
- char *jsonconfig, *stripped;
+ char *jsonconfig;
jsonconfig = NULL;
@@ -236,12 +236,7 @@ dump_json_table_begin(
}
++name;
- if ((ret =
- __wt_session_create_strip(session, config, NULL, &stripped)) != 0)
- return (util_err(session, ret, NULL));
- ret = dup_json_string(stripped, &jsonconfig);
- free(stripped);
- if (ret != 0)
+ if ((ret = dup_json_string(config, &jsonconfig)) != 0)
return (util_cerr(cursor, "config dup", ret));
if (printf(" \"%s\" : [\n {\n", uri) < 0)
goto eio;
@@ -278,7 +273,7 @@ dump_json_table_cg(WT_SESSION *session, WT_CURSOR *cursor,
WT_DECL_RET;
const char *key, *skip, *value;
int exact, once;
- char *jsonconfig, *stripped;
+ char *jsonconfig;
static const char * const indent = " ";
once = 0;
@@ -326,12 +321,7 @@ match: if ((ret = cursor->get_key(cursor, &key)) != 0)
if ((ret = cursor->get_value(cursor, &value)) != 0)
return (util_cerr(cursor, "get_value", ret));
- if ((ret = __wt_session_create_strip(
- session, value, NULL, &stripped)) != 0)
- return (util_err(session, ret, NULL));
- ret = dup_json_string(stripped, &jsonconfig);
- free(stripped);
- if (ret != 0)
+ if ((ret = dup_json_string(value, &jsonconfig)) != 0)
return (util_cerr(cursor, "config dup", ret));
ret = printf("%s\n"
"%s{\n"
@@ -362,67 +352,42 @@ dump_json_table_config(WT_SESSION *session, const char *uri)
{
WT_CURSOR *cursor;
WT_DECL_RET;
- WT_EXTENSION_API *wtext;
int tret;
char *value;
/* Dump the config. */
- if (WT_PREFIX_MATCH(uri, "table:")) {
- /* Open a metadata cursor. */
- if ((ret = session->open_cursor(
- session, WT_METADATA_URI, NULL, NULL, &cursor)) != 0) {
- fprintf(stderr, "%s: %s: session.open_cursor: %s\n",
- progname, WT_METADATA_URI,
- session->strerror(session, ret));
- return (1);
- }
+ /* Open a metadata cursor. */
+ if ((ret = session->open_cursor(
+ session, "metadata:create", NULL, NULL, &cursor)) != 0) {
+ fprintf(stderr, "%s: %s: session.open_cursor: %s\n",
+ progname, "metadata:create",
+ session->strerror(session, ret));
+ return (1);
+ }
- /*
- * Search for the object itself, to make sure it
- * exists, and get its config string. This where we
- * find out a table object doesn't exist, use a simple
- * error message.
- */
- cursor->set_key(cursor, uri);
- if ((ret = cursor->search(cursor)) == 0) {
- if ((ret = cursor->get_value(cursor, &value)) != 0)
- ret = util_cerr(cursor, "get_value", ret);
- else if (dump_json_table_begin(
- session, cursor, uri, value) != 0)
- ret = 1;
- } else if (ret == WT_NOTFOUND)
- ret = util_err(
- session, 0, "%s: No such object exists", uri);
- else
- ret = util_err(session, ret, "%s", uri);
+ /*
+ * Search for the object itself, to make sure it
+ * exists, and get its config string. This is where we
+ * find out a table object doesn't exist, use a simple
+ * error message.
+ */
+ cursor->set_key(cursor, uri);
+ if ((ret = cursor->search(cursor)) == 0) {
+ if ((ret = cursor->get_value(cursor, &value)) != 0)
+ ret = util_cerr(cursor, "get_value", ret);
+ else if (dump_json_table_begin(
+ session, cursor, uri, value) != 0)
+ ret = 1;
+ } else if (ret == WT_NOTFOUND)
+ ret = util_err(
+ session, 0, "%s: No such object exists", uri);
+ else
+ ret = util_err(session, ret, "%s", uri);
- if ((tret = cursor->close(cursor)) != 0) {
- tret = util_cerr(cursor, "close", tret);
- if (ret == 0)
- ret = tret;
- }
- } else {
- /*
- * We want to be able to dump the metadata file itself, but the
- * configuration for that file lives in the turtle file. Reach
- * down into the library and ask for the file's configuration,
- * that will work in all cases.
- *
- * This where we find out a file object doesn't exist, use a
- * simple error message.
- */
- wtext = session->
- connection->get_extension_api(session->connection);
- if ((ret =
- wtext->metadata_search(wtext, session, uri, &value)) == 0) {
- if (dump_json_table_begin(
- session, NULL, uri, value) != 0)
- ret = 1;
- } else if (ret == WT_NOTFOUND)
- ret = util_err(
- session, 0, "%s: No such object exists", uri);
- else
- ret = util_err(session, ret, "%s", uri);
+ if ((tret = cursor->close(cursor)) != 0) {
+ tret = util_cerr(cursor, "close", tret);
+ if (ret == 0)
+ ret = tret;
}
return (ret);
@@ -687,17 +652,19 @@ print_config(WT_SESSION *session,
{
WT_DECL_RET;
char *value_ret;
+ const char *cfg[] = { v1, v2, NULL };
/*
- * The underlying call will ignore v2 if v1 is NULL -- check here and
- * swap in that case.
+ * The underlying call will stop if the first string is NULL -- check
+ * here and swap in that case.
*/
- if (v1 == NULL) {
- v1 = v2;
- v2 = NULL;
+ if (cfg[0] == NULL) {
+ cfg[0] = cfg[1];
+ cfg[1] = NULL;
}
- if ((ret = __wt_session_create_strip(session, v1, v2, &value_ret)) != 0)
+ if ((ret = __wt_config_collapse(
+ (WT_SESSION_IMPL *)session, cfg, &value_ret)) != 0)
return (util_err(session, ret, NULL));
ret = printf("%s\n%s\n", key, value_ret);
free((char *)value_ret);