Diffstat (limited to 'src')
-rw-r--r--  src/async/async_worker.c | 2
-rw-r--r--  src/btree/bt_cursor.c | 2
-rw-r--r--  src/btree/bt_handle.c | 13
-rw-r--r--  src/btree/bt_sync.c | 19
-rw-r--r--  src/checksum/power8/README.md | 2
-rw-r--r--  src/checksum/power8/crc32.sx (renamed from src/checksum/power8/crc32.S) | 0
-rw-r--r--  src/checksum/zseries/crc32le-vx.sx (renamed from src/checksum/zseries/crc32le-vx.S) | 0
-rw-r--r--  src/config/config_collapse.c | 4
-rw-r--r--  src/config/config_def.c | 61
-rw-r--r--  src/conn/conn_cache.c | 63
-rw-r--r--  src/conn/conn_ckpt.c | 46
-rw-r--r--  src/conn/conn_log.c | 4
-rw-r--r--  src/conn/conn_stat.c | 2
-rw-r--r--  src/conn/conn_sweep.c | 2
-rw-r--r--  src/docs/wtperf.dox | 83
-rw-r--r--  src/evict/evict_lru.c | 123
-rw-r--r--  src/include/api.h | 4
-rw-r--r--  src/include/btree.h | 27
-rw-r--r--  src/include/btree.i | 12
-rw-r--r--  src/include/cache.i | 2
-rw-r--r--  src/include/connection.h | 6
-rw-r--r--  src/include/extern.h | 2
-rw-r--r--  src/include/extern_posix.h | 2
-rw-r--r--  src/include/extern_win.h | 2
-rw-r--r--  src/include/misc.i | 6
-rw-r--r--  src/include/stat.h | 3
-rw-r--r--  src/include/txn.h | 10
-rw-r--r--  src/include/txn.i | 6
-rw-r--r--  src/include/wiredtiger.in | 82
-rw-r--r--  src/log/log.c | 16
-rw-r--r--  src/lsm/lsm_cursor.c | 19
-rw-r--r--  src/lsm/lsm_manager.c | 4
-rw-r--r--  src/lsm/lsm_merge.c | 15
-rw-r--r--  src/lsm/lsm_tree.c | 8
-rw-r--r--  src/lsm/lsm_work_unit.c | 2
-rw-r--r--  src/meta/meta_ckpt.c | 2
-rw-r--r--  src/os_posix/os_mtx_cond.c | 2
-rw-r--r--  src/os_posix/os_time.c | 14
-rw-r--r--  src/os_win/os_time.c | 6
-rw-r--r--  src/reconcile/rec_write.c | 29
-rw-r--r--  src/session/session_api.c | 15
-rw-r--r--  src/session/session_compact.c | 9
-rw-r--r--  src/session/session_dhandle.c | 11
-rw-r--r--  src/support/err.c | 39
-rw-r--r--  src/support/rand.c | 6
-rw-r--r--  src/support/stat.c | 12
-rw-r--r--  src/support/thread_group.c | 10
-rw-r--r--  src/txn/txn.c | 40
-rw-r--r--  src/txn/txn_ckpt.c | 80
-rw-r--r--  src/txn/txn_nsnap.c | 55
50 files changed, 540 insertions, 444 deletions
diff --git a/src/async/async_worker.c b/src/async/async_worker.c
index 401d0616eab..b1bc3902f7c 100644
--- a/src/async/async_worker.c
+++ b/src/async/async_worker.c
@@ -216,7 +216,7 @@ __async_worker_execop(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op,
break;
case WT_AOP_NONE:
WT_RET_MSG(session, EINVAL,
- "Unknown async optype %d\n", op->optype);
+ "Unknown async optype %d", op->optype);
}
return (0);
}
diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c
index 3690b41ead4..41ae457b0fe 100644
--- a/src/btree/bt_cursor.c
+++ b/src/btree/bt_cursor.c
@@ -1217,7 +1217,7 @@ err: if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED))
/*
* __wt_btcur_init --
- * Initialize an cursor used for internal purposes.
+ * Initialize a cursor used for internal purposes.
*/
void
__wt_btcur_init(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index 9591023e163..337a3ea036f 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -271,6 +271,17 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
else
F_CLR(btree, WT_BTREE_IN_MEMORY | WT_BTREE_NO_EVICTION);
+ WT_RET(__wt_config_gets(session,
+ cfg, "ignore_in_memory_cache_size", &cval));
+ if (cval.val) {
+ if (!F_ISSET(conn, WT_CONN_IN_MEMORY))
+ WT_RET_MSG(session, EINVAL,
+ "ignore_in_memory_cache_size setting is only valid "
+ "with databases configured to run in-memory");
+ F_SET(btree, WT_BTREE_IGNORE_CACHE);
+ } else
+ F_CLR(btree, WT_BTREE_IGNORE_CACHE);
+
WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval));
if (cval.val)
F_CLR(btree, WT_BTREE_NO_LOGGING);
@@ -353,7 +364,7 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
WT_RET(__wt_spin_init(session, &btree->flush_lock, "btree flush"));
btree->checkpointing = WT_CKPT_OFF; /* Not checkpointing */
- btree->modified = 0; /* Clean */
+ btree->modified = false; /* Clean */
btree->write_gen = ckpt->write_gen; /* Write generation */
return (0);
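
[Editor's note] The new ignore_in_memory_cache_size setting parsed above lets a tree in an in-memory database keep accepting inserts and updates after the cache reaches capacity (the cursor-update path in api.h further down only returns WT_CACHE_FULL when the flag is not set), and __btree_conf rejects it with EINVAL unless the connection itself was configured in-memory. A minimal usage sketch, not part of this change; the table name, key/value formats and cache size are placeholder values:

#include <wiredtiger.h>

/*
 * Editorial sketch only: create a table that opts out of the in-memory
 * cache-size check. Names and sizes are illustrative, not from the commit.
 */
int
create_unbounded_table(void)
{
	WT_CONNECTION *conn;
	WT_SESSION *session;
	int ret;

	/* The option is only accepted when the database runs in memory. */
	if ((ret = wiredtiger_open("WT_HOME",
	    NULL, "create,in_memory=true,cache_size=100MB", &conn)) != 0)
		return (ret);
	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
		goto err;

	/*
	 * Updates and inserts on this table proceed even after the cache is
	 * full, so memory use may grow past the configured cache size.
	 */
	ret = session->create(session, "table:example",
	    "key_format=S,value_format=S,ignore_in_memory_cache_size=true");

err:	(void)conn->close(conn, NULL);
	return (ret);
}
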
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index b41179a565d..7b583bd9c1e 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -24,20 +24,20 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
WT_REF *walk;
WT_TXN *txn;
uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages;
- uint64_t oldest_id, saved_snap_min;
+ uint64_t oldest_id, saved_pinned_id;
uint32_t flags;
conn = S2C(session);
btree = S2BT(session);
walk = NULL;
txn = &session->txn;
- saved_snap_min = WT_SESSION_TXN_STATE(session)->snap_min;
+ saved_pinned_id = WT_SESSION_TXN_STATE(session)->pinned_id;
flags = WT_READ_CACHE | WT_READ_NO_GEN;
internal_bytes = leaf_bytes = 0;
internal_pages = leaf_pages = 0;
if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT))
- WT_RET(__wt_epoch(session, &start));
+ __wt_epoch(session, &start);
switch (syncop) {
case WT_SYNC_WRITE_LEAVES:
@@ -205,15 +205,14 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
}
if (WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT)) {
- WT_ERR(__wt_epoch(session, &end));
+ __wt_epoch(session, &end);
__wt_verbose(session, WT_VERB_CHECKPOINT,
- "__sync_file WT_SYNC_%s wrote:\n\t %" PRIu64
- " bytes, %" PRIu64 " pages of leaves\n\t %" PRIu64
- " bytes, %" PRIu64 " pages of internal\n\t"
- "Took: %" PRIu64 "ms",
+ "__sync_file WT_SYNC_%s wrote: %" PRIu64
+ " leaf pages (%" PRIu64 "B), %" PRIu64
+ " internal pages (%" PRIu64 "B), and took %" PRIu64 "ms",
syncop == WT_SYNC_WRITE_LEAVES ?
"WRITE_LEAVES" : "CHECKPOINT",
- leaf_bytes, leaf_pages, internal_bytes, internal_pages,
+ leaf_pages, leaf_bytes, internal_pages, internal_bytes,
WT_TIMEDIFF_MS(end, start));
}
@@ -226,7 +225,7 @@ err: /* On error, clear any left-over tree walk. */
* snapshot active when we started, release it.
*/
if (txn->isolation == WT_ISO_READ_COMMITTED &&
- saved_snap_min == WT_TXN_NONE)
+ saved_pinned_id == WT_TXN_NONE)
__wt_txn_release_snapshot(session);
/* Clear the checkpoint flag and push the change. */
diff --git a/src/checksum/power8/README.md b/src/checksum/power8/README.md
index 3e2976650cd..579d841a02c 100644
--- a/src/checksum/power8/README.md
+++ b/src/checksum/power8/README.md
@@ -39,7 +39,7 @@ Quick start
- Type make to create the constants (crc32_constants.h)
-- Import the code into your application (crc32.S crc32_wrapper.c
+- Import the code into your application (crc32.sx crc32_wrapper.c
crc32_constants.h ppc-opcode.h) and call the CRC:
```
diff --git a/src/checksum/power8/crc32.S b/src/checksum/power8/crc32.sx
index 0b7870668b5..0b7870668b5 100644
--- a/src/checksum/power8/crc32.S
+++ b/src/checksum/power8/crc32.sx
diff --git a/src/checksum/zseries/crc32le-vx.S b/src/checksum/zseries/crc32le-vx.sx
index 0f1392b0952..0f1392b0952 100644
--- a/src/checksum/zseries/crc32le-vx.S
+++ b/src/checksum/zseries/crc32le-vx.sx
diff --git a/src/config/config_collapse.c b/src/config/config_collapse.c
index ea956ebfff9..7fe78d06ba7 100644
--- a/src/config/config_collapse.c
+++ b/src/config/config_collapse.c
@@ -47,7 +47,7 @@ __wt_config_collapse(
if (k.type != WT_CONFIG_ITEM_STRING &&
k.type != WT_CONFIG_ITEM_ID)
WT_ERR_MSG(session, EINVAL,
- "Invalid configuration key found: '%s'\n", k.str);
+ "Invalid configuration key found: '%s'", k.str);
WT_ERR(__wt_config_get(session, cfg, &k, &v));
/* Include the quotes around string keys/values. */
if (k.type == WT_CONFIG_ITEM_STRING) {
@@ -132,7 +132,7 @@ __config_merge_scan(WT_SESSION_IMPL *session,
if (k.type != WT_CONFIG_ITEM_STRING &&
k.type != WT_CONFIG_ITEM_ID)
WT_ERR_MSG(session, EINVAL,
- "Invalid configuration key found: '%s'\n", k.str);
+ "Invalid configuration key found: '%s'", k.str);
/* Include the quotes around string keys/values. */
if (k.type == WT_CONFIG_ITEM_STRING) {
diff --git a/src/config/config_def.c b/src/config/config_def.c
index 7bad5f12a9f..7bce4bc9cef 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -246,6 +246,9 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = {
{ "format", "string", NULL, "choices=[\"btree\"]", NULL, 0 },
{ "huffman_key", "string", NULL, NULL, NULL, 0 },
{ "huffman_value", "string", NULL, NULL, NULL, 0 },
+ { "ignore_in_memory_cache_size", "boolean",
+ NULL, NULL,
+ NULL, 0 },
{ "immutable", "boolean", NULL, NULL, NULL, 0 },
{ "internal_item_max", "int", NULL, "min=0", NULL, 0 },
{ "internal_key_max", "int", NULL, "min=0", NULL, 0 },
@@ -413,6 +416,9 @@ static const WT_CONFIG_CHECK confchk_file_config[] = {
{ "format", "string", NULL, "choices=[\"btree\"]", NULL, 0 },
{ "huffman_key", "string", NULL, NULL, NULL, 0 },
{ "huffman_value", "string", NULL, NULL, NULL, 0 },
+ { "ignore_in_memory_cache_size", "boolean",
+ NULL, NULL,
+ NULL, 0 },
{ "internal_item_max", "int", NULL, "min=0", NULL, 0 },
{ "internal_key_max", "int", NULL, "min=0", NULL, 0 },
{ "internal_key_truncate", "boolean", NULL, NULL, NULL, 0 },
@@ -471,6 +477,9 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = {
{ "huffman_key", "string", NULL, NULL, NULL, 0 },
{ "huffman_value", "string", NULL, NULL, NULL, 0 },
{ "id", "string", NULL, NULL, NULL, 0 },
+ { "ignore_in_memory_cache_size", "boolean",
+ NULL, NULL,
+ NULL, 0 },
{ "internal_item_max", "int", NULL, "min=0", NULL, 0 },
{ "internal_key_max", "int", NULL, "min=0", NULL, 0 },
{ "internal_key_truncate", "boolean", NULL, NULL, NULL, 0 },
@@ -544,6 +553,9 @@ static const WT_CONFIG_CHECK confchk_lsm_meta[] = {
{ "format", "string", NULL, "choices=[\"btree\"]", NULL, 0 },
{ "huffman_key", "string", NULL, NULL, NULL, 0 },
{ "huffman_value", "string", NULL, NULL, NULL, 0 },
+ { "ignore_in_memory_cache_size", "boolean",
+ NULL, NULL,
+ NULL, 0 },
{ "internal_item_max", "int", NULL, "min=0", NULL, 0 },
{ "internal_key_max", "int", NULL, "min=0", NULL, 0 },
{ "internal_key_truncate", "boolean", NULL, NULL, NULL, 0 },
@@ -1053,18 +1065,18 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"block_compressor=,cache_resident=false,checksum=uncompressed,"
"colgroups=,collator=,columns=,dictionary=0,encryption=(keyid=,"
"name=),exclusive=false,extractor=,format=btree,huffman_key=,"
- "huffman_value=,immutable=false,internal_item_max=0,"
- "internal_key_max=0,internal_key_truncate=true,"
- "internal_page_max=4KB,key_format=u,key_gap=10,leaf_item_max=0,"
- "leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=0,"
- "log=(enabled=true),lsm=(auto_throttle=true,bloom=true,"
- "bloom_bit_count=16,bloom_config=,bloom_hash_count=8,"
+ "huffman_value=,ignore_in_memory_cache_size=false,immutable=false"
+ ",internal_item_max=0,internal_key_max=0,"
+ "internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
+ "key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
+ "leaf_value_max=0,log=(enabled=true),lsm=(auto_throttle=true,"
+ "bloom=true,bloom_bit_count=16,bloom_config=,bloom_hash_count=8,"
"bloom_oldest=false,chunk_count_limit=0,chunk_max=5GB,"
"chunk_size=10MB,merge_max=15,merge_min=0),memory_page_max=5MB,"
"os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false,"
"prefix_compression_min=4,source=,split_deepen_min_child=0,"
"split_deepen_per_child=0,split_pct=75,type=file,value_format=u",
- confchk_WT_SESSION_create, 40
+ confchk_WT_SESSION_create, 41
},
{ "WT_SESSION.drop",
"checkpoint_wait=true,force=false,lock_wait=true,"
@@ -1148,7 +1160,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"allocation_size=4KB,app_metadata=,block_allocation=best,"
"block_compressor=,cache_resident=false,checksum=uncompressed,"
"collator=,columns=,dictionary=0,encryption=(keyid=,name=),"
- "format=btree,huffman_key=,huffman_value=,internal_item_max=0,"
+ "format=btree,huffman_key=,huffman_value=,"
+ "ignore_in_memory_cache_size=false,internal_item_max=0,"
"internal_key_max=0,internal_key_truncate=true,"
"internal_page_max=4KB,key_format=u,key_gap=10,leaf_item_max=0,"
"leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=0,"
@@ -1156,14 +1169,15 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"os_cache_max=0,prefix_compression=false,prefix_compression_min=4"
",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=75,"
"value_format=u",
- confchk_file_config, 33
+ confchk_file_config, 34
},
{ "file.meta",
"allocation_size=4KB,app_metadata=,block_allocation=best,"
"block_compressor=,cache_resident=false,checkpoint=,"
"checkpoint_lsn=,checksum=uncompressed,collator=,columns=,"
"dictionary=0,encryption=(keyid=,name=),format=btree,huffman_key="
- ",huffman_value=,id=,internal_item_max=0,internal_key_max=0,"
+ ",huffman_value=,id=,ignore_in_memory_cache_size=false,"
+ "internal_item_max=0,internal_key_max=0,"
"internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
"key_gap=10,leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
"leaf_value_max=0,log=(enabled=true),memory_page_max=5MB,"
@@ -1171,7 +1185,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"prefix_compression_min=4,split_deepen_min_child=0,"
"split_deepen_per_child=0,split_pct=75,value_format=u,"
"version=(major=0,minor=0)",
- confchk_file_meta, 37
+ confchk_file_meta, 38
},
{ "index.meta",
"app_metadata=,collator=,columns=,extractor=,immutable=false,"
@@ -1183,18 +1197,19 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"block_compressor=,cache_resident=false,checksum=uncompressed,"
"chunks=,collator=,columns=,dictionary=0,encryption=(keyid=,"
"name=),format=btree,huffman_key=,huffman_value=,"
- "internal_item_max=0,internal_key_max=0,"
- "internal_key_truncate=true,internal_page_max=4KB,key_format=u,"
- "key_gap=10,last=,leaf_item_max=0,leaf_key_max=0,"
- "leaf_page_max=32KB,leaf_value_max=0,log=(enabled=true),"
- "lsm=(auto_throttle=true,bloom=true,bloom_bit_count=16,"
- "bloom_config=,bloom_hash_count=8,bloom_oldest=false,"
- "chunk_count_limit=0,chunk_max=5GB,chunk_size=10MB,merge_max=15,"
- "merge_min=0),memory_page_max=5MB,old_chunks=,"
- "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false,"
- "prefix_compression_min=4,split_deepen_min_child=0,"
- "split_deepen_per_child=0,split_pct=75,value_format=u",
- confchk_lsm_meta, 37
+ "ignore_in_memory_cache_size=false,internal_item_max=0,"
+ "internal_key_max=0,internal_key_truncate=true,"
+ "internal_page_max=4KB,key_format=u,key_gap=10,last=,"
+ "leaf_item_max=0,leaf_key_max=0,leaf_page_max=32KB,"
+ "leaf_value_max=0,log=(enabled=true),lsm=(auto_throttle=true,"
+ "bloom=true,bloom_bit_count=16,bloom_config=,bloom_hash_count=8,"
+ "bloom_oldest=false,chunk_count_limit=0,chunk_max=5GB,"
+ "chunk_size=10MB,merge_max=15,merge_min=0),memory_page_max=5MB,"
+ "old_chunks=,os_cache_dirty_max=0,os_cache_max=0,"
+ "prefix_compression=false,prefix_compression_min=4,"
+ "split_deepen_min_child=0,split_deepen_per_child=0,split_pct=75,"
+ "value_format=u",
+ confchk_lsm_meta, 38
},
{ "table.meta",
"app_metadata=,colgroups=,collator=,columns=,key_format=u,"
diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c
index 6788b1f7f47..1b8b3183d3c 100644
--- a/src/conn/conn_cache.c
+++ b/src/conn/conn_cache.c
@@ -42,47 +42,38 @@ __cache_config_local(WT_SESSION_IMPL *session, bool shared, const char *cfg[])
WT_RET(__wt_config_gets(session, cfg, "eviction_trigger", &cval));
cache->eviction_trigger = (u_int)cval.val;
- if (F_ISSET(conn, WT_CONN_IN_MEMORY))
- cache->eviction_checkpoint_target =
- cache->eviction_dirty_target =
- cache->eviction_dirty_trigger = 100U;
- else {
- WT_RET(__wt_config_gets(
- session, cfg, "eviction_checkpoint_target", &cval));
- cache->eviction_checkpoint_target = (u_int)cval.val;
+ WT_RET(__wt_config_gets(
+ session, cfg, "eviction_checkpoint_target", &cval));
+ cache->eviction_checkpoint_target = (u_int)cval.val;
- WT_RET(__wt_config_gets(
- session, cfg, "eviction_dirty_target", &cval));
- cache->eviction_dirty_target = (u_int)cval.val;
+ WT_RET(__wt_config_gets(session, cfg, "eviction_dirty_target", &cval));
+ cache->eviction_dirty_target = (u_int)cval.val;
- /*
- * Don't allow the dirty target to be larger than the overall
- * target.
- */
- if (cache->eviction_dirty_target > cache->eviction_target)
- cache->eviction_dirty_target = cache->eviction_target;
+ /*
+ * Don't allow the dirty target to be larger than the overall
+ * target.
+ */
+ if (cache->eviction_dirty_target > cache->eviction_target)
+ cache->eviction_dirty_target = cache->eviction_target;
- /*
- * Sanity check the checkpoint target: don't allow a value
- * lower than the dirty target.
- */
- if (cache->eviction_checkpoint_target > 0 &&
- cache->eviction_checkpoint_target <
- cache->eviction_dirty_target)
- cache->eviction_checkpoint_target =
- cache->eviction_dirty_target;
+ /*
+ * Sanity check the checkpoint target: don't allow a value
+ * lower than the dirty target.
+ */
+ if (cache->eviction_checkpoint_target > 0 &&
+ cache->eviction_checkpoint_target < cache->eviction_dirty_target)
+ cache->eviction_checkpoint_target =
+ cache->eviction_dirty_target;
- WT_RET(__wt_config_gets(
- session, cfg, "eviction_dirty_trigger", &cval));
- cache->eviction_dirty_trigger = (u_int)cval.val;
+ WT_RET(__wt_config_gets(session, cfg, "eviction_dirty_trigger", &cval));
+ cache->eviction_dirty_trigger = (u_int)cval.val;
- /*
- * Don't allow the dirty trigger to be larger than the overall
- * trigger or we can get stuck with a cache full of dirty data.
- */
- if (cache->eviction_dirty_trigger > cache->eviction_trigger)
- cache->eviction_dirty_trigger = cache->eviction_trigger;
- }
+ /*
+ * Don't allow the dirty trigger to be larger than the overall
+ * trigger or we can get stuck with a cache full of dirty data.
+ */
+ if (cache->eviction_dirty_trigger > cache->eviction_trigger)
+ cache->eviction_dirty_trigger = cache->eviction_trigger;
WT_RET(__wt_config_gets(session, cfg, "eviction.threads_max", &cval));
WT_ASSERT(session, cval.val > 0);
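
[Editor's note] With the block above, the dirty and checkpoint eviction targets are no longer forced to 100 for in-memory databases; they always come from configuration, with the dirty target clamped to the overall target and a non-zero checkpoint target kept at or above the dirty target. A hedged sketch of supplying them at open time; the option names appear in the wiredtiger.in documentation later in this diff, and the percentages are arbitrary example values, not recommendations from this change:

#include <wiredtiger.h>

/* Illustrative values only. */
static int
open_with_eviction_targets(WT_CONNECTION **connp)
{
	return (wiredtiger_open("WT_HOME", NULL,
	    "create,cache_size=1GB,"
	    "eviction_target=80,eviction_trigger=95,"
	    "eviction_dirty_target=5,eviction_dirty_trigger=20,"
	    "eviction_checkpoint_target=5",
	    connp));
}
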
diff --git a/src/conn/conn_ckpt.c b/src/conn/conn_ckpt.c
index b4913043d63..faeef4e71a2 100644
--- a/src/conn/conn_ckpt.c
+++ b/src/conn/conn_ckpt.c
@@ -87,22 +87,36 @@ __ckpt_server(void *arg)
*/
__wt_cond_wait(session, conn->ckpt_cond, conn->ckpt_usecs);
- /* Checkpoint the database. */
- WT_ERR(wt_session->checkpoint(wt_session, NULL));
-
- /* Reset. */
- if (conn->ckpt_logsize) {
- __wt_log_written_reset(session);
- conn->ckpt_signalled = false;
-
- /*
- * In case we crossed the log limit during the
- * checkpoint and the condition variable was already
- * signalled, do a tiny wait to clear it so we don't do
- * another checkpoint immediately.
- */
- __wt_cond_wait(session, conn->ckpt_cond, 1);
- }
+ /*
+ * Checkpoint the database if the connection is marked dirty.
+ * A connection is marked dirty whenever a btree gets marked
+ * dirty, which reflects upon a change in the database that
+ * needs to be checkpointed. Said that, there can be short
+ * instances when a btree gets marked dirty and the connection
+ * is yet to be. We might skip a checkpoint in that short
+ * instance, which is okay because by the next time we get to
+ * checkpoint, the connection would have been marked dirty and
+ * hence the checkpoint will not be skipped this time.
+ */
+ if (conn->modified) {
+ WT_ERR(wt_session->checkpoint(wt_session, NULL));
+
+ /* Reset. */
+ if (conn->ckpt_logsize) {
+ __wt_log_written_reset(session);
+ conn->ckpt_signalled = false;
+
+ /*
+ * In case we crossed the log limit during the
+ * checkpoint and the condition variable was
+ * already signalled, do a tiny wait to clear
+ * it so we don't do another checkpoint
+ * immediately.
+ */
+ __wt_cond_wait(session, conn->ckpt_cond, 1);
+ }
+ } else
+ WT_STAT_CONN_INCR(session, txn_checkpoint_skipped);
}
if (0) {
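
[Editor's note] The checkpoint server above now consults a connection-wide dirty flag: connection.h (further down in this diff) adds conn->modified, btree.i marks the connection whenever a page is dirtied and the connection is not already marked, and a clean connection makes the server skip the checkpoint and bump the new txn_checkpoint_skipped statistic. A condensed, editorial restatement of that flow; the real code is in __wt_page_modify_set() and __ckpt_server(), and the flag is presumably cleared again by the checkpoint code in txn/txn_ckpt.c, which is outside the hunks shown here:

/*
 * Editorial sketch combining two hunks from this diff; simplified, not the
 * literal functions.
 */

/* Dirtying a page marks the tree, and then the connection, as modified. */
static void
mark_dirty(WT_SESSION_IMPL *session, WT_PAGE *page)
{
	if (!S2BT(session)->modified) {
		S2BT(session)->modified = true;
		WT_FULL_BARRIER();
	}

	/* The tree can already be dirty while the connection is not yet. */
	if (!S2C(session)->modified)
		S2C(session)->modified = true;

	__wt_page_only_modify_set(session, page);
}

/* Each server wakeup checkpoints only if something actually changed. */
static int
ckpt_server_pass(WT_SESSION_IMPL *session, WT_SESSION *wt_session)
{
	if (S2C(session)->modified)
		return (wt_session->checkpoint(wt_session, NULL));

	WT_STAT_CONN_INCR(session, txn_checkpoint_skipped);
	return (0);
}
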
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c
index 2786526c2fa..34743034877 100644
--- a/src/conn/conn_log.c
+++ b/src/conn/conn_log.c
@@ -839,10 +839,10 @@ __log_server(void *arg)
/* Wait until the next event. */
- WT_ERR(__wt_epoch(session, &start));
+ __wt_epoch(session, &start);
__wt_cond_auto_wait_signal(session,
conn->log_cond, did_work, &signalled);
- WT_ERR(__wt_epoch(session, &now));
+ __wt_epoch(session, &now);
timediff = WT_TIMEDIFF_MS(now, start);
}
diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c
index 66979dfd023..d5a31c671c0 100644
--- a/src/conn/conn_stat.c
+++ b/src/conn/conn_stat.c
@@ -415,7 +415,7 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp)
conn = S2C(session);
/* Get the current local time of day. */
- WT_RET(__wt_epoch(session, &ts));
+ __wt_epoch(session, &ts);
tm = localtime_r(&ts.tv_sec, &_tm);
/* Create the logging path name for this time of day. */
diff --git a/src/conn/conn_sweep.c b/src/conn/conn_sweep.c
index 03593f8951a..dba37fa2eb0 100644
--- a/src/conn/conn_sweep.c
+++ b/src/conn/conn_sweep.c
@@ -271,7 +271,7 @@ __sweep_server(void *arg)
/* Wait until the next event. */
__wt_cond_wait(session,
conn->sweep_cond, conn->sweep_interval * WT_MILLION);
- WT_ERR(__wt_seconds(session, &now));
+ __wt_seconds(session, &now);
WT_STAT_CONN_INCR(session, dh_sweeps);
diff --git a/src/docs/wtperf.dox b/src/docs/wtperf.dox
index a49d0d9f871..df66ad43355 100644
--- a/src/docs/wtperf.dox
+++ b/src/docs/wtperf.dox
@@ -150,33 +150,25 @@ number of async worker threads
@par checkpoint_interval (unsigned int, default=120)
checkpoint every interval seconds during the workload phase.
@par checkpoint_stress_rate (unsigned int, default=0)
-checkpoint every rate operations during the populate phase in the
-populate thread(s), 0 to disable
+checkpoint every rate operations during the populate phase in the populate thread(s), 0 to disable
@par checkpoint_threads (unsigned int, default=0)
number of checkpoint threads
-@par conn_config (string, default=create)
+@par conn_config (string, default="create")
connection configuration string
@par compact (boolean, default=false)
post-populate compact for LSM merging activity
-@par compression (string, default=none)
-compression extension. Allowed configuration values are: 'none',
-'lz4', 'snappy', 'zlib'
+@par compression (string, default="none")
+compression extension. Allowed configuration values are: 'none', 'lz4', 'snappy', 'zlib'
@par create (boolean, default=true)
do population phase; false to use existing database
@par database_count (unsigned int, default=1)
-number of WiredTiger databases to use. Each database will execute the
-workload using a separate home directory and complete set of worker
-threads
-@par drop_tables (unsigned int, default=0)
-Whether to drop all tables at the end of the run, and report time
-taken to do the drop.
+number of WiredTiger databases to use. Each database will execute the workload using a separate home directory and complete set of worker threads
+@par drop_tables (boolean, default=false)
+Whether to drop all tables at the end of the run, and report time taken to do the drop.
@par icount (unsigned int, default=5000)
-number of records to initially populate. If multiple tables are
-configured the count is spread evenly across all tables.
+number of records to initially populate. If multiple tables are configured the count is spread evenly across all tables.
@par idle_table_cycle (unsigned int, default=0)
-Enable regular create and drop of idle tables, value is the maximum
-number of seconds a create or drop is allowed before flagging an
-error. Default 0 which means disabled.
+Enable regular create and drop of idle tables, value is the maximum number of seconds a create or drop is allowed before flagging an error. Default 0 which means disabled.
@par index (boolean, default=false)
Whether to create an index on the value field.
@par insert_rmw (boolean, default=false)
@@ -188,28 +180,21 @@ perform partial logging on first table only.
@par log_like_table (boolean, default=false)
Append all modification operations to another shared table.
@par min_throughput (unsigned int, default=0)
-notify if any throughput measured is less than this amount. Aborts or
-prints warning based on min_throughput_fatal setting. Requires
-sample_interval to be configured
+notify if any throughput measured is less than this amount. Aborts or prints warning based on min_throughput_fatal setting. Requires sample_interval to be configured
@par min_throughput_fatal (boolean, default=false)
print warning (false) or abort (true) of min_throughput failure.
@par max_latency (unsigned int, default=0)
-notify if any latency measured exceeds this number of
-milliseconds.Aborts or prints warning based on min_throughput_fatal
-setting. Requires sample_interval to be configured
+notify if any latency measured exceeds this number of milliseconds. Aborts or prints warning based on min_throughput_fatal setting. Requires sample_interval to be configured
@par max_latency_fatal (boolean, default=false)
print warning (false) or abort (true) of max_latency failure.
@par pareto (unsigned int, default=0)
-use pareto distribution for random numbers. Zero to disable, otherwise
-a percentage indicating how aggressive the distribution should be.
+use pareto distribution for random numbers. Zero to disable, otherwise a percentage indicating how aggressive the distribution should be.
@par populate_ops_per_txn (unsigned int, default=0)
-number of operations to group into each transaction in the populate
-phase, zero for auto-commit
+number of operations to group into each transaction in the populate phase, zero for auto-commit
@par populate_threads (unsigned int, default=1)
number of populate threads, 1 for bulk load
@par random_range (unsigned int, default=0)
-if non zero choose a value from within this range as the key for
-insert operations
+if non zero choose a value from within this range as the key for insert operations
@par random_value (boolean, default=false)
generate random content for the value
@par range_partition (boolean, default=false)
@@ -217,9 +202,7 @@ partition data by range (vs hash)
@par read_range (unsigned int, default=0)
scan a range of keys after each search
@par readonly (boolean, default=false)
-reopen the connection between populate and workload phases in readonly
-mode. Requires reopen_connection turned on (default). Requires that
-read be the only workload specified
+reopen the connection between populate and workload phases in readonly mode. Requires reopen_connection turned on (default). Requires that read be the only workload specified
@par reopen_connection (boolean, default=true)
close and reopen the connection between populate and workload phases
@par report_interval (unsigned int, default=2)
@@ -231,40 +214,22 @@ total workload seconds
@par sample_interval (unsigned int, default=0)
performance logging every interval seconds, 0 to disable
@par sample_rate (unsigned int, default=50)
-how often the latency of operations is measured. One for every
-operation,two for every second operation, three for every third
-operation etc.
-@par sess_config (string, default=)
+how often the latency of operations is measured. One for every operation, two for every second operation, three for every third operation etc.
+@par sess_config (string, default="")
session configuration string
@par session_count_idle (unsigned int, default=0)
number of idle sessions to create. Default 0.
-@par table_config (string, default=key_format=S,value_format=S,type=lsm,exclusive=true,allocation_size=4kb,internal_page_max=64kb,leaf_page_max=4kb,split_pct=100)
+@par table_config (string, default="key_format=S,value_format=S,type=lsm,exclusive=true, allocation_size=4kb,internal_page_max=64kb,leaf_page_max=4kb, split_pct=100")
table configuration string
@par table_count (unsigned int, default=1)
-number of tables to run operations over. Keys are divided evenly over
-the tables. Cursors are held open on all tables. Default 1, maximum
-99999.
+number of tables to run operations over. Keys are divided evenly over the tables. Cursors are held open on all tables. Default 1, maximum 99999.
@par table_count_idle (unsigned int, default=0)
number of tables to create, that won't be populated. Default 0.
-@par threads (string, default=)
-workload configuration: each 'count' entry is the total number of
-threads, and the 'insert', 'read' and 'update' entries are the ratios
-of insert, read and update operations done by each worker thread; If a
-throttle value is provided each thread will do a maximum of that
-number of operations per second; multiple workload configurations may
-be specified per threads configuration; for example, a more complex
-threads configuration might be
-'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))'
-which would create 2 threads doing nothing but reads and 8 threads
-each doing 50% inserts and 25% reads and updates. Allowed
-configuration values are 'count', 'throttle', 'update_delta', 'reads',
-'inserts', 'updates', 'truncate', 'truncate_pct' and 'truncate_count'.
-There are also behavior modifiers, supported modifiers are
-'ops_per_txn'
-@par transaction_config (string, default=)
-transaction configuration string, relevant when populate_opts_per_txn
-is nonzero
-@par table_name (string, default=test)
+@par threads (string, default="")
+workload configuration: each 'count' entry is the total number of threads, and the 'insert', 'read' and 'update' entries are the ratios of insert, read and update operations done by each worker thread; If a throttle value is provided each thread will do a maximum of that number of operations per second; multiple workload configurations may be specified per threads configuration; for example, a more complex threads configuration might be 'threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))' which would create 2 threads doing nothing but reads and 8 threads each doing 50% inserts and 25% reads and updates. Allowed configuration values are 'count', 'throttle', 'update_delta', 'reads', 'inserts', 'updates', 'truncate', 'truncate_pct' and 'truncate_count'. There are also behavior modifiers, supported modifiers are 'ops_per_txn'
+@par transaction_config (string, default="")
+WT_SESSION.begin_transaction configuration string, applied during the populate phase when populate_ops_per_txn is nonzero
+@par table_name (string, default="test")
table name
@par truncate_single_ops (boolean, default=false)
Implement truncate via cursor remove instead of session API
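
[Editor's note] The wtperf option descriptions above are reflowed onto single lines and the drop_tables type is corrected to boolean. As a reading aid, here is a small, hypothetical wtperf configuration assembled only from options documented in this section; the threads value is the example quoted verbatim in the description, the other values are arbitrary:

# Hypothetical wtperf configuration; values are illustrative.
conn_config="create,cache_size=1GB"
table_config="key_format=S,value_format=S,type=lsm"
icount=5000
populate_threads=1
checkpoint_interval=120
report_interval=2
sample_interval=10
# 2 read-only threads plus 8 threads doing 50% inserts, 25% reads and updates.
threads=((count=2,reads=1)(count=8,reads=1,inserts=2,updates=1))
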
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index acc81f566a5..45ec9bce3b5 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -233,10 +233,10 @@ __wt_evict_thread_run(WT_SESSION_IMPL *session, WT_THREAD *thread)
#ifdef HAVE_DIAGNOSTIC
/*
- * Ensure the cache stuck timer is initialized when starting eviction
+ * Ensure the cache stuck timer is initialized when starting eviction.
*/
if (thread->id == 0)
- WT_ERR(__wt_epoch(session, &cache->stuck_ts));
+ __wt_epoch(session, &cache->stuck_ts);
#endif
while (F_ISSET(conn, WT_CONN_EVICTION_RUN) &&
@@ -350,10 +350,10 @@ __evict_server(WT_SESSION_IMPL *session, bool *did_work)
} else if (cache->pages_evicted != cache->pages_evict) {
cache->pages_evicted = cache->pages_evict;
#ifdef HAVE_DIAGNOSTIC
- WT_RET(__wt_epoch(session, &cache->stuck_ts));
+ __wt_epoch(session, &cache->stuck_ts);
} else {
/* After being stuck for 5 minutes, give up. */
- WT_RET(__wt_epoch(session, &now));
+ __wt_epoch(session, &now);
if (WT_TIMEDIFF_SEC(now, cache->stuck_ts) > 300) {
ret = ETIMEDOUT;
__wt_err(session, ret,
@@ -465,16 +465,16 @@ __evict_update_work(WT_SESSION_IMPL *session)
*/
bytes_max = conn->cache_size + 1;
bytes_inuse = __wt_cache_bytes_inuse(cache);
- if (bytes_inuse > (cache->eviction_target * bytes_max) / 100)
- F_SET(cache, WT_CACHE_EVICT_CLEAN);
if (__wt_eviction_clean_needed(session, NULL))
F_SET(cache, WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_CLEAN_HARD);
+ else if (bytes_inuse > (cache->eviction_target * bytes_max) / 100)
+ F_SET(cache, WT_CACHE_EVICT_CLEAN);
dirty_inuse = __wt_cache_dirty_leaf_inuse(cache);
- if (dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100)
- F_SET(cache, WT_CACHE_EVICT_DIRTY);
if (__wt_eviction_dirty_needed(session, NULL))
F_SET(cache, WT_CACHE_EVICT_DIRTY | WT_CACHE_EVICT_DIRTY_HARD);
+ else if (dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100)
+ F_SET(cache, WT_CACHE_EVICT_DIRTY);
/*
* If application threads are blocked by the total volume of data in
@@ -506,12 +506,6 @@ __evict_update_work(WT_SESSION_IMPL *session)
F_CLR(cache, WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_CLEAN_HARD);
}
- /* If threads are blocked by eviction we should be looking for pages. */
- WT_ASSERT(session, !F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD) ||
- F_ISSET(cache, WT_CACHE_EVICT_CLEAN));
- WT_ASSERT(session, !F_ISSET(cache, WT_CACHE_EVICT_DIRTY_HARD) ||
- F_ISSET(cache, WT_CACHE_EVICT_DIRTY));
-
WT_STAT_CONN_SET(session, cache_eviction_state,
F_MASK(cache, WT_CACHE_EVICT_MASK));
@@ -543,7 +537,7 @@ __evict_pass(WT_SESSION_IMPL *session)
/* Evict pages from the cache. */
for (loop = 0; cache->pass_intr == 0; loop++) {
- WT_RET(__wt_epoch(session, &now));
+ __wt_epoch(session, &now);
if (loop == 0)
prev = now;
@@ -895,12 +889,11 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
/* Fill the next queue (that isn't the urgent queue). */
queue = cache->evict_fill_queue;
other_queue = cache->evict_queues + (1 - (queue - cache->evict_queues));
+ cache->evict_fill_queue = other_queue;
/* If this queue is full, try the other one. */
if (__evict_queue_full(queue) && !__evict_queue_full(other_queue))
queue = other_queue;
- cache->evict_fill_queue =
- &cache->evict_queues[1 - (queue - cache->evict_queues)];
/*
* If both queues are full and haven't been empty on recent refills,
@@ -1079,6 +1072,17 @@ __evict_walk(WT_SESSION_IMPL *session, WT_EVICT_QUEUE *queue)
start_slot = slot = queue->evict_entries;
max_entries = WT_MIN(slot + WT_EVICT_WALK_INCR, cache->evict_slots);
+ /*
+ * Another pathological case: if there are only a tiny number of
+ * candidate pages in cache, don't put all of them on one queue.
+ */
+ if (F_ISSET(cache, WT_CACHE_EVICT_CLEAN))
+ max_entries = WT_MIN(max_entries,
+ 1 + (uint32_t)(__wt_cache_pages_inuse(cache) / 2));
+ else
+ max_entries = WT_MIN(max_entries,
+ 1 + (uint32_t)(cache->pages_dirty_leaf / 2));
+
retry: while (slot < max_entries) {
/*
* If another thread is waiting on the eviction server to clear
@@ -1508,19 +1512,22 @@ fast: /* If the page can't be evicted, give up. */
btree->evict_walk_period = 0;
/*
- * If we happen to end up on the root page, clear it. We have to track
- * hazard pointers, and the root page complicates that calculation.
+ * If we happen to end up on the root page or a page requiring urgent
+ * eviction, clear it. We have to track hazard pointers, and the root
+ * page complicates that calculation.
*
* Likewise if we found no new candidates during the walk: there is no
- * point keeping a page pinned, since it may be the only candidate in an
- * idle tree.
+ * point keeping a page pinned, since it may be the only candidate in
+ * an idle tree.
*
* If we land on a page requiring forced eviction, move on to the next
* page: we want this page evicted as quickly as possible.
*/
if ((ref = btree->evict_ref) != NULL) {
/* Give up the walk occasionally. */
- if (__wt_ref_is_root(ref) || evict == start || give_up)
+ if (__wt_ref_is_root(ref) || evict == start || give_up ||
+ ref->page->read_gen == WT_READGEN_OLDEST ||
+ ref->page->memory_footprint >= btree->splitmempage)
WT_RET(__evict_clear_walk(session, restarts == 0));
else if (ref->page->read_gen == WT_READGEN_OLDEST)
WT_RET_NOTFOUND_OK(__wt_tree_walk_count(
@@ -1543,14 +1550,14 @@ __evict_get_ref(
WT_SESSION_IMPL *session, bool is_server, WT_BTREE **btreep, WT_REF **refp)
{
WT_CACHE *cache;
- WT_DECL_RET;
WT_EVICT_ENTRY *evict;
WT_EVICT_QUEUE *queue, *other_queue, *urgent_queue;
uint32_t candidates;
- bool is_app, urgent_ok;
+ bool is_app, server_only, urgent_ok;
cache = S2C(session)->cache;
is_app = !F_ISSET(session, WT_SESSION_INTERNAL);
+ server_only = is_server && !WT_EVICT_HAS_WORKERS(session);
urgent_ok = (!is_app && !is_server) ||
!WT_EVICT_HAS_WORKERS(session) ||
__wt_cache_aggressive(session);
@@ -1569,7 +1576,8 @@ __evict_get_ref(
}
/*
- * The server repopulates whenever the other queue is not full.
+ * The server repopulates whenever the other queue is not full, as long
+ * as at least one page has been evicted out of the current queue.
*
* Note that there are pathological cases where there are only enough
* eviction candidates in the cache to fill one queue. In that case,
@@ -1577,18 +1585,14 @@ __evict_get_ref(
* Such cases are extremely rare in real applications.
*/
if (is_server &&
+ (!urgent_ok || __evict_queue_empty(urgent_queue, false)) &&
+ !__evict_queue_full(cache->evict_current_queue) &&
+ !__evict_queue_full(cache->evict_fill_queue) &&
(cache->evict_empty_score > WT_EVICT_SCORE_CUTOFF ||
- __evict_queue_empty(cache->evict_fill_queue, false))) {
- while ((ret = __wt_spin_trylock(
- session, &cache->evict_queue_lock)) == EBUSY)
- if ((!urgent_ok ||
- __evict_queue_empty(urgent_queue, false)) &&
- !__evict_queue_full(cache->evict_fill_queue))
- return (WT_NOTFOUND);
+ __evict_queue_empty(cache->evict_fill_queue, false)))
+ return (WT_NOTFOUND);
- WT_RET(ret);
- } else
- __wt_spin_lock(session, &cache->evict_queue_lock);
+ __wt_spin_lock(session, &cache->evict_queue_lock);
/* Check the urgent queue first. */
if (urgent_ok && !__evict_queue_empty(urgent_queue, false))
@@ -1596,17 +1600,15 @@ __evict_get_ref(
else {
/*
* Check if the current queue needs to change.
- * The current queue could have changed while we waited for
- * the lock.
*
* The server will only evict half of the pages before looking
- * for more. The remainder are left to eviction workers (if any
- * configured), or application threads if necessary.
+ * for more, but should only switch queues if there are no
+ * other eviction workers.
*/
queue = cache->evict_current_queue;
other_queue = cache->evict_other_queue;
- if (__evict_queue_empty(queue, is_server) &&
- !__evict_queue_empty(other_queue, is_server)) {
+ if (__evict_queue_empty(queue, server_only) &&
+ !__evict_queue_empty(other_queue, server_only)) {
cache->evict_current_queue = other_queue;
cache->evict_other_queue = queue;
}
@@ -1715,15 +1717,19 @@ __evict_get_ref(
static int
__evict_page(WT_SESSION_IMPL *session, bool is_server)
{
+ struct timespec enter, leave;
WT_BTREE *btree;
WT_CACHE *cache;
WT_DECL_RET;
WT_REF *ref;
+ bool app_timer;
WT_RET(__evict_get_ref(session, is_server, &btree, &ref));
WT_ASSERT(session, ref->state == WT_REF_LOCKED);
+ app_timer = false;
cache = S2C(session)->cache;
+
/*
* An internal session flags either the server itself or an eviction
* worker thread.
@@ -1739,6 +1745,10 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
WT_STAT_CONN_INCR(session, cache_eviction_app_dirty);
WT_STAT_CONN_INCR(session, cache_eviction_app);
cache->app_evicts++;
+ if (WT_STAT_ENABLED(session)) {
+ app_timer = true;
+ __wt_epoch(session, &enter);
+ }
}
/*
@@ -1756,6 +1766,11 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
(void)__wt_atomic_subv32(&btree->evict_busy, 1);
+ if (app_timer) {
+ __wt_epoch(session, &leave);
+ WT_STAT_CONN_INCRV(session,
+ application_evict_time, WT_TIMEDIFF_US(leave, enter));
+ }
return (ret);
}
@@ -1767,6 +1782,7 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
int
__wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
{
+ struct timespec enter, leave;
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
@@ -1792,9 +1808,11 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
/* Wake the eviction server if we need to do work. */
__wt_evict_server_wake(session);
- init_evict_count = cache->pages_evict;
+ /* Track how long application threads spend doing eviction. */
+ if (WT_STAT_ENABLED(session) && !F_ISSET(session, WT_SESSION_INTERNAL))
+ __wt_epoch(session, &enter);
- for (;;) {
+ for (init_evict_count = cache->pages_evict;; ret = 0) {
/*
* A pathological case: if we're the oldest transaction in the
* system and the eviction server is stuck trying to find space,
@@ -1804,7 +1822,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
if (__wt_cache_stuck(session) && __wt_txn_am_oldest(session)) {
--cache->evict_aggressive_score;
WT_STAT_CONN_INCR(session, txn_fail_cache);
- return (WT_ROLLBACK);
+ WT_ERR(WT_ROLLBACK);
}
/*
@@ -1816,7 +1834,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
* limit the work to 5 evictions and return. If that's not the
* case, we can do more.
*/
- if (!busy && txn_state->snap_min != WT_TXN_NONE &&
+ if (!busy && txn_state->pinned_id != WT_TXN_NONE &&
txn_global->current != txn_global->oldest_id)
busy = true;
max_pages_evicted = busy ? 5 : 20;
@@ -1825,7 +1843,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
if (!__wt_eviction_needed(session, busy, &pct_full) ||
(pct_full < 100 &&
cache->pages_evict > init_evict_count + max_pages_evicted))
- return (0);
+ break;
/*
* Don't make application threads participate in scrubbing for
@@ -1842,7 +1860,7 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
switch (ret = __evict_page(session, false)) {
case 0:
if (busy)
- return (0);
+ goto err;
/* FALLTHROUGH */
case EBUSY:
break;
@@ -1853,9 +1871,18 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
cache->app_waits++;
break;
default:
- return (ret);
+ goto err;
}
}
+
+err: if (WT_STAT_ENABLED(session) &&
+ !F_ISSET(session, WT_SESSION_INTERNAL)) {
+ __wt_epoch(session, &leave);
+ WT_STAT_CONN_INCRV(session,
+ application_cache_time, WT_TIMEDIFF_US(leave, enter));
+ }
+
+ return (ret);
/* NOTREACHED */
}
diff --git a/src/include/api.h b/src/include/api.h
index e1b2f8edaf3..2783d17f825 100644
--- a/src/include/api.h
+++ b/src/include/api.h
@@ -139,7 +139,9 @@
(s) = (WT_SESSION_IMPL *)(cur)->session; \
TXN_API_CALL_NOCONF(s, WT_CURSOR, n, cur, \
((bt) == NULL) ? NULL : ((WT_BTREE *)(bt))->dhandle); \
- if (F_ISSET(S2C(s), WT_CONN_IN_MEMORY) && __wt_cache_full(s)) \
+ if (F_ISSET(S2C(s), WT_CONN_IN_MEMORY) && \
+ !F_ISSET((WT_BTREE *)(bt), WT_BTREE_IGNORE_CACHE) && \
+ __wt_cache_full(s)) \
WT_ERR(WT_CACHE_FULL);
#define JOINABLE_CURSOR_UPDATE_API_CALL(cur, s, n, bt) \
diff --git a/src/include/btree.h b/src/include/btree.h
index cfaf59e70e1..713d46ae85f 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -119,7 +119,7 @@ struct __wt_btree {
uint64_t last_recno; /* Column-store last record number */
WT_REF root; /* Root page reference */
- int modified; /* If the tree ever modified */
+ bool modified; /* If the tree ever modified */
bool bulk_load_ok; /* Bulk-load is a possibility */
WT_BM *bm; /* Block manager reference */
@@ -154,18 +154,19 @@ struct __wt_btree {
WT_SPINLOCK flush_lock; /* Lock to flush the tree's pages */
/* Flags values up to 0xff are reserved for WT_DHANDLE_* */
-#define WT_BTREE_BULK 0x00100 /* Bulk-load handle */
-#define WT_BTREE_IN_MEMORY 0x00200 /* Cache-resident object */
-#define WT_BTREE_LOOKASIDE 0x00400 /* Look-aside table */
-#define WT_BTREE_NO_CHECKPOINT 0x00800 /* Disable checkpoints */
-#define WT_BTREE_NO_EVICTION 0x01000 /* Disable eviction */
-#define WT_BTREE_NO_LOGGING 0x02000 /* Disable logging */
-#define WT_BTREE_NO_RECONCILE 0x04000 /* Allow splits, even with no evict */
-#define WT_BTREE_REBALANCE 0x08000 /* Handle is for rebalance */
-#define WT_BTREE_SALVAGE 0x10000 /* Handle is for salvage */
-#define WT_BTREE_SKIP_CKPT 0x20000 /* Handle skipped checkpoint */
-#define WT_BTREE_UPGRADE 0x40000 /* Handle is for upgrade */
-#define WT_BTREE_VERIFY 0x80000 /* Handle is for verify */
+#define WT_BTREE_BULK 0x000100 /* Bulk-load handle */
+#define WT_BTREE_IGNORE_CACHE 0x000200 /* Cache-resident object */
+#define WT_BTREE_IN_MEMORY 0x000400 /* Cache-resident object */
+#define WT_BTREE_LOOKASIDE 0x000800 /* Look-aside table */
+#define WT_BTREE_NO_CHECKPOINT 0x001000 /* Disable checkpoints */
+#define WT_BTREE_NO_EVICTION 0x002000 /* Disable eviction */
+#define WT_BTREE_NO_LOGGING 0x004000 /* Disable logging */
+#define WT_BTREE_NO_RECONCILE 0x008000 /* Allow splits, even with no evict */
+#define WT_BTREE_REBALANCE 0x010000 /* Handle is for rebalance */
+#define WT_BTREE_SALVAGE 0x020000 /* Handle is for salvage */
+#define WT_BTREE_SKIP_CKPT 0x040000 /* Handle skipped checkpoint */
+#define WT_BTREE_UPGRADE 0x080000 /* Handle is for upgrade */
+#define WT_BTREE_VERIFY 0x100000 /* Handle is for verify */
uint32_t flags;
};
diff --git a/src/include/btree.i b/src/include/btree.i
index a9ce4f754a9..74ebf74f1e9 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -522,14 +522,22 @@ __wt_page_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page)
* might result in an extra checkpoint that doesn't do any work but it
* shouldn't cause problems; regardless, let's play it safe.)
*/
- if (S2BT(session)->modified == 0) {
+ if (!S2BT(session)->modified) {
/* Assert we never dirty a checkpoint handle. */
WT_ASSERT(session, session->dhandle->checkpoint == NULL);
- S2BT(session)->modified = 1;
+ S2BT(session)->modified = true;
WT_FULL_BARRIER();
}
+ /*
+ * There is a possibility of btree being dirty whereas connection being
+ * clean when entering this function. So make sure to update connection
+ * to dirty outside a condition on btree modified flag.
+ */
+ if (!S2C(session)->modified)
+ S2C(session)->modified = true;
+
__wt_page_only_modify_set(session, page);
}
diff --git a/src/include/cache.i b/src/include/cache.i
index 4255d04ec37..17ab39e97d2 100644
--- a/src/include/cache.i
+++ b/src/include/cache.i
@@ -355,7 +355,7 @@ __wt_cache_eviction_check(WT_SESSION_IMPL *session, bool busy, bool *didworkp)
txn_state = WT_SESSION_TXN_STATE(session);
busy = busy || txn_state->id != WT_TXN_NONE ||
session->nhazard > 0 ||
- (txn_state->snap_min != WT_TXN_NONE &&
+ (txn_state->pinned_id != WT_TXN_NONE &&
txn_global->current != txn_global->oldest_id);
/*
diff --git a/src/include/connection.h b/src/include/connection.h
index e19ad684b24..ce81dcf5976 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -352,6 +352,12 @@ struct __wt_connection_impl {
WT_SESSION_IMPL *meta_ckpt_session;/* Metadata checkpoint session */
+ /*
+ * Is there a data/schema change that needs to be the part of a
+ * checkpoint.
+ */
+ bool modified;
+
WT_SESSION_IMPL *sweep_session; /* Handle sweep session */
wt_thread_t sweep_tid; /* Handle sweep thread */
int sweep_tid_set; /* Handle sweep thread set */
diff --git a/src/include/extern.h b/src/include/extern.h
index 5444b2e9f14..e3cffa4ca3c 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -677,7 +677,7 @@ extern uint32_t __wt_log2_int(uint32_t n);
extern bool __wt_ispo2(uint32_t v);
extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2);
extern void __wt_random_init(WT_RAND_STATE volatile *rnd_state);
-extern int __wt_random_init_seed( WT_SESSION_IMPL *session, WT_RAND_STATE volatile *rnd_state) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_random_init_seed( WT_SESSION_IMPL *session, WT_RAND_STATE volatile *rnd_state);
extern uint32_t __wt_random(WT_RAND_STATE volatile *rnd_state);
extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/include/extern_posix.h b/src/include/extern_posix.h
index d2f74d2ffe4..fd94ef0ddf2 100644
--- a/src/include/extern_posix.h
+++ b/src/include/extern_posix.h
@@ -27,5 +27,5 @@ extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds);
extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_thread_id(char *buf, size_t buflen);
-extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp);
extern void __wt_yield(void);
diff --git a/src/include/extern_win.h b/src/include/extern_win.h
index 8c2b19056e0..f06ee881ece 100644
--- a/src/include/extern_win.h
+++ b/src/include/extern_win.h
@@ -25,7 +25,7 @@ extern void __wt_sleep(uint64_t seconds, uint64_t micro_seconds);
extern int __wt_thread_create(WT_SESSION_IMPL *session, wt_thread_t *tidret, WT_THREAD_CALLBACK(*func)(void *), void *arg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_thread_join(WT_SESSION_IMPL *session, wt_thread_t tid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_thread_id(char *buf, size_t buflen);
-extern int __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp);
extern int __wt_to_utf16_string( WT_SESSION_IMPL *session, const char*utf8, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_to_utf8_string( WT_SESSION_IMPL *session, const wchar_t*wide, WT_ITEM **outbuf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern DWORD __wt_getlasterror(void);
diff --git a/src/include/misc.i b/src/include/misc.i
index f267c7afc91..befd480e085 100644
--- a/src/include/misc.i
+++ b/src/include/misc.i
@@ -33,16 +33,14 @@ __wt_strdup(WT_SESSION_IMPL *session, const char *str, void *retp)
* __wt_seconds --
* Return the seconds since the Epoch.
*/
-static inline int
+static inline void
__wt_seconds(WT_SESSION_IMPL *session, time_t *timep)
{
struct timespec t;
- WT_RET(__wt_epoch(session, &t));
+ __wt_epoch(session, &t);
*timep = t.tv_sec;
-
- return (0);
}
/*
diff --git a/src/include/stat.h b/src/include/stat.h
index cd0cae16826..68879206851 100644
--- a/src/include/stat.h
+++ b/src/include/stat.h
@@ -420,6 +420,8 @@ struct __wt_connection_stats {
int64_t thread_fsync_active;
int64_t thread_read_active;
int64_t thread_write_active;
+ int64_t application_evict_time;
+ int64_t application_cache_time;
int64_t page_busy_blocked;
int64_t page_forcible_evict_blocked;
int64_t page_locked_blocked;
@@ -437,6 +439,7 @@ struct __wt_connection_stats {
int64_t txn_checkpoint_scrub_time;
int64_t txn_checkpoint_time_total;
int64_t txn_checkpoint;
+ int64_t txn_checkpoint_skipped;
int64_t txn_fail_cache;
int64_t txn_checkpoint_fsync_post;
int64_t txn_checkpoint_fsync_post_duration;
diff --git a/src/include/txn.h b/src/include/txn.h
index 2e41ae8620d..8128e8e4cc2 100644
--- a/src/include/txn.h
+++ b/src/include/txn.h
@@ -49,9 +49,9 @@
WT_ASSERT((s), (s)->txn.forced_iso > 0); \
(s)->txn.forced_iso--; \
WT_ASSERT((s), txn_state->id == saved_state.id && \
- (txn_state->snap_min == saved_state.snap_min || \
- saved_state.snap_min == WT_TXN_NONE)); \
- txn_state->snap_min = saved_state.snap_min; \
+ (txn_state->pinned_id == saved_state.pinned_id || \
+ saved_state.pinned_id == WT_TXN_NONE)); \
+ txn_state->pinned_id = saved_state.pinned_id; \
} while (0)
struct __wt_named_snapshot {
@@ -59,14 +59,14 @@ struct __wt_named_snapshot {
TAILQ_ENTRY(__wt_named_snapshot) q;
- uint64_t snap_min, snap_max;
+ uint64_t pinned_id, snap_min, snap_max;
uint64_t *snapshot;
uint32_t snapshot_count;
};
struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_txn_state {
volatile uint64_t id;
- volatile uint64_t snap_min;
+ volatile uint64_t pinned_id;
};
struct __wt_txn_global {
diff --git a/src/include/txn.i b/src/include/txn.i
index 1a8851a9a2a..cf7e2eafc65 100644
--- a/src/include/txn.i
+++ b/src/include/txn.i
@@ -309,7 +309,7 @@ __wt_txn_idle_cache_check(WT_SESSION_IMPL *session)
* WT_TXN_HAS_SNAPSHOT.
*/
if (F_ISSET(txn, WT_TXN_RUNNING) &&
- !F_ISSET(txn, WT_TXN_HAS_ID) && txn_state->snap_min == WT_TXN_NONE)
+ !F_ISSET(txn, WT_TXN_HAS_ID) && txn_state->pinned_id == WT_TXN_NONE)
WT_RET(__wt_cache_eviction_check(session, false, NULL));
return (0);
@@ -480,8 +480,8 @@ __wt_txn_cursor_op(WT_SESSION_IMPL *session)
* positioned on a value, it can't be freed.
*/
if (txn->isolation == WT_ISO_READ_UNCOMMITTED) {
- if (txn_state->snap_min == WT_TXN_NONE)
- txn_state->snap_min = txn_global->last_running;
+ if (txn_state->pinned_id == WT_TXN_NONE)
+ txn_state->pinned_id = txn_global->last_running;
} else if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
WT_RET(__wt_txn_get_snapshot(session));
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index f4763a113f1..2b71a580532 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -1069,6 +1069,11 @@ struct __wt_session {
* Permitted values are \c "none"\, \c "english"\, \c "utf8<file>" or \c
* "utf16<file>". See @ref huffman for more information., a string;
* default \c none.}
+ * @config{ignore_in_memory_cache_size, allow update and insert
+ * operations to proceed even if the cache is already at capacity. Only
+ * valid in conjunction with in-memory databases. Should be used with
+ * caution - this configuration allows WiredTiger to consume memory over
+ * the configured cache limit., a boolean flag; default \c false.}
* @config{immutable, configure the index to be immutable - that is an
* index is not changed by any update to a record in the table., a
* boolean flag; default \c false.}
@@ -1815,14 +1820,13 @@ struct __wt_connection {
* default \c 5.}
* @config{eviction_dirty_target, perform eviction in worker threads
* when the cache contains at least this much dirty content\, expressed
- * as a percentage of the total cache size. Ignored if \c in_memory is
- * \c true., an integer between 1 and 99; default \c 5.}
+ * as a percentage of the total cache size., an integer between 1 and
+ * 99; default \c 5.}
* @config{eviction_dirty_trigger, trigger application threads to
* perform eviction when the cache contains at least this much dirty
* content\, expressed as a percentage of the total cache size. This
- * setting only alters behavior if it is lower than eviction_trigger.
- * Ignored if \c in_memory is \c true., an integer between 1 and 99;
- * default \c 20.}
+ * setting only alters behavior if it is lower than eviction_trigger.,
+ * an integer between 1 and 99; default \c 20.}
* @config{eviction_target, perform eviction in worker threads when the
* cache contains at least this much content\, expressed as a percentage
* of the total cache size. Must be less than \c eviction_trigger., an
@@ -2281,13 +2285,12 @@ struct __wt_connection {
* is \c true., an integer between 0 and 99; default \c 5.}
* @config{eviction_dirty_target, perform eviction in worker threads when the
* cache contains at least this much dirty content\, expressed as a percentage
- * of the total cache size. Ignored if \c in_memory is \c true., an integer
- * between 1 and 99; default \c 5.}
+ * of the total cache size., an integer between 1 and 99; default \c 5.}
* @config{eviction_dirty_trigger, trigger application threads to perform
* eviction when the cache contains at least this much dirty content\, expressed
* as a percentage of the total cache size. This setting only alters behavior
- * if it is lower than eviction_trigger. Ignored if \c in_memory is \c true.,
- * an integer between 1 and 99; default \c 20.}
+ * if it is lower than eviction_trigger., an integer between 1 and 99; default
+ * \c 20.}
* @config{eviction_target, perform eviction in worker threads when the cache
* contains at least this much content\, expressed as a percentage of the total
* cache size. Must be less than \c eviction_trigger., an integer between 10
@@ -4591,67 +4594,76 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_THREAD_READ_ACTIVE 1186
/*! thread-state: active filesystem write calls */
#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1187
+/*! thread-yield: application thread time evicting (usecs) */
+#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1188
+/*! thread-yield: application thread time waiting for cache (usecs) */
+#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1189
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1188
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1190
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1189
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1191
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1190
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1192
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1191
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1193
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1192
+#define WT_STAT_CONN_PAGE_SLEEP 1194
/*! transaction: number of named snapshots created */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1193
+#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1195
/*! transaction: number of named snapshots dropped */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1194
+#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1196
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1195
+#define WT_STAT_CONN_TXN_BEGIN 1197
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1196
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1198
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1197
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1199
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1198
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1200
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1199
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1201
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1200
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1202
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1201
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1203
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1202
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1204
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1203
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1205
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1204
+#define WT_STAT_CONN_TXN_CHECKPOINT 1206
+/*!
+ * transaction: transaction checkpoints skipped because database was
+ * clean
+ */
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1207
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1205
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1208
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1206
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1209
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1207
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1210
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1208
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1211
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1209
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1212
/*!
* transaction: transaction range of IDs currently pinned by named
* snapshots
*/
-#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1210
+#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1213
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1211
+#define WT_STAT_CONN_TXN_SYNC 1214
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1212
+#define WT_STAT_CONN_TXN_COMMIT 1215
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1213
+#define WT_STAT_CONN_TXN_ROLLBACK 1216
/*!
* @}
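The new WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED key can be read like any other connection statistic. A minimal sketch using the standard statistics cursor; the get_value result types follow the statistics-cursor documentation, and error paths are collapsed for brevity:

    #include <wiredtiger.h>

    /* Return how many checkpoints were skipped because the database was clean. */
    static int
    checkpoints_skipped(WT_SESSION *session, int64_t *skippedp)
    {
            WT_CURSOR *cursor;
            const char *desc, *pvalue;
            int ret;

            if ((ret = session->open_cursor(
                session, "statistics:", NULL, NULL, &cursor)) != 0)
                    return (ret);
            cursor->set_key(cursor, WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED);
            if ((ret = cursor->search(cursor)) == 0)
                    ret = cursor->get_value(cursor, &desc, &pvalue, skippedp);
            (void)cursor->close(cursor);
            return (ret);
    }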
diff --git a/src/log/log.c b/src/log/log.c
index b0c789f0f9e..00e4ea5f441 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -128,9 +128,9 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
"log_force_sync: sync directory %s to LSN %" PRIu32
"/%" PRIu32,
log->log_dir_fh->name, min_lsn->l.file, min_lsn->l.offset);
- WT_ERR(__wt_epoch(session, &fsync_start));
+ __wt_epoch(session, &fsync_start);
WT_ERR(__wt_fsync(session, log->log_dir_fh, true));
- WT_ERR(__wt_epoch(session, &fsync_stop));
+ __wt_epoch(session, &fsync_stop);
fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start);
log->sync_dir_lsn = *min_lsn;
WT_STAT_CONN_INCR(session, log_sync_dir);
@@ -152,9 +152,9 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
__wt_verbose(session, WT_VERB_LOG,
"log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32,
log_fh->name, min_lsn->l.file, min_lsn->l.offset);
- WT_ERR(__wt_epoch(session, &fsync_start));
+ __wt_epoch(session, &fsync_start);
WT_ERR(__wt_fsync(session, log_fh, true));
- WT_ERR(__wt_epoch(session, &fsync_stop));
+ __wt_epoch(session, &fsync_stop);
fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start);
log->sync_lsn = *min_lsn;
WT_STAT_CONN_INCR(session, log_sync);
@@ -1478,9 +1478,9 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
"/%" PRIu32,
log->log_dir_fh->name,
sync_lsn.l.file, sync_lsn.l.offset);
- WT_ERR(__wt_epoch(session, &fsync_start));
+ __wt_epoch(session, &fsync_start);
WT_ERR(__wt_fsync(session, log->log_dir_fh, true));
- WT_ERR(__wt_epoch(session, &fsync_stop));
+ __wt_epoch(session, &fsync_stop);
fsync_duration_usecs =
WT_TIMEDIFF_US(fsync_stop, fsync_start);
log->sync_dir_lsn = sync_lsn;
@@ -1500,9 +1500,9 @@ __wt_log_release(WT_SESSION_IMPL *session, WT_LOGSLOT *slot, bool *freep)
log->log_fh->name,
sync_lsn.l.file, sync_lsn.l.offset);
WT_STAT_CONN_INCR(session, log_sync);
- WT_ERR(__wt_epoch(session, &fsync_start));
+ __wt_epoch(session, &fsync_start);
WT_ERR(__wt_fsync(session, log->log_fh, true));
- WT_ERR(__wt_epoch(session, &fsync_stop));
+ __wt_epoch(session, &fsync_stop);
fsync_duration_usecs =
WT_TIMEDIFF_US(fsync_stop, fsync_start);
WT_STAT_CONN_INCRV(session,
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
index e98f59e7b05..b9a6dd18b7a 100644
--- a/src/lsm/lsm_cursor.c
+++ b/src/lsm/lsm_cursor.c
@@ -165,8 +165,7 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update)
WT_LSM_TREE *lsm_tree;
WT_SESSION_IMPL *session;
WT_TXN *txn;
- uint64_t *switch_txnp;
- uint64_t snap_min;
+ uint64_t pinned_id, *switchp;
lsm_tree = clsm->lsm_tree;
session = (WT_SESSION_IMPL *)clsm->iface.session;
@@ -226,8 +225,8 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update)
* that overlaps with our snapshot is a potential
* conflict.
*
- * Note that the global snap_min is correct here: it
- * tracks concurrent transactions excluding special
+ * Note that the pinned ID is correct here: it tracks
+ * concurrent transactions excluding special
* transactions such as checkpoint (which we can't
* conflict with because checkpoint only writes the
* metadata, which is not an LSM tree).
@@ -237,17 +236,17 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update)
F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) {
WT_ASSERT(session,
F_ISSET(txn, WT_TXN_HAS_SNAPSHOT));
- snap_min =
- WT_SESSION_TXN_STATE(session)->snap_min;
- for (switch_txnp =
+ pinned_id =
+ WT_SESSION_TXN_STATE(session)->pinned_id;
+ for (switchp =
&clsm->switch_txn[clsm->nchunks - 2];
clsm->nupdates < clsm->nchunks;
- clsm->nupdates++, switch_txnp--) {
- if (WT_TXNID_LT(*switch_txnp, snap_min))
+ clsm->nupdates++, switchp--) {
+ if (WT_TXNID_LT(*switchp, pinned_id))
break;
WT_ASSERT(session,
!__wt_txn_visible_all(
- session, *switch_txnp));
+ session, *switchp));
}
}
}
diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c
index 5a5140b9c3a..0a5f4fdd8b5 100644
--- a/src/lsm/lsm_manager.c
+++ b/src/lsm/lsm_manager.c
@@ -392,7 +392,7 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) {
if (!lsm_tree->active)
continue;
- WT_ERR(__wt_epoch(session, &now));
+ __wt_epoch(session, &now);
pushms = lsm_tree->work_push_ts.tv_sec == 0 ? 0 :
WT_TIMEDIFF_MS(now, lsm_tree->work_push_ts);
fillms = 3 * lsm_tree->chunk_fill_ms;
@@ -651,7 +651,7 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session,
return (0);
}
- WT_RET(__wt_epoch(session, &lsm_tree->work_push_ts));
+ __wt_epoch(session, &lsm_tree->work_push_ts);
WT_RET(__wt_calloc_one(session, &entry));
entry->type = type;
entry->flags = flags;
diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c
index 4bbfcfd4411..493855d489a 100644
--- a/src/lsm/lsm_merge.c
+++ b/src/lsm/lsm_merge.c
@@ -54,7 +54,7 @@ __lsm_merge_aggressive_clear(WT_LSM_TREE *lsm_tree)
* __lsm_merge_aggressive_update --
* Update the merge aggressiveness for an LSM tree.
*/
-static int
+static void
__lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
struct timespec now;
@@ -72,7 +72,7 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
if (!lsm_tree->modified ||
F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) {
lsm_tree->merge_aggressiveness = 10;
- return (0);
+ return;
}
/*
@@ -81,7 +81,7 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
*/
if (lsm_tree->chunks_flushed <= lsm_tree->merge_min) {
__lsm_merge_aggressive_clear(lsm_tree);
- return (0);
+ return;
}
/*
@@ -91,10 +91,10 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
*/
if (!F_ISSET(lsm_tree, WT_LSM_TREE_AGGRESSIVE_TIMER)) {
F_SET(lsm_tree, WT_LSM_TREE_AGGRESSIVE_TIMER);
- return (__wt_epoch(session, &lsm_tree->merge_aggressive_ts));
+ __wt_epoch(session, &lsm_tree->merge_aggressive_ts);
}
- WT_RET(__wt_epoch(session, &now));
+ __wt_epoch(session, &now);
msec_since_last_merge =
WT_TIMEDIFF_MS(now, lsm_tree->merge_aggressive_ts);
@@ -113,7 +113,7 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
* generates a variable load.
*/
if (msec_since_last_merge < msec_to_create_merge)
- return (0);
+ return;
/*
* Bump how aggressively we look for merges based on how long since
@@ -134,7 +134,6 @@ __lsm_merge_aggressive_update(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
msec_since_last_merge, lsm_tree->chunk_fill_ms);
lsm_tree->merge_aggressiveness = new_aggressive;
}
- return (0);
}
/*
@@ -326,7 +325,7 @@ retry_find:
goto retry_find;
}
/* Consider getting aggressive if no merge was found */
- WT_RET(__lsm_merge_aggressive_update(session, lsm_tree));
+ __lsm_merge_aggressive_update(session, lsm_tree);
return (WT_NOTFOUND);
}
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index db9fd581110..0054dcd1583 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -265,7 +265,7 @@ __wt_lsm_tree_setup_chunk(
WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
{
WT_ASSERT(session, F_ISSET(session, WT_SESSION_LOCKED_SCHEMA));
- WT_RET(__wt_epoch(session, &chunk->create_ts));
+ __wt_epoch(session, &chunk->create_ts);
WT_RET(__wt_lsm_tree_chunk_name(
session, lsm_tree, chunk->id, &chunk->uri));
@@ -496,7 +496,7 @@ __lsm_tree_open(WT_SESSION_IMPL *session,
lsm_tree->queue_ref = 0;
/* Set a flush timestamp as a baseline. */
- WT_ERR(__wt_epoch(session, &lsm_tree->last_flush_ts));
+ __wt_epoch(session, &lsm_tree->last_flush_ts);
/* Now the tree is setup, make it visible to others. */
TAILQ_INSERT_HEAD(&S2C(session)->lsmqh, lsm_tree, q);
@@ -1139,7 +1139,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
return (0);
}
- WT_ERR(__wt_seconds(session, &begin));
+ __wt_seconds(session, &begin);
/*
* Compacting has two distinct phases.
@@ -1267,7 +1267,7 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
break;
}
__wt_sleep(1, 0);
- WT_ERR(__wt_seconds(session, &end));
+ __wt_seconds(session, &end);
if (session->compact->max_time > 0 &&
session->compact->max_time < (uint64_t)(end - begin)) {
WT_ERR(ETIMEDOUT);
diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c
index 72bcf56b3c4..917104031fc 100644
--- a/src/lsm/lsm_work_unit.c
+++ b/src/lsm/lsm_work_unit.c
@@ -358,7 +358,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
WT_ERR(__wt_lsm_tree_set_chunk_size(session, chunk));
/* Update the flush timestamp to help track ongoing progress. */
- WT_ERR(__wt_epoch(session, &lsm_tree->last_flush_ts));
+ __wt_epoch(session, &lsm_tree->last_flush_ts);
++lsm_tree->chunks_flushed;
/* Lock the tree, mark the chunk as on disk and update the metadata. */
diff --git a/src/meta/meta_ckpt.c b/src/meta/meta_ckpt.c
index 2b7719c3241..b985104c2eb 100644
--- a/src/meta/meta_ckpt.c
+++ b/src/meta/meta_ckpt.c
@@ -424,7 +424,7 @@ __wt_meta_ckptlist_set(WT_SESSION_IMPL *session,
* guaranteed, a time_t has to be an arithmetic type,
* but not an integral type.
*/
- WT_ERR(__wt_seconds(session, &secs));
+ __wt_seconds(session, &secs);
ckpt->sec = (uintmax_t)secs;
}
if (strcmp(ckpt->name, WT_CHECKPOINT) == 0)
diff --git a/src/os_posix/os_mtx_cond.c b/src/os_posix/os_mtx_cond.c
index b25bb8c25d1..842bb6eeec9 100644
--- a/src/os_posix/os_mtx_cond.c
+++ b/src/os_posix/os_mtx_cond.c
@@ -63,7 +63,7 @@ __wt_cond_wait_signal(
locked = true;
if (usecs > 0) {
- WT_ERR(__wt_epoch(session, &ts));
+ __wt_epoch(session, &ts);
ts.tv_sec += (time_t)
(((uint64_t)ts.tv_nsec + WT_THOUSAND * usecs) / WT_BILLION);
ts.tv_nsec = (long)
diff --git a/src/os_posix/os_time.c b/src/os_posix/os_time.c
index b1b22a8e684..c7ae881af97 100644
--- a/src/os_posix/os_time.c
+++ b/src/os_posix/os_time.c
@@ -12,26 +12,26 @@
* __wt_epoch --
* Return the time since the Epoch.
*/
-int
+void
__wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
{
WT_DECL_RET;
#if defined(HAVE_CLOCK_GETTIME)
- WT_SYSCALL(clock_gettime(CLOCK_REALTIME, tsp), ret);
+ WT_SYSCALL_RETRY(clock_gettime(CLOCK_REALTIME, tsp), ret);
if (ret == 0)
- return (0);
- WT_RET_MSG(session, ret, "clock_gettime");
+ return;
+ WT_PANIC_MSG(session, ret, "clock_gettime");
#elif defined(HAVE_GETTIMEOFDAY)
struct timeval v;
- WT_SYSCALL(gettimeofday(&v, NULL), ret);
+ WT_SYSCALL_RETRY(gettimeofday(&v, NULL), ret);
if (ret == 0) {
tsp->tv_sec = v.tv_sec;
tsp->tv_nsec = v.tv_usec * WT_THOUSAND;
- return (0);
+ return;
}
- WT_RET_MSG(session, ret, "gettimeofday");
+ WT_PANIC_MSG(session, ret, "gettimeofday");
#else
NO TIME-OF-DAY IMPLEMENTATION: see src/os_posix/os_time.c
#endif
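Since __wt_epoch can no longer fail from a caller's point of view (clock errors now panic, and WT_SYSCALL_RETRY retries interrupted calls), the timing pattern repeated throughout this change reduces to plain calls around the work being measured. A sketch of that pattern, assuming the internal wt_internal.h environment; do_work() is a hypothetical placeholder for the operation being timed:

    /* Sketch only: assumes the internal environment; do_work() is hypothetical. */
    static int
    time_operation(WT_SESSION_IMPL *session, uint64_t *elapsed_usp)
    {
            struct timespec start, stop;

            __wt_epoch(session, &start);    /* cannot return an error */
            WT_RET(do_work(session));       /* hypothetical operation */
            __wt_epoch(session, &stop);
            *elapsed_usp = WT_TIMEDIFF_US(stop, start);
            return (0);
    }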
diff --git a/src/os_win/os_time.c b/src/os_win/os_time.c
index e784b5d8a36..6aa5b3719f6 100644
--- a/src/os_win/os_time.c
+++ b/src/os_win/os_time.c
@@ -12,11 +12,11 @@
* __wt_epoch --
* Return the time since the Epoch.
*/
-int
+void
__wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
{
- uint64_t ns100;
FILETIME time;
+ uint64_t ns100;
WT_UNUSED(session);
@@ -26,8 +26,6 @@ __wt_epoch(WT_SESSION_IMPL *session, struct timespec *tsp)
- 116444736000000000LL;
tsp->tv_sec = ns100 / 10000000;
tsp->tv_nsec = (long)((ns100 % 10000000) * 100);
-
- return (0);
}
/*
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index 9c38c535301..810f3fd976b 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -451,19 +451,18 @@ __wt_reconcile(WT_SESSION_IMPL *session,
}
/*
- * When application threads perform eviction, don't cache block manager
- * or reconciliation structures (even across calls), we can have a
- * significant number of application threads doing eviction at the same
- * time with large items. We ignore checkpoints, once the checkpoint
- * completes, all unnecessary session resources will be discarded.
+	 * When threads perform eviction, don't cache block manager or
+	 * reconciliation structures (even across calls): we can have a
+	 * significant number of threads doing eviction at the same time with
+	 * large items. We ignore checkpoints; once the checkpoint completes,
+	 * all unnecessary session resources will be discarded.
*
- * Even in application threads doing checkpoints or in internal threads
- * doing any reconciliation, clean up reconciliation resources. Some
- * workloads have millions of boundary structures in a reconciliation
- * and we don't want to tie that memory down, even across calls.
+ * Even in application threads doing checkpoints, clean up
+ * reconciliation resources. Some workloads have millions of boundary
+ * structures in a reconciliation and we don't want to tie that memory
+ * down, even across calls.
*/
- if (WT_SESSION_IS_CHECKPOINT(session) ||
- F_ISSET(session, WT_SESSION_INTERNAL))
+ if (WT_SESSION_IS_CHECKPOINT(session))
__rec_bnd_cleanup(session, r, false);
else {
/*
@@ -564,10 +563,12 @@ __rec_write_status(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
* barrier after the change for clarity (the requirement is the
* flag be set before a subsequent checkpoint reads it, and
* as the current checkpoint is waiting on this reconciliation
- * to complete, there's no risk of that happening)
+ * to complete, there's no risk of that happening).
*/
- btree->modified = 1;
+ btree->modified = true;
WT_FULL_BARRIER();
+ if (!S2C(session)->modified)
+ S2C(session)->modified = true;
/*
* Eviction should only be here if following the save/restore
@@ -3335,7 +3336,7 @@ supd_check_complete:
__wt_verbose(session, WT_VERB_SPLIT,
"Reconciliation creating a page with %" PRIu32
" entries, memory footprint %" WT_SIZET_FMT
- ", page count %" PRIu32 ", %s, split state: %d\n",
+ ", page count %" PRIu32 ", %s, split state: %d",
r->entries, r->page->memory_footprint, r->bnd_next,
F_ISSET(r, WT_EVICTING) ? "evict" : "checkpoint",
r->bnd_state);
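One detail in the hunk above: the connection-wide flag is tested before it is written, so workloads that dirty many pages do not repeatedly store to a field the checkpoint code also reads. A generic sketch of that test-before-set idea; the names here are illustrative, not WiredTiger's:

    #include <stdbool.h>

    struct conn { bool modified; };     /* written rarely, read at checkpoint */

    static void
    mark_modified(struct conn *conn)
    {
            /*
             * Skip the store when the flag is already set: an unconditional
             * write would invalidate the cache line for other CPUs even when
             * the value does not change.
             */
            if (!conn->modified)
                    conn->modified = true;
    }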
diff --git a/src/session/session_api.c b/src/session/session_api.c
index 0d3fcad3184..d3432c19ef3 100644
--- a/src/session/session_api.c
+++ b/src/session/session_api.c
@@ -68,9 +68,10 @@ __wt_session_copy_values(WT_SESSION_IMPL *session)
* unless the cursor is reading from a checkpoint.
*/
WT_TXN_STATE *txn_state = WT_SESSION_TXN_STATE(session);
- WT_ASSERT(session, txn_state->snap_min != WT_TXN_NONE ||
- (WT_PREFIX_MATCH(cursor->uri, "file:") &&
- F_ISSET((WT_CURSOR_BTREE *)cursor, WT_CBT_NO_TXN)));
+ WT_ASSERT(session,
+ txn_state->pinned_id != WT_TXN_NONE ||
+ (WT_PREFIX_MATCH(cursor->uri, "file:") &&
+ F_ISSET((WT_CURSOR_BTREE *)cursor, WT_CBT_NO_TXN)));
#endif
F_CLR(cursor, WT_CURSTD_VALUE_INT);
@@ -1417,10 +1418,10 @@ __session_transaction_pinned_range(WT_SESSION *wt_session, uint64_t *prange)
/* Assign pinned to the lesser of id or snap_min */
if (txn_state->id != WT_TXN_NONE &&
- WT_TXNID_LT(txn_state->id, txn_state->snap_min))
+ WT_TXNID_LT(txn_state->id, txn_state->pinned_id))
pinned = txn_state->id;
else
- pinned = txn_state->snap_min;
+ pinned = txn_state->pinned_id;
if (pinned == WT_TXN_NONE)
*prange = 0;
@@ -1494,14 +1495,14 @@ __session_transaction_sync(WT_SESSION *wt_session, const char *config)
if (timeout_ms == 0)
WT_ERR(ETIMEDOUT);
- WT_ERR(__wt_epoch(session, &start));
+ __wt_epoch(session, &start);
/*
* Keep checking the LSNs until we find it is stable or we reach
* our timeout.
*/
while (__wt_log_cmp(&session->bg_sync_lsn, &log->sync_lsn) > 0) {
__wt_cond_signal(session, conn->log_file_cond);
- WT_ERR(__wt_epoch(session, &now));
+ __wt_epoch(session, &now);
waited_ms = WT_TIMEDIFF_MS(now, start);
if (forever || waited_ms < timeout_ms)
/*
diff --git a/src/session/session_compact.c b/src/session/session_compact.c
index f03d5d34bac..66635007723 100644
--- a/src/session/session_compact.c
+++ b/src/session/session_compact.c
@@ -179,17 +179,16 @@ __compact_handle_append(WT_SESSION_IMPL *session, const char *cfg[])
* Check if the timeout has been exceeded.
*/
static int
-__session_compact_check_timeout(
- WT_SESSION_IMPL *session, struct timespec begin)
+__session_compact_check_timeout(WT_SESSION_IMPL *session, struct timespec begin)
{
struct timespec end;
if (session->compact->max_time == 0)
return (0);
- WT_RET(__wt_epoch(session, &end));
+ __wt_epoch(session, &end);
if (session->compact->max_time < WT_TIMEDIFF_SEC(end, begin))
- WT_RET(ETIMEDOUT);
+ return (ETIMEDOUT);
return (0);
}
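Callers of the timeout check keep their usual WT_ERR/WT_RET handling; only the helper stopped wrapping ETIMEDOUT in an error macro. A rough usage sketch, where do_one_pass() is hypothetical and the begin timestamp comes from __wt_epoch as shown below:

    /* Sketch only: do_one_pass() is a hypothetical compaction step. */
    for (;;) {
            WT_RET(do_one_pass(session));
            WT_RET(__session_compact_check_timeout(session, begin));
    }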
@@ -219,7 +218,7 @@ __compact_file(WT_SESSION_IMPL *session, const char *cfg[])
session, t, "target=(\"%s\"),force=1", dhandle->name));
checkpoint_cfg[1] = t->data;
- WT_ERR(__wt_epoch(session, &start_time));
+ __wt_epoch(session, &start_time);
/*
* We compact 10% of the file on each pass (but the overall size of the
diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c
index e76407567bc..725854c6001 100644
--- a/src/session/session_dhandle.c
+++ b/src/session/session_dhandle.c
@@ -8,8 +8,6 @@
#include "wt_internal.h"
-static int __session_dhandle_sweep(WT_SESSION_IMPL *);
-
/*
* __session_add_dhandle --
* Add a handle to the session's cache.
@@ -371,7 +369,7 @@ __wt_session_close_cache(WT_SESSION_IMPL *session)
* __session_dhandle_sweep --
* Discard any session dhandles that are not open.
*/
-static int
+static void
__session_dhandle_sweep(WT_SESSION_IMPL *session)
{
WT_CONNECTION_IMPL *conn;
@@ -385,9 +383,9 @@ __session_dhandle_sweep(WT_SESSION_IMPL *session)
* Periodically sweep for dead handles; if we've swept recently, don't
* do it again.
*/
- WT_RET(__wt_seconds(session, &now));
+ __wt_seconds(session, &now);
if (difftime(now, session->last_sweep) < conn->sweep_interval)
- return (0);
+ return;
session->last_sweep = now;
WT_STAT_CONN_INCR(session, dh_session_sweeps);
@@ -408,7 +406,6 @@ __session_dhandle_sweep(WT_SESSION_IMPL *session)
}
dhandle_cache = dhandle_cache_next;
}
- return (0);
}
/*
@@ -446,7 +443,7 @@ __session_get_dhandle(
}
/* Sweep the handle list to remove any dead handles. */
- WT_RET(__session_dhandle_sweep(session));
+ __session_dhandle_sweep(session);
/*
* We didn't find a match in the session cache, search the shared
diff --git a/src/support/err.c b/src/support/err.c
index 8bfac250b3a..3ecbab1cbe9 100644
--- a/src/support/err.c
+++ b/src/support/err.c
@@ -162,7 +162,6 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error,
WT_SESSION *wt_session;
struct timespec ts;
size_t len, remain, wlen;
- int prefix_cnt;
const char *err, *prefix;
char *end, *p, tid[128];
@@ -211,44 +210,32 @@ __wt_eventv(WT_SESSION_IMPL *session, bool msg_event, int error,
* name, and the session's name. Write them as a comma-separate list,
* followed by a colon.
*/
- prefix_cnt = 0;
- if (__wt_epoch(session, &ts) == 0) {
- __wt_thread_id(tid, sizeof(tid));
- remain = WT_PTRDIFF(end, p);
- wlen = (size_t)snprintf(p, remain,
- "[%" PRIuMAX ":%" PRIuMAX "][%s]",
- (uintmax_t)ts.tv_sec,
- (uintmax_t)ts.tv_nsec / WT_THOUSAND, tid);
- p = wlen >= remain ? end : p + wlen;
- prefix_cnt = 1;
- }
+ __wt_epoch(session, &ts);
+ __wt_thread_id(tid, sizeof(tid));
+ remain = WT_PTRDIFF(end, p);
+ wlen = (size_t)snprintf(p, remain, "[%" PRIuMAX ":%" PRIuMAX "][%s]",
+ (uintmax_t)ts.tv_sec, (uintmax_t)ts.tv_nsec / WT_THOUSAND, tid);
+ p = wlen >= remain ? end : p + wlen;
+
if ((prefix = S2C(session)->error_prefix) != NULL) {
remain = WT_PTRDIFF(end, p);
- wlen = (size_t)snprintf(p, remain,
- "%s%s", prefix_cnt == 0 ? "" : ", ", prefix);
+ wlen = (size_t)snprintf(p, remain, ", %s", prefix);
p = wlen >= remain ? end : p + wlen;
- prefix_cnt = 1;
}
prefix = session->dhandle == NULL ? NULL : session->dhandle->name;
if (prefix != NULL) {
remain = WT_PTRDIFF(end, p);
- wlen = (size_t)snprintf(p, remain,
- "%s%s", prefix_cnt == 0 ? "" : ", ", prefix);
+ wlen = (size_t)snprintf(p, remain, ", %s", prefix);
p = wlen >= remain ? end : p + wlen;
- prefix_cnt = 1;
}
if ((prefix = session->name) != NULL) {
remain = WT_PTRDIFF(end, p);
- wlen = (size_t)snprintf(p, remain,
- "%s%s", prefix_cnt == 0 ? "" : ", ", prefix);
- p = wlen >= remain ? end : p + wlen;
- prefix_cnt = 1;
- }
- if (prefix_cnt != 0) {
- remain = WT_PTRDIFF(end, p);
- wlen = (size_t)snprintf(p, remain, ": ");
+ wlen = (size_t)snprintf(p, remain, ", %s", prefix);
p = wlen >= remain ? end : p + wlen;
}
+ remain = WT_PTRDIFF(end, p);
+ wlen = (size_t)snprintf(p, remain, ": ");
+ p = wlen >= remain ? end : p + wlen;
if (file_name != NULL) {
remain = WT_PTRDIFF(end, p);
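The prefix-building above leans on one idiom worth calling out: snprintf returns the length it would have written, so on truncation the write cursor is clamped to the end of the buffer instead of advanced past it, and later appends become no-ops. A small self-contained sketch of that idiom (generic C, not the WiredTiger code):

    #include <stdio.h>

    /* Append ", s" to the buffer, clamping the cursor on truncation. */
    static void
    append_field(char *end, char **pp, const char *s)
    {
            char *p = *pp;
            size_t remain = (size_t)(end - p);
            size_t wlen = (size_t)snprintf(p, remain, ", %s", s);

            *pp = wlen >= remain ? end : p + wlen;
    }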
diff --git a/src/support/rand.c b/src/support/rand.c
index d2e4cd27aab..025b18e4ed3 100644
--- a/src/support/rand.c
+++ b/src/support/rand.c
@@ -66,20 +66,18 @@ __wt_random_init(WT_RAND_STATE volatile * rnd_state)
* threads and we want each thread to initialize its own random state based
* on a different random seed.
*/
-int
+void
__wt_random_init_seed(
WT_SESSION_IMPL *session, WT_RAND_STATE volatile * rnd_state)
{
struct timespec ts;
WT_RAND_STATE rnd;
- WT_RET(__wt_epoch(session, &ts));
+ __wt_epoch(session, &ts);
M_W(rnd) = (uint32_t)(ts.tv_nsec + 521288629);
M_Z(rnd) = (uint32_t)(ts.tv_nsec + 362436069);
*rnd_state = rnd;
-
- return (0);
}
/*
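A brief usage sketch of the seeded generator, assuming the internal environment: each thread seeds its own state once and then draws values locally, so no synchronization between threads is needed.

    /* Sketch only: per-thread random state inside the engine. */
    WT_RAND_STATE rnd;
    uint32_t r;

    __wt_random_init_seed(session, &rnd);   /* can no longer fail */
    r = __wt_random(&rnd);                  /* existing generator */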
diff --git a/src/support/stat.c b/src/support/stat.c
index 7150223e6cb..9d440f9ebf3 100644
--- a/src/support/stat.c
+++ b/src/support/stat.c
@@ -706,6 +706,8 @@ static const char * const __stats_connection_desc[] = {
"thread-state: active filesystem fsync calls",
"thread-state: active filesystem read calls",
"thread-state: active filesystem write calls",
+ "thread-yield: application thread time evicting (usecs)",
+ "thread-yield: application thread time waiting for cache (usecs)",
"thread-yield: page acquire busy blocked",
"thread-yield: page acquire eviction blocked",
"thread-yield: page acquire locked blocked",
@@ -723,6 +725,7 @@ static const char * const __stats_connection_desc[] = {
"transaction: transaction checkpoint scrub time (msecs)",
"transaction: transaction checkpoint total time (msecs)",
"transaction: transaction checkpoints",
+ "transaction: transaction checkpoints skipped because database was clean",
"transaction: transaction failures due to cache overflow",
"transaction: transaction fsync calls for checkpoint after allocating the transaction ID",
"transaction: transaction fsync duration for checkpoint after allocating the transaction ID (usecs)",
@@ -950,6 +953,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing thread_fsync_active */
/* not clearing thread_read_active */
/* not clearing thread_write_active */
+ stats->application_evict_time = 0;
+ stats->application_cache_time = 0;
stats->page_busy_blocked = 0;
stats->page_forcible_evict_blocked = 0;
stats->page_locked_blocked = 0;
@@ -967,6 +972,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing txn_checkpoint_scrub_time */
/* not clearing txn_checkpoint_time_total */
stats->txn_checkpoint = 0;
+ stats->txn_checkpoint_skipped = 0;
stats->txn_fail_cache = 0;
stats->txn_checkpoint_fsync_post = 0;
/* not clearing txn_checkpoint_fsync_post_duration */
@@ -1242,6 +1248,10 @@ __wt_stat_connection_aggregate(
to->thread_fsync_active += WT_STAT_READ(from, thread_fsync_active);
to->thread_read_active += WT_STAT_READ(from, thread_read_active);
to->thread_write_active += WT_STAT_READ(from, thread_write_active);
+ to->application_evict_time +=
+ WT_STAT_READ(from, application_evict_time);
+ to->application_cache_time +=
+ WT_STAT_READ(from, application_cache_time);
to->page_busy_blocked += WT_STAT_READ(from, page_busy_blocked);
to->page_forcible_evict_blocked +=
WT_STAT_READ(from, page_forcible_evict_blocked);
@@ -1270,6 +1280,8 @@ __wt_stat_connection_aggregate(
to->txn_checkpoint_time_total +=
WT_STAT_READ(from, txn_checkpoint_time_total);
to->txn_checkpoint += WT_STAT_READ(from, txn_checkpoint);
+ to->txn_checkpoint_skipped +=
+ WT_STAT_READ(from, txn_checkpoint_skipped);
to->txn_fail_cache += WT_STAT_READ(from, txn_fail_cache);
to->txn_checkpoint_fsync_post +=
WT_STAT_READ(from, txn_checkpoint_fsync_post);
diff --git a/src/support/thread_group.c b/src/support/thread_group.c
index f5ddabad7d4..a866d2d01c5 100644
--- a/src/support/thread_group.c
+++ b/src/support/thread_group.c
@@ -60,7 +60,7 @@ __thread_group_grow(
while (group->current_threads < new_count) {
thread = group->threads[group->current_threads++];
__wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Starting utility thread: %p:%"PRIu32"\n",
+ "Starting utility thread: %p:%" PRIu32,
(void *)group, thread->id);
F_SET(thread, WT_THREAD_RUN);
WT_ASSERT(session, thread->session != NULL);
@@ -100,7 +100,7 @@ __thread_group_shrink(WT_SESSION_IMPL *session,
/* Wake threads to ensure they notice the state change */
if (thread->tid != 0) {
__wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Stopping utility thread: %p:%"PRIu32"\n",
+ "Stopping utility thread: %p:%" PRIu32,
(void *)group, thread->id);
F_CLR(thread, WT_THREAD_RUN);
__wt_cond_signal(session, group->wait_cond);
@@ -224,7 +224,7 @@ __wt_thread_group_resize(
__wt_verbose(session, WT_VERB_THREAD_GROUP,
"Resize thread group: %p, from min: %" PRIu32 " -> %" PRIu32
- " from max: %" PRIu32 " -> %" PRIu32 "\n",
+ " from max: %" PRIu32 " -> %" PRIu32,
(void *)group, group->min, new_min, group->max, new_max);
__wt_writelock(session, group->lock);
@@ -253,7 +253,7 @@ __wt_thread_group_create(
cond_alloced = false;
__wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Creating thread group: %p\n", (void *)group);
+ "Creating thread group: %p", (void *)group);
WT_RET(__wt_rwlock_alloc(session, &group->lock, "Thread group"));
WT_ERR(__wt_cond_alloc(
@@ -286,7 +286,7 @@ __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group)
WT_DECL_RET;
__wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Destroying thread group: %p\n", (void *)group);
+ "Destroying thread group: %p", (void *)group);
WT_ASSERT(session, __wt_rwlock_islocked(session, group->lock));
diff --git a/src/txn/txn.c b/src/txn/txn.c
index 01e0fbbb634..3b24bcd505d 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -96,11 +96,11 @@ __wt_txn_release_snapshot(WT_SESSION_IMPL *session)
txn_state = WT_SESSION_TXN_STATE(session);
WT_ASSERT(session,
- txn_state->snap_min == WT_TXN_NONE ||
+ txn_state->pinned_id == WT_TXN_NONE ||
session->txn.isolation == WT_ISO_READ_UNCOMMITTED ||
- !__wt_txn_visible_all(session, txn_state->snap_min));
+ !__wt_txn_visible_all(session, txn_state->pinned_id));
- txn_state->snap_min = WT_TXN_NONE;
+ txn_state->pinned_id = WT_TXN_NONE;
F_CLR(txn, WT_TXN_HAS_SNAPSHOT);
}
@@ -117,7 +117,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
WT_TXN_GLOBAL *txn_global;
WT_TXN_STATE *s, *txn_state;
uint64_t current_id, id;
- uint64_t prev_oldest_id, snap_min;
+ uint64_t prev_oldest_id, pinned_id;
uint32_t i, n, session_cnt;
conn = S2C(session);
@@ -135,21 +135,21 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
WT_PAUSE();
WT_RET(ret);
- current_id = snap_min = txn_global->current;
+ current_id = pinned_id = txn_global->current;
prev_oldest_id = txn_global->oldest_id;
/*
* Include the checkpoint transaction, if one is running: we should
* ignore any uncommitted changes the checkpoint has written to the
* metadata. We don't have to keep the checkpoint's changes pinned so
- * don't including it in the published snap_min.
+	 * don't include it in the published pinned ID.
*/
if ((id = txn_global->checkpoint_txnid) != WT_TXN_NONE)
txn->snapshot[n++] = id;
/* For pure read-only workloads, avoid scanning. */
if (prev_oldest_id == current_id) {
- txn_state->snap_min = current_id;
+ txn_state->pinned_id = current_id;
/* Check that the oldest ID has not moved in the meantime. */
WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
goto done;
@@ -172,18 +172,18 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
(id = s->id) != WT_TXN_NONE &&
WT_TXNID_LE(prev_oldest_id, id)) {
txn->snapshot[n++] = id;
- if (WT_TXNID_LT(id, snap_min))
- snap_min = id;
+ if (WT_TXNID_LT(id, pinned_id))
+ pinned_id = id;
}
}
/*
- * If we got a new snapshot, update the published snap_min for this
+ * If we got a new snapshot, update the published pinned ID for this
* session.
*/
- WT_ASSERT(session, WT_TXNID_LE(prev_oldest_id, snap_min));
+ WT_ASSERT(session, WT_TXNID_LE(prev_oldest_id, pinned_id));
WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
- txn_state->snap_min = snap_min;
+ txn_state->pinned_id = pinned_id;
done: __wt_readunlock(session, txn_global->scan_rwlock);
__txn_sort_snapshot(session, n, current_id);
@@ -232,13 +232,13 @@ __txn_oldest_scan(WT_SESSION_IMPL *session,
/*
* !!!
- * Note: Don't ignore snap_min values older than the previous
- * oldest ID. Read-uncommitted operations publish snap_min
+ * Note: Don't ignore pinned ID values older than the previous
+ * oldest ID. Read-uncommitted operations publish pinned ID
* values without acquiring the scan lock to protect the global
- * table. See the comment in __wt_txn_cursor_op for
- * more details.
+ * table. See the comment in __wt_txn_cursor_op for more
+ * details.
*/
- if ((id = s->snap_min) != WT_TXN_NONE &&
+ if ((id = s->pinned_id) != WT_TXN_NONE &&
WT_TXNID_LT(id, oldest_id)) {
oldest_id = id;
oldest_session = &conn->sessions[i];
@@ -360,7 +360,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, uint32_t flags)
__wt_verbose(session, WT_VERB_TRANSACTION,
"old snapshot %" PRIu64
" pinned in session %" PRIu32 " [%s]"
- " with snap_min %" PRIu64 "\n",
+ " with snap_min %" PRIu64,
oldest_id, oldest_session->id,
oldest_session->lastop,
oldest_session->txn.snap_min);
@@ -673,7 +673,7 @@ __wt_txn_init(WT_SESSION_IMPL *session)
if (S2C(session)->txn_global.states != NULL) {
WT_TXN_STATE *txn_state;
txn_state = WT_SESSION_TXN_STATE(session);
- WT_ASSERT(session, txn_state->snap_min == WT_TXN_NONE);
+ WT_ASSERT(session, txn_state->pinned_id == WT_TXN_NONE);
}
#endif
@@ -773,7 +773,7 @@ __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[])
WT_CACHE_LINE_ALIGNMENT_VERIFY(session, txn_global->states);
for (i = 0, s = txn_global->states; i < conn->session_size; i++, s++)
- s->id = s->snap_min = WT_TXN_NONE;
+ s->id = s->pinned_id = WT_TXN_NONE;
return (0);
}
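For orientation, pinned_id is the oldest transaction ID a session keeps from becoming globally visible, and the value surfaced by WT_SESSION::transaction_pinned_range (see the session_api.c hunk above) is the distance from that ID to the current allocation point. A rough sketch of that arithmetic; the final subtraction against txn_global->current is an assumption here, since it is not part of the hunks shown:

    /* Sketch only: locking around the reads is omitted. */
    uint64_t pinned;

    pinned = (txn_state->id != WT_TXN_NONE &&
        WT_TXNID_LT(txn_state->id, txn_state->pinned_id)) ?
        txn_state->id : txn_state->pinned_id;
    *prange = pinned == WT_TXN_NONE ? 0 : txn_global->current - pinned;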
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index 3aad95f5a9f..0557e6ce60c 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -314,7 +314,7 @@ __checkpoint_update_generation(WT_SESSION_IMPL *session)
* __checkpoint_reduce_dirty_cache --
* Release clean trees from the list cached for checkpoints.
*/
-static int
+static void
__checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
{
WT_CACHE *cache;
@@ -332,9 +332,9 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
/* Give up if scrubbing is disabled. */
if (cache->eviction_checkpoint_target == 0 ||
cache->eviction_checkpoint_target >= cache->eviction_dirty_trigger)
- return (0);
+ return;
- WT_RET(__wt_epoch(session, &start));
+ __wt_epoch(session, &start);
last = start;
bytes_written_last = 0;
bytes_written_start = cache->bytes_written;
@@ -345,7 +345,7 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
* cache via reconfigure. This avoids potential divide by zero.
*/
if (cache_size < 10 * WT_MEGABYTE)
- return (0);
+ return;
stepdown_us = 10000;
work_us = 0;
progress = false;
@@ -371,7 +371,7 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
break;
__wt_sleep(0, stepdown_us / 10);
- WT_RET(__wt_epoch(session, &stop));
+ __wt_epoch(session, &stop);
current_us = WT_TIMEDIFF_US(stop, last);
total_ms = WT_TIMEDIFF_MS(stop, start);
bytes_written_total =
@@ -427,14 +427,12 @@ __checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
WT_MAX(cache->eviction_dirty_target, current_dirty - delta);
WT_STAT_CONN_SET(session, txn_checkpoint_scrub_target,
cache->eviction_scrub_limit);
- WT_RET(__wt_epoch(session, &last));
+ __wt_epoch(session, &last);
}
- WT_RET(__wt_epoch(session, &stop));
+ __wt_epoch(session, &stop);
total_ms = WT_TIMEDIFF_MS(stop, start);
WT_STAT_CONN_SET(session, txn_checkpoint_scrub_time, total_ms);
-
- return (0);
}
/*
@@ -497,7 +495,7 @@ __checkpoint_stats(
* __checkpoint_verbose_track --
* Output a verbose message with timing information
*/
-static int
+static void
__checkpoint_verbose_track(WT_SESSION_IMPL *session,
const char *msg, struct timespec *start)
{
@@ -506,9 +504,9 @@ __checkpoint_verbose_track(WT_SESSION_IMPL *session,
uint64_t msec;
if (!WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT))
- return (0);
+ return;
- WT_RET(__wt_epoch(session, &stop));
+ __wt_epoch(session, &stop);
/*
* Get time diff in microseconds.
@@ -526,7 +524,6 @@ __checkpoint_verbose_track(WT_SESSION_IMPL *session,
WT_UNUSED(msg);
WT_UNUSED(start);
#endif
- return (0);
}
/*
@@ -576,7 +573,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
conn->cache->evict_max_page_size = 0;
/* Initialize the verbose tracking timer */
- WT_ERR(__wt_epoch(session, &verb_timer));
+ __wt_epoch(session, &verb_timer);
/*
* Update the global oldest ID so we do all possible cleanup.
@@ -594,18 +591,18 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* Try to reduce the amount of dirty data in cache so there is less
	 * work to do during the critical section of the checkpoint.
*/
- WT_ERR(__checkpoint_reduce_dirty_cache(session));
+ __checkpoint_reduce_dirty_cache(session);
/* Tell logging that we are about to start a database checkpoint. */
if (full && logging)
WT_ERR(__wt_txn_checkpoint_log(
session, full, WT_TXN_LOG_CKPT_PREPARE, NULL));
- WT_ERR(__checkpoint_verbose_track(session,
- "starting transaction", &verb_timer));
+ __checkpoint_verbose_track(session,
+ "starting transaction", &verb_timer);
if (full)
- WT_ERR(__wt_epoch(session, &start));
+ __wt_epoch(session, &start);
/*
* Start the checkpoint for real.
@@ -666,6 +663,14 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_txn_id_check(session));
/*
+	 * Mark the connection as clean. If data is modified after the
+	 * checkpoint transaction ID is generated, the connection will be
+	 * reset to dirty when reconciliation marks the btree dirty on
+	 * encountering a dirty page.
+ */
+ conn->modified = false;
+
+ /*
* Save the checkpoint session ID.
*
* We never do checkpoints in the default session (with id zero).
@@ -689,7 +694,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
*/
WT_ASSERT(session,
WT_TXNID_LE(txn_global->oldest_id, txn_state->id) &&
- WT_TXNID_LE(txn_global->oldest_id, txn_state->snap_min));
+ WT_TXNID_LE(txn_global->oldest_id, txn_state->pinned_id));
/*
* Clear our entry from the global transaction session table. Any
@@ -698,7 +703,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* can safely ignore the checkpoint ID (see the visible all check for
* details).
*/
- txn_state->id = txn_state->snap_min = WT_TXN_NONE;
+ txn_state->id = txn_state->pinned_id = WT_TXN_NONE;
__wt_writeunlock(session, txn_global->scan_rwlock);
/*
@@ -739,23 +744,22 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_presync));
__wt_evict_server_wake(session);
- WT_ERR(__checkpoint_verbose_track(session,
- "committing transaction", &verb_timer));
+ __checkpoint_verbose_track(session,
+ "committing transaction", &verb_timer);
/*
* Checkpoints have to hit disk (it would be reasonable to configure for
* lazy checkpoints, but we don't support them yet).
*/
- WT_ERR(__wt_epoch(session, &fsync_start));
+ __wt_epoch(session, &fsync_start);
WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync));
- WT_ERR(__wt_epoch(session, &fsync_stop));
+ __wt_epoch(session, &fsync_stop);
fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start);
WT_STAT_CONN_INCR(session, txn_checkpoint_fsync_post);
WT_STAT_CONN_SET(session,
txn_checkpoint_fsync_post_duration, fsync_duration_usecs);
- WT_ERR(__checkpoint_verbose_track(session,
- "sync completed", &verb_timer));
+ __checkpoint_verbose_track(session, "sync completed", &verb_timer);
/*
* Commit the transaction now that we are sure that all files in the
@@ -793,8 +797,8 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
ret = __wt_checkpoint_sync(session, NULL));
WT_ERR(ret);
- WT_ERR(__checkpoint_verbose_track(session,
- "metadata sync completed", &verb_timer));
+ __checkpoint_verbose_track(session,
+ "metadata sync completed", &verb_timer);
} else
WT_WITH_DHANDLE(session,
WT_SESSION_META_DHANDLE(session),
@@ -808,7 +812,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
txn_global->checkpoint_pinned = WT_TXN_NONE;
if (full) {
- WT_ERR(__wt_epoch(session, &stop));
+ __wt_epoch(session, &stop);
__checkpoint_stats(session, &start, &stop);
}
@@ -825,6 +829,9 @@ err: /*
* overwritten the checkpoint, so what ends up on disk is not
* consistent.
*/
+ if (ret != 0 && !conn->modified)
+ conn->modified = true;
+
session->isolation = txn->isolation = WT_ISO_READ_UNCOMMITTED;
if (tracking)
WT_TRET(__wt_meta_track_off(session, false, ret != 0));
@@ -1352,9 +1359,13 @@ __checkpoint_tree(
* out of sync with the set of dirty pages (modify is set, but there
* are no dirty pages), we perform a checkpoint without any writes, no
* checkpoint is created, and then things get bad.
+	 * While marking the root page as dirty, we do not want to dirty the
+	 * btree, because the btree is marked clean immediately after this
+	 * call. Marking the btree dirty at this stage would also
+	 * unnecessarily mark the connection as dirty, causing the
+	 * checkpoint-skip logic to fail.
*/
WT_ERR(__wt_page_modify_init(session, btree->root.page));
- __wt_page_modify_set(session, btree->root.page);
+ __wt_page_only_modify_set(session, btree->root.page);
/*
* Clear the tree's modified flag; any changes before we clear the flag
@@ -1366,7 +1377,7 @@ __checkpoint_tree(
* it sets the modified flag itself. Use a full barrier so we get the
* store done quickly, this isn't a performance path.
*/
- btree->modified = 0;
+ btree->modified = false;
WT_FULL_BARRIER();
/* Tell logging that a file checkpoint is starting. */
@@ -1440,8 +1451,11 @@ err: /*
* If the checkpoint didn't complete successfully, make sure the
* tree is marked dirty.
*/
- if (ret != 0 && !btree->modified && was_modified)
- btree->modified = 1;
+ if (ret != 0 && !btree->modified && was_modified) {
+ btree->modified = true;
+ if (!S2C(session)->modified)
+ S2C(session)->modified = true;
+ }
__wt_meta_ckptlist_free(session, ckptbase);
btree->ckpt = NULL;
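The conn->modified flag set and restored above is what makes skipping checkpoints possible; the skip decision itself is not in the hunks shown here. A heavily hedged sketch of what such a decision could look like, where "force" is a hypothetical caller flag and only the statistic name comes from this change:

    /* Sketch only: "force" is hypothetical; the real check lives elsewhere. */
    if (!S2C(session)->modified && !force) {
            WT_STAT_CONN_INCR(session, txn_checkpoint_skipped);
            return (0);
    }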
diff --git a/src/txn/txn_nsnap.c b/src/txn/txn_nsnap.c
index 8f7e93238de..7ba0cc8700e 100644
--- a/src/txn/txn_nsnap.c
+++ b/src/txn/txn_nsnap.c
@@ -42,9 +42,16 @@ __nsnap_drop_one(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *name)
return (WT_NOTFOUND);
/* Bump the global ID if we are removing the first entry */
- if (found == TAILQ_FIRST(&txn_global->nsnaph))
+ if (found == TAILQ_FIRST(&txn_global->nsnaph)) {
+ WT_ASSERT(session, !__wt_txn_visible_all(
+ session, txn_global->nsnap_oldest_id));
txn_global->nsnap_oldest_id = (TAILQ_NEXT(found, q) != NULL) ?
- TAILQ_NEXT(found, q)->snap_min : WT_TXN_NONE;
+ TAILQ_NEXT(found, q)->pinned_id : WT_TXN_NONE;
+ WT_DIAGNOSTIC_YIELD;
+ WT_ASSERT(session, txn_global->nsnap_oldest_id == WT_TXN_NONE ||
+ !__wt_txn_visible_all(
+ session, txn_global->nsnap_oldest_id));
+ }
TAILQ_REMOVE(&txn_global->nsnaph, found, q);
__nsnap_destroy(session, found);
WT_STAT_CONN_INCR(session, txn_snapshots_dropped);
@@ -104,7 +111,7 @@ __nsnap_drop_to(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *name, bool inclusive)
}
if (TAILQ_NEXT(last, q) != NULL)
- new_nsnap_oldest = TAILQ_NEXT(last, q)->snap_min;
+ new_nsnap_oldest = TAILQ_NEXT(last, q)->pinned_id;
}
do {
@@ -117,7 +124,15 @@ __nsnap_drop_to(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *name, bool inclusive)
} while (nsnap != last && !TAILQ_EMPTY(&txn_global->nsnaph));
/* Now that the queue of named snapshots is updated, update the ID */
+ WT_ASSERT(session, !__wt_txn_visible_all(
+ session, txn_global->nsnap_oldest_id) &&
+ (new_nsnap_oldest == WT_TXN_NONE ||
+ WT_TXNID_LE(txn_global->nsnap_oldest_id, new_nsnap_oldest)));
txn_global->nsnap_oldest_id = new_nsnap_oldest;
+ WT_DIAGNOSTIC_YIELD;
+ WT_ASSERT(session,
+ new_nsnap_oldest == WT_TXN_NONE ||
+ !__wt_txn_visible_all(session, new_nsnap_oldest));
return (ret);
}
@@ -157,6 +172,7 @@ __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_calloc_one(session, &nsnap_new));
nsnap = nsnap_new;
WT_ERR(__wt_strndup(session, cval.str, cval.len, &nsnap->name));
+ nsnap->pinned_id = WT_SESSION_TXN_STATE(session)->pinned_id;
nsnap->snap_min = txn->snap_min;
nsnap->snap_max = txn->snap_max;
if (txn->snapshot_count > 0) {
@@ -175,15 +191,25 @@ __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[])
*/
WT_ERR_NOTFOUND_OK(__nsnap_drop_one(session, &cval));
- if (TAILQ_EMPTY(&txn_global->nsnaph))
- txn_global->nsnap_oldest_id = nsnap_new->snap_min;
+ if (TAILQ_EMPTY(&txn_global->nsnaph)) {
+ WT_ASSERT(session, txn_global->nsnap_oldest_id == WT_TXN_NONE &&
+ !__wt_txn_visible_all(session, nsnap_new->pinned_id));
+ __wt_readlock(session, txn_global->scan_rwlock);
+ txn_global->nsnap_oldest_id = nsnap_new->pinned_id;
+ __wt_readunlock(session, txn_global->scan_rwlock);
+ }
TAILQ_INSERT_TAIL(&txn_global->nsnaph, nsnap_new, q);
WT_STAT_CONN_INCR(session, txn_snapshots_created);
nsnap_new = NULL;
-err: if (started_txn)
+err: if (started_txn) {
+#ifdef HAVE_DIAGNOSTIC
+ uint64_t pinned_id = WT_SESSION_TXN_STATE(session)->pinned_id;
+#endif
WT_TRET(__wt_txn_rollback(session, NULL));
- else if (ret == 0)
+ WT_DIAGNOSTIC_YIELD;
+ WT_ASSERT(session, !__wt_txn_visible_all(session, pinned_id));
+ } else if (ret == 0)
F_SET(txn, WT_TXN_NAMED_SNAPSHOT);
if (nsnap_new != NULL)
@@ -258,7 +284,20 @@ __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval)
__wt_readlock(session, txn_global->nsnap_rwlock);
TAILQ_FOREACH(nsnap, &txn_global->nsnaph, q)
if (WT_STRING_MATCH(nsnap->name, nameval->str, nameval->len)) {
- txn->snap_min = txn_state->snap_min = nsnap->snap_min;
+ /*
+ * Acquire the scan lock so the oldest ID can't move
+ * forward without seeing our pinned ID.
+ */
+ __wt_readlock(session, txn_global->scan_rwlock);
+ txn_state->pinned_id = nsnap->pinned_id;
+ __wt_readunlock(session, txn_global->scan_rwlock);
+
+ WT_ASSERT(session, !__wt_txn_visible_all(
+ session, txn_state->pinned_id) &&
+ txn_global->nsnap_oldest_id != WT_TXN_NONE &&
+ WT_TXNID_LE(txn_global->nsnap_oldest_id,
+ txn_state->pinned_id));
+ txn->snap_min = nsnap->snap_min;
txn->snap_max = nsnap->snap_max;
if ((txn->snapshot_count = nsnap->snapshot_count) != 0)
memcpy(txn->snapshot, nsnap->snapshot,