summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/third_party/wiredtiger/dist/api_config.py27
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py22
-rw-r--r--src/third_party/wiredtiger/dist/s_define.list1
-rw-r--r--src/third_party/wiredtiger/dist/s_funcs.list1
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok3
-rw-r--r--src/third_party/wiredtiger/dist/stat_data.py3
-rw-r--r--src/third_party/wiredtiger/import.data2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_debug.c121
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_delete.c22
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_random.c4
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_read.c13
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_sync.c13
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_vrfy.c2
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_walk.c159
-rw-r--r--src/third_party/wiredtiger/src/btree/col_srch.c4
-rw-r--r--src/third_party/wiredtiger/src/btree/row_key.c2
-rw-r--r--src/third_party/wiredtiger/src/btree/row_srch.c4
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c8
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_std.c16
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_page.c2
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h92
-rw-r--r--src/third_party/wiredtiger/src/include/btree.i246
-rw-r--r--src/third_party/wiredtiger/src/include/btree_cmp.i62
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h15
-rw-r--r--src/third_party/wiredtiger/src/include/misc.h9
-rw-r--r--src/third_party/wiredtiger/src/include/misc.i21
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h3
-rw-r--r--src/third_party/wiredtiger/src/include/txn.i31
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in547
-rw-r--r--src/third_party/wiredtiger/src/os_posix/os_fs.c8
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c25
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_alter.c197
-rw-r--r--src/third_party/wiredtiger/src/schema/schema_worker.c52
-rw-r--r--src/third_party/wiredtiger/src/session/session_api.c52
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c10
-rw-r--r--src/third_party/wiredtiger/src/support/thread_group.c31
-rw-r--r--src/third_party/wiredtiger/src/txn/txn.c78
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_timestamp.c122
-rw-r--r--src/third_party/wiredtiger/src/utilities/util_load.c2
-rw-r--r--src/third_party/wiredtiger/test/format/config.c15
-rw-r--r--src/third_party/wiredtiger/test/format/format.h4
-rw-r--r--src/third_party/wiredtiger/test/format/ops.c130
-rw-r--r--src/third_party/wiredtiger/test/format/t.c2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_alter03.py82
-rw-r--r--src/third_party/wiredtiger/test/suite/test_cursor13.py25
-rw-r--r--src/third_party/wiredtiger/test/suite/test_las01.py (renamed from src/third_party/wiredtiger/test/suite/test_las.py)6
-rw-r--r--src/third_party/wiredtiger/test/suite/test_las02.py114
-rw-r--r--src/third_party/wiredtiger/test/suite/test_prepare01.py5
-rw-r--r--src/third_party/wiredtiger/test/suite/test_prepare02.py2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_prepare03.py2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_prepare04.py2
-rw-r--r--src/third_party/wiredtiger/test/suite/test_prepare05.py119
52 files changed, 1763 insertions, 777 deletions
diff --git a/src/third_party/wiredtiger/dist/api_config.py b/src/third_party/wiredtiger/dist/api_config.py
index d83a632321e..0471bde51fd 100644
--- a/src/third_party/wiredtiger/dist/api_config.py
+++ b/src/third_party/wiredtiger/dist/api_config.py
@@ -128,18 +128,7 @@ for line in open(f, 'r'):
break_on_hyphens=False,
replace_whitespace=False,
fix_sentence_endings=True)
- lastname = None
- for c in sorted(api_data.methods[config_name].config):
- name = c.name
- if '.' in name:
- print >>sys.stderr, "Bad config key " + name
-
- # Deal with duplicates: with complex configurations (like
- # WT_SESSION::create), it's simpler to deal with duplicates here than
- # manually in api_data.py.
- if name == lastname:
- continue
- lastname = name
+ for c in api_data.methods[config_name].config:
if 'undoc' in c.flags:
continue
output = parseconfig(c, config_name)
@@ -244,8 +233,8 @@ def getsubconfigstr(c):
# Write structures of arrays of allowable configuration options, including a
# NULL as a terminator for iteration.
for name in sorted(api_data.methods.keys()):
- ctype = api_data.methods[name].config
- if ctype:
+ config = api_data.methods[name].config
+ if config:
tfile.write('''
static const WT_CONFIG_CHECK confchk_%(name)s[] = {
\t%(check)s
@@ -253,7 +242,7 @@ static const WT_CONFIG_CHECK confchk_%(name)s[] = {
};
''' % {
'name' : name.replace('.', '_'),
- 'check' : '\n\t'.join(getconfcheck(c) for c in sorted(ctype)),
+ 'check' : '\n\t'.join(getconfcheck(c) for c in config),
})
# Write the initialized list of configuration entry structures.
@@ -263,7 +252,7 @@ tfile.write('static const WT_CONFIG_ENTRY config_entries[] = {')
slot=-1
config_defines = ''
for name in sorted(api_data.methods.keys()):
- ctype = api_data.methods[name].config
+ config = api_data.methods[name].config
slot += 1
# Build a list of #defines that reference specific slots in the list (the
@@ -279,15 +268,15 @@ for name in sorted(api_data.methods.keys()):
%(config)s,''' % {
'config' : '\n'.join('\t "%s"' % line
for line in w.wrap(','.join('%s=%s' % (c.name, get_default(c))
- for c in sorted(ctype))) or [""]),
+ for c in config)) or [""]),
'name' : name
})
# Write the checks reference, or NULL if no related checks structure.
tfile.write('\n\t ')
- if ctype:
+ if config:
tfile.write(
- 'confchk_' + name.replace('.', '_') + ', ' + str(len(ctype)))
+ 'confchk_' + name.replace('.', '_') + ', ' + str(len(config)))
else:
tfile.write('NULL, 0')
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index f5e0b4a67a3..f54d2e1fe5b 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -2,7 +2,18 @@
class Method:
def __init__(self, config):
- self.config = config
+ # Deal with duplicates: with complex configurations (like
+ # WT_SESSION::create), it's simpler to deal with duplicates once than
+ # manually as configurations are defined
+ self.config = []
+ lastname = None
+ for c in sorted(config):
+ if '.' in c.name:
+ raise "Bad config key '%s'" % c.name
+ if c.name == lastname:
+ continue
+ lastname = c.name
+ self.config.append(c)
class Config:
def __init__(self, name, default, desc, subconfig=None, **flags):
@@ -15,10 +26,13 @@ class Config:
def __cmp__(self, other):
return cmp(self.name, other.name)
-# Metadata shared by all schema objects
-common_meta = [
+common_runtime_config = [
Config('app_metadata', '', r'''
application-owned metadata for this object'''),
+]
+
+# Metadata shared by all schema objects
+common_meta = common_runtime_config + [
Config('collator', 'none', r'''
configure custom collation for keys. Permitted values are \c "none"
or a custom collator name created with WT_CONNECTION::add_collator'''),
@@ -130,7 +144,7 @@ lsm_config = [
]),
]
-file_runtime_config = [
+file_runtime_config = common_runtime_config + [
Config('access_pattern_hint', 'none', r'''
It is recommended that workloads that consist primarily of
updates and/or point queries specify \c random. Workloads that
diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list
index cfae3106fcf..0cee432d8b1 100644
--- a/src/third_party/wiredtiger/dist/s_define.list
+++ b/src/third_party/wiredtiger/dist/s_define.list
@@ -41,6 +41,7 @@ WT_OPTRACK_MAXRECS
WT_PACKED_STRUCT_BEGIN
WT_PACKED_STRUCT_END
WT_PADDING_CHECK
+WT_PREPARE_INIT
WT_READ_BARRIER
WT_REF_SIZE
WT_SESSION_LOCKED_CHECKPOINT
diff --git a/src/third_party/wiredtiger/dist/s_funcs.list b/src/third_party/wiredtiger/dist/s_funcs.list
index a7653e5b497..95c568a19ff 100644
--- a/src/third_party/wiredtiger/dist/s_funcs.list
+++ b/src/third_party/wiredtiger/dist/s_funcs.list
@@ -16,6 +16,7 @@ __wt_config_getone
__wt_cursor_get_raw_value
__wt_debug_addr
__wt_debug_addr_print
+__wt_debug_cursor_page
__wt_debug_offset
__wt_debug_set_verbose
__wt_debug_tree
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index 7330f560eb6..3b4c5eb8883 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -176,6 +176,7 @@ INCR
INIT
INITIALIZER
INMEM
+INPROGRESS
INSN
INTL
INULL
@@ -685,6 +686,7 @@ enqueue
enqueued
env
eof
+epi
eq
equalp
errhandler
@@ -947,6 +949,7 @@ mkdir
mmap
mmrand
mnt
+movemask
msecs
msg
msvc
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index 1441187812e..8b79d2daed8 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -271,6 +271,7 @@ connection_stats = [
CacheStat('cache_read', 'pages read into cache'),
CacheStat('cache_read_app_count', 'application threads page read from disk to cache count'),
CacheStat('cache_read_app_time', 'application threads page read from disk to cache time (usecs)'),
+ CacheStat('cache_read_deleted', 'pages read into cache after truncate'),
CacheStat('cache_read_lookaside', 'pages read into cache requiring lookaside entries'),
CacheStat('cache_read_lookaside_delay', 'pages read into cache with skipped lookaside entries needed later'),
CacheStat('cache_read_lookaside_skipped', 'pages read into cache skipping older lookaside entries'),
@@ -549,7 +550,6 @@ connection_stats = [
YieldStat('page_locked_blocked', 'page acquire locked blocked'),
YieldStat('page_read_blocked', 'page acquire read blocked'),
YieldStat('page_sleep', 'page acquire time sleeping (usecs)'),
- YieldStat('tree_descend_blocked', 'tree descend one level yielded for split page index update'),
YieldStat('txn_release_blocked', 'connection close blocked waiting for transaction state stabilization'),
]
@@ -629,6 +629,7 @@ dsrc_stats = [
CacheStat('cache_inmem_splittable', 'in-memory page passed criteria to be split'),
CacheStat('cache_pages_requested', 'pages requested from the cache'),
CacheStat('cache_read', 'pages read into cache'),
+ CacheStat('cache_read_deleted', 'pages read into cache after truncate'),
CacheStat('cache_read_lookaside', 'pages read into cache requiring lookaside entries'),
CacheStat('cache_read_overflow', 'overflow pages read into cache'),
CacheStat('cache_write', 'pages written from cache'),
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 699ff2083f7..dc6689f6bf1 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "875e91581c63e1e4d47c547291f0a582f30eddae",
+ "commit": "ea986ede145b8c2e3da8f8d11ef25813770c0b39",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-3.8"
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index 6575080c858..2d6f8623059 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -29,7 +29,7 @@ struct __wt_dbg {
const char *key_format;
const char *value_format;
- WT_ITEM *tmp; /* Temporary space */
+ WT_ITEM *t1, *t2; /* Temporary space */
};
static const /* Output separator */
@@ -51,8 +51,7 @@ static int __debug_page_row_int(WT_DBG *, WT_PAGE *, uint32_t);
static int __debug_page_row_leaf(WT_DBG *, WT_PAGE *);
static int __debug_ref(WT_DBG *, WT_REF *);
static int __debug_row_skip(WT_DBG *, WT_INSERT_HEAD *);
-static int __debug_tree(
- WT_SESSION_IMPL *, WT_BTREE *, WT_REF *, const char *, uint32_t);
+static int __debug_tree(WT_SESSION_IMPL *, WT_REF *, const char *, uint32_t);
static int __debug_update(WT_DBG *, WT_UPDATE *, bool);
static int __dmsg_wrapup(WT_DBG *);
@@ -124,10 +123,25 @@ __debug_item(WT_DBG *ds, const char *tag, const void *data_arg, size_t size)
static int
__debug_item_key(WT_DBG *ds, const char *tag, const void *data_arg, size_t size)
{
+ WT_SESSION_IMPL *session;
+
+ session = ds->session;
+
+ /*
+ * If the format is 'S', it's a string and our version of it may
+ * not yet be nul-terminated.
+ */
+ if (WT_STREQ(ds->key_format, "S") &&
+ ((char *)data_arg)[size - 1] != '\0') {
+ WT_RET(__wt_buf_fmt(
+ session, ds->t2, "%.*s", (int)size, (char *)data_arg));
+ data_arg = ds->t2->data;
+ size = (size_t)ds->t2->size + 1;
+ }
return (ds->f(ds, "\t%s%s{%s}\n",
tag == NULL ? "" : tag, tag == NULL ? "" : " ",
__wt_buf_set_printable_format(
- ds->session, data_arg, size, ds->key_format, ds->tmp)));
+ ds->session, data_arg, size, ds->key_format, ds->t1)));
}
/*
@@ -138,10 +152,25 @@ static int
__debug_item_value(
WT_DBG *ds, const char *tag, const void *data_arg, size_t size)
{
+ WT_SESSION_IMPL *session;
+
+ session = ds->session;
+
+ /*
+ * If the format is 'S', it's a string and our version of it may
+ * not yet be nul-terminated.
+ */
+ if (WT_STREQ(ds->value_format, "S") &&
+ ((char *)data_arg)[size - 1] != '\0') {
+ WT_RET(__wt_buf_fmt(
+ session, ds->t2, "%.*s", (int)size, (char *)data_arg));
+ data_arg = ds->t2->data;
+ size = (size_t)ds->t2->size + 1;
+ }
return (ds->f(ds, "\t%s%s{%s}\n",
tag == NULL ? "" : tag, tag == NULL ? "" : " ",
__wt_buf_set_printable_format(
- ds->session, data_arg, size, ds->value_format, ds->tmp)));
+ ds->session, data_arg, size, ds->value_format, ds->t1)));
}
/*
@@ -229,7 +258,8 @@ __debug_config(WT_SESSION_IMPL *session, WT_DBG *ds, const char *ofile)
ds->session = session;
- WT_RET(__wt_scr_alloc(session, 512, &ds->tmp));
+ WT_RET(__wt_scr_alloc(session, 512, &ds->t1));
+ WT_RET(__wt_scr_alloc(session, 512, &ds->t2));
/*
* If we weren't given a file, we use the default event handler, and
@@ -245,7 +275,7 @@ __debug_config(WT_SESSION_IMPL *session, WT_DBG *ds, const char *ofile)
ds->f = __dmsg_file;
}
- btree = S2BT_SAFE(session);
+ btree = S2BT(session);
ds->key_format = btree->key_format;
ds->value_format = btree->value_format;
return (0);
@@ -264,7 +294,8 @@ __dmsg_wrapup(WT_DBG *ds)
session = ds->session;
msg = ds->msg;
- __wt_scr_free(session, &ds->tmp);
+ __wt_scr_free(session, &ds->t1);
+ __wt_scr_free(session, &ds->t2);
/*
* Discard the buffer -- it shouldn't have anything in it, but might
@@ -608,10 +639,18 @@ __wt_debug_tree_shape(
*/
int
__wt_debug_tree_all(
- WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile)
+ void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile)
{
- return (__debug_tree(session,
- btree, ref, ofile, WT_DEBUG_TREE_LEAF | WT_DEBUG_TREE_WALK));
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)session_arg;
+ if (btree == NULL)
+ btree = S2BT(session);
+
+ WT_WITH_BTREE(session, btree, ret = __debug_tree(
+ session, ref, ofile, WT_DEBUG_TREE_LEAF | WT_DEBUG_TREE_WALK));
+ return (ret);
}
/*
@@ -623,9 +662,18 @@ __wt_debug_tree_all(
*/
int
__wt_debug_tree(
- WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile)
+ void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile)
{
- return (__debug_tree(session, btree, ref, ofile, WT_DEBUG_TREE_WALK));
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)session_arg;
+ if (btree == NULL)
+ btree = S2BT(session);
+
+ WT_WITH_BTREE(session, btree,
+ ret = __debug_tree(session, ref, ofile, WT_DEBUG_TREE_WALK));
+ return (ret);
}
/*
@@ -633,18 +681,41 @@ __wt_debug_tree(
* Dump the in-memory information for a page.
*/
int
-__wt_debug_page(WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile)
+__wt_debug_page(
+ void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile)
{
WT_DBG *ds, _ds;
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
- WT_ASSERT(session, S2BT_SAFE(session) != NULL);
+ session = (WT_SESSION_IMPL *)session_arg;
+ if (btree == NULL)
+ btree = S2BT(session);
ds = &_ds;
- WT_RET(__debug_config(session, ds, ofile));
+ WT_WITH_BTREE(session, btree, ret = __debug_config(session, ds, ofile));
+ WT_RET(ret);
- WT_RET(__debug_page(ds, ref, WT_DEBUG_TREE_LEAF));
+ WT_WITH_BTREE(session, btree,
+ ret = __debug_page(ds, ref, WT_DEBUG_TREE_LEAF));
- return (__dmsg_wrapup(ds));
+ WT_TRET(__dmsg_wrapup(ds));
+ return (ret);
+}
+
+/*
+ * __wt_debug_cursor_page --
+ * Dump the in-memory information for a cursor-referenced page.
+ */
+int
+__wt_debug_cursor_page(void *cursor_arg, const char *ofile)
+{
+ WT_CURSOR *cursor;
+ WT_CURSOR_BTREE *cbt;
+
+ cursor = cursor_arg;
+ cbt = cursor_arg;
+ return (__wt_debug_page(cursor->session, cbt->btree, cbt->ref, ofile));
}
/*
@@ -656,8 +727,8 @@ __wt_debug_page(WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile)
* in this function
*/
static int
-__debug_tree(WT_SESSION_IMPL *session,
- WT_BTREE *btree, WT_REF *ref, const char *ofile, uint32_t flags)
+__debug_tree(
+ WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile, uint32_t flags)
{
WT_DBG *ds, _ds;
WT_DECL_RET;
@@ -667,12 +738,12 @@ __debug_tree(WT_SESSION_IMPL *session,
/* A NULL page starts at the top of the tree -- it's a convenience. */
if (ref == NULL)
- ref = &btree->root;
+ ref = &S2BT(session)->root;
- WT_WITH_BTREE(session, btree, ret = __debug_page(ds, ref, flags));
- WT_RET(ret);
+ ret = __debug_page(ds, ref, flags);
- return (__dmsg_wrapup(ds));
+ WT_TRET(__dmsg_wrapup(ds));
+ return (ret);
}
/*
@@ -1189,7 +1260,7 @@ __debug_ref(WT_DBG *ds, WT_REF *ref)
__wt_ref_info(ref, &addr, &addr_size, NULL);
return (ds->f(ds, "\t" "%p %s %s\n", (void *)ref,
- state, __wt_addr_string(session, addr, addr_size, ds->tmp)));
+ state, __wt_addr_string(session, addr, addr_size, ds->t1)));
}
/*
diff --git a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c
index cb50bfbcf61..a10c82d2cf2 100644
--- a/src/third_party/wiredtiger/src/btree/bt_delete.c
+++ b/src/third_party/wiredtiger/src/btree/bt_delete.c
@@ -71,9 +71,13 @@ __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
*skipp = false;
- /* If we have a clean page in memory, attempt to evict it. */
+ /*
+ * If we have a clean page in memory, attempt to evict it. Do a fast
+ * check for a dirty page, and then repeat the test once we're locked.
+ */
previous_state = ref->state;
if ((previous_state == WT_REF_MEM || previous_state == WT_REF_LIMBO) &&
+ !__wt_page_is_modified(ref->page) &&
__wt_atomic_casv32(&ref->state, previous_state, WT_REF_LOCKED)) {
if (__wt_page_is_modified(ref->page)) {
ref->state = previous_state;
@@ -222,7 +226,7 @@ __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref)
* and if we've yielded enough times, start sleeping so we
* don't burn CPU to no purpose.
*/
- __wt_ref_state_yield_sleep(&yield_count, &sleep_count);
+ __wt_state_yield_sleep(&yield_count, &sleep_count);
WT_STAT_CONN_INCRV(session,
page_del_rollback_blocked, sleep_count);
}
@@ -287,11 +291,7 @@ __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
if (!__wt_atomic_casv32(&ref->state, WT_REF_DELETED, WT_REF_LOCKED))
return (false);
- skip = ref->page_del == NULL || (visible_all ?
- __wt_txn_visible_all(session, ref->page_del->txnid,
- WT_TIMESTAMP_NULL(&ref->page_del->timestamp)):
- __wt_txn_visible(session, ref->page_del->txnid,
- WT_TIMESTAMP_NULL(&ref->page_del->timestamp)));
+ skip = !__wt_page_del_active(session, ref, visible_all);
/*
* The page_del structure can be freed as soon as the delete is stable:
@@ -330,6 +330,7 @@ __tombstone_update_alloc(WT_SESSION_IMPL *session,
if (page_del != NULL) {
upd->txnid = page_del->txnid;
__wt_timestamp_set(&upd->timestamp, &page_del->timestamp);
+ upd->prepare_state = page_del->prepare_state;
}
*updp = upd;
return (0);
@@ -356,6 +357,9 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
btree = S2BT(session);
page = ref->page;
+ WT_STAT_CONN_INCR(session, cache_read_deleted);
+ WT_STAT_DATA_INCR(session, cache_read_deleted);
+
/*
* Give the page a modify structure.
*
@@ -390,8 +394,8 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
* needs to be resolved, otherwise, there may not be one (and, if the
* transaction has resolved, we can ignore the page-deleted structure).
*/
- page_del =
- __wt_btree_truncate_active(session, ref) ? ref->page_del : NULL;
+ page_del = __wt_page_del_active(session, ref, true) ?
+ ref->page_del : NULL;
/*
* Allocate the per-page update array if one doesn't already exist. (It
diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c
index 8eb120f06ec..17497561248 100644
--- a/src/third_party/wiredtiger/src/btree/bt_random.c
+++ b/src/third_party/wiredtiger/src/btree/bt_random.c
@@ -261,8 +261,8 @@ restart: /*
* On other error, simply return, the swap call ensures we're
* holding nothing on failure.
*/
-descend: if ((ret =
- __wt_page_swap(session, current, descent, flags)) == 0) {
+descend: if ((ret = __wt_page_swap(
+ session, current, descent, false, flags)) == 0) {
current = descent;
continue;
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index 450fd6cf563..345556c4c41 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -562,6 +562,13 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
if (F_ISSET(session, WT_SESSION_IGNORE_CACHE_SIZE))
LF_SET(WT_READ_IGNORE_CACHE_SIZE);
+ /* Sanity check flag combinations. */
+ WT_ASSERT(session, !LF_ISSET(
+ WT_READ_DELETED_SKIP | WT_READ_NO_WAIT | WT_READ_LOOKASIDE) ||
+ LF_ISSET(WT_READ_CACHE));
+ WT_ASSERT(session, !LF_ISSET(WT_READ_DELETED_CHECK) ||
+ !LF_ISSET(WT_READ_DELETED_SKIP));
+
/*
* Ignore reads of pages already known to be in cache, otherwise the
* eviction server can dominate these statistics.
@@ -575,7 +582,9 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
force_attempts = 0, sleep_cnt = wait_cnt = 0;;) {
switch (current_state = ref->state) {
case WT_REF_DELETED:
- if (LF_ISSET(WT_READ_NO_EMPTY) &&
+ if (LF_ISSET(WT_READ_DELETED_SKIP | WT_READ_NO_WAIT))
+ return (WT_NOTFOUND);
+ if (LF_ISSET(WT_READ_DELETED_CHECK) &&
__wt_delete_page_skip(session, ref, false))
return (WT_NOTFOUND);
goto read;
@@ -799,7 +808,7 @@ skip_evict: /*
if (cache_work)
continue;
}
- __wt_ref_state_yield_sleep(&wait_cnt, &sleep_cnt);
+ __wt_state_yield_sleep(&wait_cnt, &sleep_cnt);
WT_STAT_CONN_INCRV(session, page_sleep, sleep_cnt);
}
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index 8600c7d6555..ad7d7d9fcab 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -132,7 +132,20 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
tried_eviction = false;
time_start = time_stop = 0;
+ /* Only visit pages in cache and don't bump page read generations. */
flags = WT_READ_CACHE | WT_READ_NO_GEN;
+
+ /*
+ * Skip all deleted pages. For a page to be marked deleted, it must
+ * have been evicted from cache and marked clean. Checkpoint should
+ * never instantiate deleted pages: if a truncate is not visible to the
+ * checkpoint, the on-disk version is correct. If the truncate is
+ * visible, we skip over the child page when writing its parent. We
+ * check whether a truncate is visible in the checkpoint as part of
+ * reconciling internal pages (specifically in __rec_child_modify).
+ */
+ LF_SET(WT_READ_DELETED_SKIP);
+
internal_bytes = leaf_bytes = 0;
internal_pages = leaf_pages = 0;
saved_pinned_id = WT_SESSION_TXN_STATE(session)->pinned_id;
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index c10a9256769..7f711be3480 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -384,7 +384,7 @@ __verify_tree(WT_SESSION_IMPL *session, WT_REF *ref, WT_VSTUFF *vs)
if (vs->dump_blocks)
WT_RET(__wt_debug_disk(session, page->dsk, NULL));
if (vs->dump_pages)
- WT_RET(__wt_debug_page(session, ref, NULL));
+ WT_RET(__wt_debug_page(session, NULL, ref, NULL));
#endif
/*
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index 535e804d6a8..a800d896023 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -70,7 +70,7 @@ __ref_index_slot(WT_SESSION_IMPL *session,
* before retrying, and if we've yielded enough times, start
* sleeping so we don't burn CPU to no purpose.
*/
- __wt_ref_state_yield_sleep(&yield_count, &sleep_count);
+ __wt_state_yield_sleep(&yield_count, &sleep_count);
WT_STAT_CONN_INCRV(session, page_index_slot_ref_blocked,
sleep_count);
}
@@ -176,84 +176,6 @@ __ref_ascend(WT_SESSION_IMPL *session,
}
/*
- * __ref_descend_prev --
- * Descend the tree one level, during a previous-cursor walk.
- */
-static inline void
-__ref_descend_prev(
- WT_SESSION_IMPL *session, WT_REF *ref, WT_PAGE_INDEX **pindexp)
-{
- WT_PAGE_INDEX *pindex;
- uint64_t yield_count;
-
- /*
- * We're passed a child page into which we're descending, and on which
- * we have a hazard pointer.
- */
- for (yield_count = 0;; yield_count++, __wt_yield()) {
- /*
- * There's a split race when a cursor moving backwards through
- * the tree descends the tree. If we're splitting an internal
- * page into its parent, we move the WT_REF structures and
- * update the parent's page index before updating the split
- * page's page index, and it's not an atomic update. A thread
- * can read the parent page's replacement page index and then
- * read the split page's original index.
- *
- * This can create a race for previous-cursor movements.
- *
- * For example, imagine an internal page with 3 child pages,
- * with the namespaces a-f, g-h and i-j; the first child page
- * splits. The parent starts out with the following page-index:
- *
- * | ... | a | g | i | ... |
- *
- * The split page starts out with the following page-index:
- *
- * | a | b | c | d | e | f |
- *
- * The first step is to move the c-f ranges into a new subtree,
- * so, for example we might have two new internal pages 'c' and
- * 'e', where the new 'c' page references the c-d namespace and
- * the new 'e' page references the e-f namespace. The top of the
- * subtree references the parent page, but until the parent's
- * page index is updated, any threads in the subtree won't be
- * able to ascend out of the subtree. However, once the parent
- * page's page index is updated to this:
- *
- * | ... | a | c | e | g | i | ... |
- *
- * threads in the subtree can ascend into the parent. Imagine a
- * cursor in the c-d part of the namespace that ascends to the
- * parent's 'c' slot. It would then decrement to the slot before
- * the 'c' slot, the 'a' slot.
- *
- * The previous-cursor movement selects the last slot in the 'a'
- * page; if the split page's page-index hasn't been updated yet,
- * it will select the 'f' slot, which is incorrect. Once the
- * split page's page index is updated to this:
- *
- * | a | b |
- *
- * the previous-cursor movement will select the 'b' slot, which
- * is correct.
- *
- * This function takes an argument which is the internal page
- * from which we're descending. If the last slot on the page no
- * longer points to the current page as its "home", the page is
- * being split and part of its namespace moved. We have the
- * correct page and we don't have to move, all we have to do is
- * wait until the split page's page index is updated.
- */
- WT_INTL_INDEX_GET(session, ref->page, pindex);
- if (pindex->index[pindex->entries - 1]->home == ref->page)
- break;
- }
- *pindexp = pindex;
- WT_STAT_CONN_INCRV(session, tree_descend_blocked, yield_count);
-}
-
-/*
* __ref_initial_descent_prev --
* Descend the tree one level, when setting up the initial cursor position
* for a previous-cursor walk.
@@ -265,6 +187,21 @@ __ref_initial_descent_prev(
WT_PAGE_INDEX *pindex;
/*
+ * When splitting an internal page into its parent, we move the WT_REF
+ * structures and update the parent's page index before updating the
+ * split page's page index, and it's not an atomic update. A thread can
+ * read the parent page's replacement page index, then read the split
+ * page's original index, or the parent page's original and the split
+ * page's replacement.
+ *
+ * This isn't a problem for a cursor setting up at the start of the tree
+ * because we do right-hand splits on internal pages and the initial
+ * part of the split page's namespace won't change as part of a split.
+ * A thread reading the parent page's and split page's indexes will move
+ * to the same slot no matter what order of indexes are read.
+ *
+ * Handle a cursor setting up at the end of the tree.
+ *
* We're passed a child page into which we're descending, and on which
* we have a hazard pointer.
*
@@ -293,11 +230,13 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
WT_DECL_RET;
WT_PAGE_INDEX *pindex;
WT_REF *couple, *couple_orig, *ref;
+ uint64_t sleep_count, yield_count;
uint32_t current_state, slot;
bool empty_internal, initial_descent, prev, skip;
btree = S2BT(session);
pindex = NULL;
+ sleep_count = yield_count = 0;
empty_internal = initial_descent = false;
/*
@@ -307,8 +246,9 @@ __tree_walk_internal(WT_SESSION_IMPL *session,
*/
WT_ENTER_PAGE_INDEX(session);
- /* Walk should never instantiate deleted pages. */
- LF_SET(WT_READ_NO_EMPTY);
+ /* Check whether deleted pages can be skipped. */
+ if (!LF_ISSET(WT_READ_DELETED_SKIP))
+ LF_SET(WT_READ_DELETED_CHECK);
/*
* !!!
@@ -427,11 +367,14 @@ restart: /*
* handle restart or not-found returns, it would require
* additional complexity and is not a possible return:
* we're moving to the parent of the current child page,
- * the parent can't have been evicted.
+ * the parent can't have been evicted. (This is why we
+ * don't pass "prev" to the page-swap function, we can't
+ * handle the restart error returned if the parent page
+ * is currently splitting.)
*/
if (!LF_ISSET(WT_READ_SKIP_INTL)) {
WT_ERR(__wt_page_swap(
- session, couple, ref, flags));
+ session, couple, ref, false, flags));
*refp = ref;
goto done;
}
@@ -509,7 +452,7 @@ restart: /*
break;
}
- ret = __wt_page_swap(session, couple, ref,
+ ret = __wt_page_swap(session, couple, ref, prev,
WT_READ_NOTFOUND_OK | WT_READ_RESTART_OK | flags);
/*
@@ -529,6 +472,14 @@ restart: /*
ret = 0;
/*
+ * Yield before retrying, and if we've yielded
+ * enough times, start sleeping so we don't burn
+ * CPU to no purpose.
+ */
+ __wt_state_yield_sleep(
+ &yield_count, &sleep_count);
+
+ /*
* If a cursor is setting up at the end of the
* tree, we can't use our parent page's index,
* because it may have already split; restart
@@ -576,44 +527,16 @@ descend: empty_internal = true;
/*
* There's a split race when a cursor is setting
- * up at the end of the tree or moving backwards
- * through the tree and descending a level. When
- * splitting an internal page into its parent,
- * we move the WT_REF structures and update the
- * parent's page index before updating the split
- * page's page index, and it's not an atomic
- * update. A thread can read the parent page's
- * replacement page index, then read the split
- * page's original index, or the parent page's
- * original and the split page's replacement.
- *
- * This isn't a problem for a cursor setting up
- * at the start of the tree or moving forwards
- * through the tree because we do right-hand
- * splits on internal pages and the initial part
- * of the split page's namespace won't change as
- * part of a split. A thread reading the parent
- * page's and split page's indexes will move to
- * the same slot no matter what order of indexes
- * are read.
- *
- * Handle a cursor setting up at the end of the
- * tree or moving backwards through the tree.
+ * up at the end of the tree.
*/
- if (!prev) {
- WT_INTL_INDEX_GET(
- session, ref->page, pindex);
- slot = 0;
- } else if (initial_descent) {
+ if (prev && initial_descent) {
if (!__ref_initial_descent_prev(
session, ref, &pindex))
goto restart;
- slot = pindex->entries - 1;
- } else {
- __ref_descend_prev(
- session, ref, &pindex);
- slot = pindex->entries - 1;
- }
+ } else
+ WT_INTL_INDEX_GET(
+ session, ref->page, pindex);
+ slot = prev ? pindex->entries - 1 : 0;
continue;
}
diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c
index 5c0e066647a..8cc6630599b 100644
--- a/src/third_party/wiredtiger/src/btree/col_srch.c
+++ b/src/third_party/wiredtiger/src/btree/col_srch.c
@@ -191,8 +191,8 @@ descend: /*
* On other error, simply return, the swap call ensures we're
* holding nothing on failure.
*/
- if ((ret = __wt_page_swap(
- session, current, descent, WT_READ_RESTART_OK)) == 0) {
+ if ((ret = __wt_page_swap(session,
+ current, descent, false, WT_READ_RESTART_OK)) == 0) {
current = descent;
continue;
}
diff --git a/src/third_party/wiredtiger/src/btree/row_key.c b/src/third_party/wiredtiger/src/btree/row_key.c
index ca8e2418857..39eddb6e0cc 100644
--- a/src/third_party/wiredtiger/src/btree/row_key.c
+++ b/src/third_party/wiredtiger/src/btree/row_key.c
@@ -408,7 +408,7 @@ switch_and_jump: /* Switching to a forward roll. */
}
next: switch (direction) {
- case BACKWARD:
+ case BACKWARD:
--rip;
++slot_offset;
break;
diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c
index e75b307812c..20acda8a1ab 100644
--- a/src/third_party/wiredtiger/src/btree/row_srch.c
+++ b/src/third_party/wiredtiger/src/btree/row_srch.c
@@ -443,8 +443,8 @@ descend: /*
* On other error, simply return, the swap call ensures we're
* holding nothing on failure.
*/
- if ((ret = __wt_page_swap(
- session, current, descent, WT_READ_RESTART_OK)) == 0) {
+ if ((ret = __wt_page_swap(session,
+ current, descent, false, WT_READ_RESTART_OK)) == 0) {
current = descent;
continue;
}
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index ffcb2139330..bd68a8b0937 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -238,6 +238,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_alter[] = {
{ "access_pattern_hint", "string",
NULL, "choices=[\"none\",\"random\",\"sequential\"]",
NULL, 0 },
+ { "app_metadata", "string", NULL, NULL, NULL, 0 },
{ "assert", "category",
NULL, NULL,
confchk_assert_subconfigs, 2 },
@@ -1274,9 +1275,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {
confchk_WT_CURSOR_reconfigure, 2
},
{ "WT_SESSION.alter",
- "access_pattern_hint=none,assert=(commit_timestamp=none,"
- "read_timestamp=none),cache_resident=false,log=(enabled=true)",
- confchk_WT_SESSION_alter, 4
+ "access_pattern_hint=none,app_metadata=,"
+ "assert=(commit_timestamp=none,read_timestamp=none),"
+ "cache_resident=false,log=(enabled=true)",
+ confchk_WT_SESSION_alter, 5
},
{ "WT_SESSION.begin_transaction",
"ignore_prepare=false,isolation=,name=,priority=0,read_timestamp="
diff --git a/src/third_party/wiredtiger/src/cursor/cur_std.c b/src/third_party/wiredtiger/src/cursor/cur_std.c
index 1f5c5f25c57..00a6bc4645d 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_std.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_std.c
@@ -704,7 +704,7 @@ err: WT_TRET(cursor->reopen(cursor, false));
*/
int
__wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri,
- const char *cfg[], WT_CURSOR **cursorp)
+ WT_CURSOR *to_dup, const char *cfg[], WT_CURSOR **cursorp)
{
WT_CONFIG_ITEM cval;
WT_CURSOR *cursor;
@@ -752,10 +752,22 @@ __wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri,
}
/*
+ * Caller guarantees that exactly one of the URI and the
+ * duplicate cursor is non-NULL.
+ */
+ if (to_dup != NULL) {
+ WT_ASSERT(session, uri == NULL);
+ uri = to_dup->uri;
+ hash_value = to_dup->uri_hash;
+ } else {
+ WT_ASSERT(session, uri != NULL);
+ hash_value = __wt_hash_city64(uri, strlen(uri));
+ }
+
+ /*
* Walk through all cursors, if there is a cached
* cursor that matches uri and configuration, use it.
*/
- hash_value = __wt_hash_city64(uri, strlen(uri));
bucket = hash_value % WT_HASH_ARRAY_SIZE;
TAILQ_FOREACH(cursor, &session->cursor_cache[bucket], q) {
if (cursor->uri_hash == hash_value &&
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 5c478654585..719fa7e8c5f 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -441,7 +441,7 @@ __evict_child_check(WT_SESSION_IMPL *session, WT_REF *parent)
* control can be running below our locked internal
* page.
*/
- if (__wt_btree_truncate_active(session, child))
+ if (__wt_page_del_active(session, child, true))
return (EBUSY);
break;
default:
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 893f51aa022..9752737ef41 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -10,18 +10,19 @@
/* AUTOMATIC FLAG VALUE GENERATION START */
#define WT_READ_CACHE 0x0001u
-#define WT_READ_IGNORE_CACHE_SIZE 0x0002u
-#define WT_READ_LOOKASIDE 0x0004u
-#define WT_READ_NOTFOUND_OK 0x0008u
-#define WT_READ_NO_EMPTY 0x0010u
-#define WT_READ_NO_GEN 0x0020u
-#define WT_READ_NO_SPLIT 0x0040u
-#define WT_READ_NO_WAIT 0x0080u
-#define WT_READ_PREV 0x0100u
-#define WT_READ_RESTART_OK 0x0200u
-#define WT_READ_SKIP_INTL 0x0400u
-#define WT_READ_TRUNCATE 0x0800u
-#define WT_READ_WONT_NEED 0x1000u
+#define WT_READ_DELETED_CHECK 0x0002u
+#define WT_READ_DELETED_SKIP 0x0004u
+#define WT_READ_IGNORE_CACHE_SIZE 0x0008u
+#define WT_READ_LOOKASIDE 0x0010u
+#define WT_READ_NOTFOUND_OK 0x0020u
+#define WT_READ_NO_GEN 0x0040u
+#define WT_READ_NO_SPLIT 0x0080u
+#define WT_READ_NO_WAIT 0x0100u
+#define WT_READ_PREV 0x0200u
+#define WT_READ_RESTART_OK 0x0400u
+#define WT_READ_SKIP_INTL 0x0800u
+#define WT_READ_TRUNCATE 0x1000u
+#define WT_READ_WONT_NEED 0x2000u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
/* AUTOMATIC FLAG VALUE GENERATION START */
@@ -506,11 +507,7 @@ struct __wt_page {
* Internal pages (both column- and row-store).
*
* In-memory internal pages have an array of pointers to child
- * structures, maintained in collated order. When a page is
- * read into memory, the initial list of children is stored in
- * the "orig_index" field, and it and the collated order are
- * the same. After a page splits, the collated order and the
- * original order will differ.
+ * structures, maintained in collated order.
*
* Multiple threads of control may be searching the in-memory
* internal page and a child page of the internal page may
@@ -707,6 +704,45 @@ struct __wt_page {
((void *)((uint8_t *)((page)->dsk) + (o)))
/*
+ * Prepare update states.
+ *
+ * Prepare update synchronization is based on the state field, which has the
+ * following possible states:
+ *
+ * WT_PREPARE_INIT:
+ * The initial prepare state of either an update or a page_del structure,
+ * indicating a prepare phase has not started yet.
+ * This state has no impact on the visibility of the update's data.
+ *
+ * WT_PREPARE_INPROGRESS:
+ * The update is in the prepared phase.
+ *
+ * WT_PREPARE_LOCKED:
+ * The state is locked while the transition from INPROGRESS to RESOLVED
+ * is in progress. Any reader of the state must wait for the transition
+ * to complete.
+ *
+ * WT_PREPARE_RESOLVED:
+ * Represents the commit state of the prepared update.
+ *
+ * State Transition:
+ * From uncommitted -> prepare -> commit:
+ * INIT --> INPROGRESS --> LOCKED --> RESOLVED
+ * LOCKED will be a momentary phase during timestamp update.
+ *
+ * From uncommitted -> prepare -> rollback:
+ * INIT --> INPROGRESS
+ * The prepare state is not updated during rollback; it remains
+ * INPROGRESS.
+ */
+#define WT_PREPARE_INIT 0 /* Must be 0, as structures
+ will be default initialized
+ with 0. */
+#define WT_PREPARE_INPROGRESS 1
+#define WT_PREPARE_LOCKED 2
+#define WT_PREPARE_RESOLVED 3
+
+/*
* Page state.
*
* Synchronization is based on the WT_REF->state field, which has a number of
@@ -779,6 +815,12 @@ struct __wt_page_deleted {
volatile uint64_t txnid; /* Transaction ID */
WT_DECL_TIMESTAMP(timestamp)
+ /*
+ * The state is used by transaction prepare to manage visibility
+ * and to pass the prepare state on to the updates in update_list.
+ */
+ volatile uint8_t prepare_state; /* Prepare state. */
+
uint32_t previous_state; /* Previous state */
WT_UPDATE **update_list; /* List of updates for abort */
@@ -989,16 +1031,6 @@ struct __wt_update {
#define WT_UPDATE_TOMBSTONE 5 /* deleted */
uint8_t type; /* type (one byte to conserve memory) */
- /*
- * The update state is used for transaction prepare to manage
- * visibility and transitioning update structure state safely.
- */
-#define WT_UPDATE_STATE_READY 0 /* Must be 0. Default or
- finalized prepare */
-#define WT_UPDATE_STATE_LOCKED 1 /* locked */
-#define WT_UPDATE_STATE_PREPARED 2 /* prepared */
- volatile uint8_t state;
-
/* If the update includes a complete value. */
#define WT_UPDATE_DATA_VALUE(upd) \
((upd)->type == WT_UPDATE_STANDARD || \
@@ -1009,6 +1041,12 @@ struct __wt_update {
#endif
/*
+ * The update state is used for transaction prepare to manage
+ * visibility and transitioning update structure state safely.
+ */
+ volatile uint8_t prepare_state; /* Prepare state. */
+
+ /*
* Zero or more bytes of value (the payload) immediately follows the
* WT_UPDATE structure. We use a C99 flexible array member which has
* the semantics we want.
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index de28eb7232f..149f4304692 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -1151,19 +1151,28 @@ __wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref)
}
/*
- * __wt_btree_truncate_active --
+ * __wt_page_del_active --
* Return if a truncate operation is active.
*/
static inline bool
-__wt_btree_truncate_active(WT_SESSION_IMPL *session, WT_REF *ref)
+__wt_page_del_active(
+ WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
{
WT_PAGE_DELETED *page_del;
+ uint8_t prepare_state;
if ((page_del = ref->page_del) == NULL)
return (false);
if (page_del->txnid == WT_TXN_ABORTED)
return (false);
- return (!__wt_txn_visible_all(session,
+ WT_ORDERED_READ(prepare_state, page_del->prepare_state);
+ if (prepare_state == WT_PREPARE_INPROGRESS ||
+ prepare_state == WT_PREPARE_LOCKED)
+ return (true);
+ return (visible_all ?
+ !__wt_txn_visible_all(session,
+ page_del->txnid, WT_TIMESTAMP_NULL(&page_del->timestamp)) :
+ !__wt_txn_visible(session,
page_del->txnid, WT_TIMESTAMP_NULL(&page_del->timestamp)));
}
@@ -1354,7 +1363,7 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
mod = page->modify;
/* A truncated page can't be evicted until the truncate completes. */
- if (__wt_btree_truncate_active(session, ref))
+ if (__wt_page_del_active(session, ref, true))
return (false);
/* Otherwise, never modified pages can always be evicted. */
@@ -1485,81 +1494,6 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
}
/*
- * __wt_page_swap_func --
- * Swap one page's hazard pointer for another one when hazard pointer
- * coupling up/down the tree.
- */
-static inline int
-__wt_page_swap_func(
- WT_SESSION_IMPL *session, WT_REF *held, WT_REF *want, uint32_t flags
-#ifdef HAVE_DIAGNOSTIC
- , const char *file, int line
-#endif
- )
-{
- WT_DECL_RET;
- bool acquired;
-
- /*
- * This function is here to simplify the error handling during hazard
- * pointer coupling so we never leave a hazard pointer dangling. The
- * assumption is we're holding a hazard pointer on "held", and want to
- * acquire a hazard pointer on "want", releasing the hazard pointer on
- * "held" when we're done.
- *
- * When walking the tree, we sometimes swap to the same page. Fast-path
- * that to avoid thinking about error handling.
- */
- if (held == want)
- return (0);
-
- /* Get the wanted page. */
- ret = __wt_page_in_func(session, want, flags
-#ifdef HAVE_DIAGNOSTIC
- , file, line
-#endif
- );
-
- /*
- * Expected failures: page not found or restart. Our callers list the
- * errors they're expecting to handle.
- */
- if (LF_ISSET(WT_READ_NOTFOUND_OK) && ret == WT_NOTFOUND)
- return (WT_NOTFOUND);
- if (LF_ISSET(WT_READ_RESTART_OK) && ret == WT_RESTART)
- return (WT_RESTART);
-
- /* Discard the original held page on either success or error. */
- acquired = ret == 0;
- WT_TRET(__wt_page_release(session, held, flags));
-
- /* Fast-path expected success. */
- if (ret == 0)
- return (0);
-
- /*
- * If there was an error at any point that our caller isn't prepared to
- * handle, discard any page we acquired.
- */
- if (acquired)
- WT_TRET(__wt_page_release(session, want, flags));
-
- /*
- * If we're returning an error, don't let it be one our caller expects
- * to handle as returned by page-in: the expectation includes the held
- * page not having been released, and that's not the case.
- */
- if (LF_ISSET(WT_READ_NOTFOUND_OK) && ret == WT_NOTFOUND)
- WT_RET_MSG(session,
- EINVAL, "page-release WT_NOTFOUND error mapped to EINVAL");
- if (LF_ISSET(WT_READ_RESTART_OK) && ret == WT_RESTART)
- WT_RET_MSG(session,
- EINVAL, "page-release WT_RESTART error mapped to EINVAL");
-
- return (ret);
-}
-
-/*
* __wt_skip_choose_depth --
* Randomly choose a depth for a skiplist insert.
*/
@@ -1693,22 +1627,152 @@ __wt_split_descent_race(
}
/*
- * __wt_ref_state_yield_sleep --
- * sleep while waiting for the wt_ref state after THOUSAND yields.
+ * __wt_split_prev_race --
+ * Return if we raced with an internal page split when moving backwards
+ * through the tree.
*/
-static inline void
-__wt_ref_state_yield_sleep(uint64_t *yield_count, uint64_t *sleep_count)
+static inline bool
+__wt_split_prev_race(WT_SESSION_IMPL *session, WT_REF *ref)
{
+ WT_PAGE_INDEX *pindex;
+
/*
- * We yield before retrying, and if we've yielded enough times, start
- * sleeping so we don't burn CPU to no purpose.
+ * There's a split race when a cursor moving backwards through the tree
+ * descends the tree. If we're splitting an internal page into its
+ * parent, we move the WT_REF structures and update the parent's page
+ * index before updating the split page's page index, and it's not an
+ * atomic update. A thread can read the parent and split page's original
+ * indexes during a split, or read the parent page's replacement page
+ * index and then read the split page's original index, either of which
+ * can lead to skipping pages.
+ *
+ * For example, imagine an internal page with 3 child pages, with the
+ * namespaces a-f, g-h and i-j; the first child page splits. The parent
+ * starts out with the following page-index:
+ *
+ * | ... | a | g | i | ... |
+ *
+ * The split page starts out with the following page-index:
+ *
+ * | a | b | c | d | e | f |
+ *
+ * The first step is to move the c-f ranges into a new subtree, so, for
+ * example we might have two new internal pages 'c' and 'e', where the
+ * new 'c' page references the c-d namespace and the new 'e' page
+ * references the e-f namespace. The top of the subtree references the
+ * parent page, but until the parent's page index is updated, threads in
+ * the subtree won't be able to ascend out of the subtree. However, once
+ * the parent page's page index is updated to this:
+ *
+ * | ... | a | c | e | g | i | ... |
+ *
+ * threads in the subtree can ascend into the parent. Imagine a cursor
+ * in the c-d part of the namespace that ascends to the parent's 'c'
+ * slot. It would then decrement to the slot before the 'c' slot, the
+ * 'a' slot.
+ *
+ * The previous-cursor movement selects the last slot in the 'a' page;
+ * if the split page's page-index hasn't been updated yet, it selects
+ * the 'f' slot, which is incorrect. Once the split page's page index is
+ * updated to this:
+ *
+ * | a | b |
+ *
+ * the previous-cursor movement will select the 'b' slot, which is
+ * correct.
+ *
+ * This function takes an argument which is the internal page into which
+ * we're coupling. If the last slot on the page no longer points to
+ * the current page as its "home", the page is being split and part of
+ * its namespace has moved, so we have to restart.
*/
- if ((*yield_count) < WT_THOUSAND) {
- (*yield_count)++;
- __wt_yield();
- return;
+ WT_INTL_INDEX_GET(session, ref->page, pindex);
+ return (pindex->index[pindex->entries - 1]->home != ref->page);
+}
+
+/*
+ * __wt_page_swap_func --
+ * Swap one page's hazard pointer for another one when hazard pointer
+ * coupling up/down the tree.
+ */
+static inline int
+__wt_page_swap_func(WT_SESSION_IMPL *session,
+ WT_REF *held, WT_REF *want, bool prev_race, uint32_t flags
+#ifdef HAVE_DIAGNOSTIC
+ , const char *file, int line
+#endif
+ )
+{
+ WT_DECL_RET;
+ bool acquired;
+
+ /*
+ * This function is here to simplify the error handling during hazard
+ * pointer coupling so we never leave a hazard pointer dangling. The
+ * assumption is we're holding a hazard pointer on "held", and want to
+ * acquire a hazard pointer on "want", releasing the hazard pointer on
+ * "held" when we're done.
+ *
+ * When walking the tree, we sometimes swap to the same page. Fast-path
+ * that to avoid thinking about error handling.
+ */
+ if (held == want)
+ return (0);
+
+ /* Get the wanted page. */
+ ret = __wt_page_in_func(session, want, flags
+#ifdef HAVE_DIAGNOSTIC
+ , file, line
+#endif
+ );
+
+ /*
+ * We can race when descending into an internal page as part of moving
+ * backwards through the tree, and we have to detect that race before
+ * releasing the page from which we are coupling, else we can't restart
+ * the movement.
+ */
+ if (ret == 0 && prev_race && WT_PAGE_IS_INTERNAL(want->page) &&
+ __wt_split_prev_race(session, want)) {
+ ret = WT_RESTART;
+ WT_TRET(__wt_page_release(session, want, flags));
}
- (*sleep_count) = WT_MIN((*sleep_count) + 100, WT_THOUSAND);
- __wt_sleep(0, (*sleep_count));
+ /*
+ * Expected failures: page not found or restart. Our callers list the
+ * errors they're expecting to handle.
+ */
+ if (LF_ISSET(WT_READ_NOTFOUND_OK) && ret == WT_NOTFOUND)
+ return (WT_NOTFOUND);
+ if (LF_ISSET(WT_READ_RESTART_OK) && ret == WT_RESTART)
+ return (WT_RESTART);
+
+ /* Discard the original held page on either success or error. */
+ acquired = ret == 0;
+ WT_TRET(__wt_page_release(session, held, flags));
+
+ /* Fast-path expected success. */
+ if (ret == 0)
+ return (0);
+
+ /*
+ * If there was an error at any point that our caller isn't prepared to
+ * handle, discard any page we acquired.
+ */
+ if (acquired)
+ WT_TRET(__wt_page_release(session, want, flags));
+
+ /*
+ * If we're returning an error, don't let it be one our caller expects
+ * to handle as returned by page-in: the expectation includes the held
+ * page not having been released, and that's not the case.
+ */
+ if (LF_ISSET(WT_READ_NOTFOUND_OK) && ret == WT_NOTFOUND)
+ WT_RET_MSG(session,
+ EINVAL, "page-release WT_NOTFOUND error mapped to EINVAL");
+ if (LF_ISSET(WT_READ_RESTART_OK) && ret == WT_RESTART)
+ WT_RET_MSG(session,
+ EINVAL, "page-release WT_RESTART error mapped to EINVAL");
+
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/include/btree_cmp.i b/src/third_party/wiredtiger/src/include/btree_cmp.i
index 8f8e0e83717..f8679933210 100644
--- a/src/third_party/wiredtiger/src/include/btree_cmp.i
+++ b/src/third_party/wiredtiger/src/include/btree_cmp.i
@@ -10,9 +10,33 @@
#if !defined(_MSC_VER) && !defined(_lint)
#include <x86intrin.h>
#endif
+#endif
/* 16B alignment */
#define WT_ALIGNED_16(p) (((uintptr_t)(p) & 0x0f) == 0)
#define WT_VECTOR_SIZE 16 /* chunk size */
+
+#if defined(HAVE_ARM_NEON_INTRIN_H)
+#include <arm_neon.h>
+/*
+ * _mm_movemask_epi8_neon --
+ * Creates a 16-bit mask from the most significant bits of the 16 signed
+ * or unsigned 8-bit integers (NEON analogue of SSE2 _mm_movemask_epi8).
+ */
+static inline uint16_t
+_mm_movemask_epi8_neon(const uint8x16_t data)
+{
+ /* Bit weights 0x01..0x80 for the eight lanes of each half. */
+ const uint8x8_t half = vcreate_u8(0x8040201008040201ULL);
+ const uint8x16_t powers = vcombine_u8(half, half);
+ /* 0xff in every lane whose top bit is set, 0x00 elsewhere. */
+ const uint8x16_t input =
+ vcltq_s8(vreinterpretq_s8_u8(data), vdupq_n_s8(0));
+ /* Each 64-bit lane sums eight weights, so it never exceeds 0xff. */
+ const uint64x2_t mask = vpaddlq_u32(
+ vpaddlq_u16(vpaddlq_u8(vandq_u8(input, powers))));
+ return ((uint16_t)(
+ (vgetq_lane_u64(mask, 1) << 8) | vgetq_lane_u64(mask, 0)));
+}
#endif
/*
@@ -70,6 +94,24 @@ __wt_lex_compare(const WT_ITEM *user_item, const WT_ITEM *tree_item)
}
len += remain;
}
+#elif defined(HAVE_ARM_NEON_INTRIN_H)
+ /* Use vector instructions if we'll execute at least 1 of them. */
+ if (len >= WT_VECTOR_SIZE) {
+ size_t remain;
+ uint8x16_t res_eq, u, t;
+ remain = len % WT_VECTOR_SIZE;
+ len -= remain;
+ for (; len > 0;
+ len -= WT_VECTOR_SIZE,
+ userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE) {
+ u = vld1q_u8(userp);
+ t = vld1q_u8(treep);
+ res_eq = vceqq_u8(u, t);
+ if (_mm_movemask_epi8_neon(res_eq) != 65535)
+ break;
+ }
+ len += remain;
+ }
#endif
/*
* Use the non-vectorized version for the remaining bytes and for the
@@ -158,6 +200,26 @@ __wt_lex_compare_skip(
}
len += remain;
}
+#elif defined(HAVE_ARM_NEON_INTRIN_H)
+ /* Use vector instructions if we'll execute at least 1 of them. */
+ if (len >= WT_VECTOR_SIZE) {
+ size_t remain;
+ uint8x16_t res_eq, u, t;
+ remain = len % WT_VECTOR_SIZE;
+ len -= remain;
+ if (WT_ALIGNED_16(userp) && WT_ALIGNED_16(treep))
+ for (; len > 0;
+ len -= WT_VECTOR_SIZE,
+ userp += WT_VECTOR_SIZE, treep += WT_VECTOR_SIZE,
+ *matchp += WT_VECTOR_SIZE) {
+ u = vld1q_u8(userp);
+ t = vld1q_u8(treep);
+ res_eq = vceqq_u8(u, t);
+ if (_mm_movemask_epi8_neon(res_eq) != 65535)
+ break;
+ }
+ len += remain;
+ }
#endif
/*
* Use the non-vectorized version for the remaining bytes and for the
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index d884401feb2..7b932f3ec49 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -123,9 +123,10 @@ extern int __wt_debug_offset_blind(WT_SESSION_IMPL *session, wt_off_t offset, co
extern int __wt_debug_offset(WT_SESSION_IMPL *session, wt_off_t offset, uint32_t size, uint32_t checksum, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_debug_disk(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_debug_tree_shape(WT_SESSION_IMPL *session, WT_PAGE *page, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_tree_all(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_tree(WT_SESSION_IMPL *session, WT_BTREE *btree, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_page(WT_SESSION_IMPL *session, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_tree_all(void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_tree(void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_page(void *session_arg, WT_BTREE *btree, WT_REF *ref, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_cursor_page(void *cursor_arg, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -353,7 +354,7 @@ extern void __wt_cursor_set_valuev(WT_CURSOR *cursor, va_list ap);
extern int __wt_cursor_cache(WT_CURSOR *cursor, WT_DATA_HANDLE *dhandle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_cursor_reopen(WT_CURSOR *cursor, WT_DATA_HANDLE *dhandle);
extern int __wt_cursor_cache_release(WT_SESSION_IMPL *session, WT_CURSOR *cursor, bool *released) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_cursor_cache_get(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *to_dup, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_cursor_close(WT_CURSOR *cursor) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_cursor_equals(WT_CURSOR *cursor, WT_CURSOR *other, int *equalp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_cursor_reconfigure(WT_CURSOR *cursor, const char *config) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -602,7 +603,7 @@ extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
extern int __wt_bulk_insert_fix(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_bulk_insert_fix_bitmap(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_alter(WT_SESSION_IMPL *session, const char *newcfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_schema_alter(WT_SESSION_IMPL *session, const char *uri, const char *newcfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_direct_io_size_check(WT_SESSION_IMPL *session, const char **cfg, const char *config_name, uint32_t *allocsizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_colgroup_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, const char *config, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_index_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *idxname, const char *config, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -641,6 +642,7 @@ extern int __wt_schema_backup_check(WT_SESSION_IMPL *session, const char *name)
extern WT_DATA_SOURCE *__wt_schema_get_source(WT_SESSION_IMPL *session, const char *name);
extern int __wt_str_name_check(WT_SESSION_IMPL *session, const char *str) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_name_check(WT_SESSION_IMPL *session, const char *str, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_exclusive_handle_operation(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), const char *cfg[], uint32_t open_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_worker(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_session_notsup(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_session_reset_cursors(WT_SESSION_IMPL *session, bool free_buffers) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -846,8 +848,9 @@ extern int __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name,
extern int __wt_txn_global_query_timestamp(WT_SESSION_IMPL *session, char *hex_timestamp, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t *ts, WT_CONFIG_ITEM *cval, bool cmp_oldest, bool cmp_stable, bool cmp_commit) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t *ts, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_parse_prepare_timestamp(WT_SESSION_IMPL *session, const char *cfg[], wt_timestamp_t *timestamp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_parse_read_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session);
extern void __wt_txn_clear_commit_timestamp(WT_SESSION_IMPL *session);
diff --git a/src/third_party/wiredtiger/src/include/misc.h b/src/third_party/wiredtiger/src/include/misc.h
index c4d7def85c0..6b30e63d2a3 100644
--- a/src/third_party/wiredtiger/src/include/misc.h
+++ b/src/third_party/wiredtiger/src/include/misc.h
@@ -292,15 +292,16 @@ typedef void wt_timestamp_t;
__wt_scr_alloc_func(session, size, scratchp, __func__, __LINE__)
#define __wt_page_in(session, ref, flags) \
__wt_page_in_func(session, ref, flags, __func__, __LINE__)
-#define __wt_page_swap(session, held, want, flags) \
- __wt_page_swap_func(session, held, want, flags, __func__, __LINE__)
+#define __wt_page_swap(session, held, want, prev_race, flags) \
+ __wt_page_swap_func( \
+ session, held, want, prev_race, flags, __func__, __LINE__)
#else
#define __wt_scr_alloc(session, size, scratchp) \
__wt_scr_alloc_func(session, size, scratchp)
#define __wt_page_in(session, ref, flags) \
__wt_page_in_func(session, ref, flags)
-#define __wt_page_swap(session, held, want, flags) \
- __wt_page_swap_func(session, held, want, flags)
+#define __wt_page_swap(session, held, want, prev_race, flags) \
+ __wt_page_swap_func(session, held, want, prev_race, flags)
#endif
/* Called on unexpected code path: locate the failure. */
diff --git a/src/third_party/wiredtiger/src/include/misc.i b/src/third_party/wiredtiger/src/include/misc.i
index acbbbcaff83..102d4f0cce0 100644
--- a/src/third_party/wiredtiger/src/include/misc.i
+++ b/src/third_party/wiredtiger/src/include/misc.i
@@ -223,3 +223,24 @@ __wt_txn_context_check(WT_SESSION_IMPL *session, bool requires_txn)
session->name);
return (0);
}
+
+/*
+ * __wt_state_yield_sleep --
+ * Yield while waiting; after a thousand yields, sleep with backoff.
+ */
+static inline void
+__wt_state_yield_sleep(uint64_t *yield_count, uint64_t *sleep_count)
+{
+ /*
+ * We yield before retrying; once the yield count reaches WT_THOUSAND,
+ * sleep instead (100 longer each call, capped) so we don't burn CPU.
+ */
+ if ((*yield_count) < WT_THOUSAND) {
+ (*yield_count)++;
+ __wt_yield();
+ return;
+ }
+
+ (*sleep_count) = WT_MIN((*sleep_count) + 100, WT_THOUSAND);
+ __wt_sleep(0, (*sleep_count));
+}
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index 01a982b8602..616ca59b57e 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -419,6 +419,7 @@ struct __wt_connection_stats {
int64_t cache_eviction_pages_queued_urgent;
int64_t cache_eviction_pages_queued_oldest;
int64_t cache_read;
+ int64_t cache_read_deleted;
int64_t cache_read_lookaside;
int64_t cache_read_lookaside_skipped;
int64_t cache_read_lookaside_delay;
@@ -615,7 +616,6 @@ struct __wt_connection_stats {
int64_t page_sleep;
int64_t page_del_rollback_blocked;
int64_t child_modify_blocked_page;
- int64_t tree_descend_blocked;
int64_t txn_commit_queue_empty;
int64_t txn_commit_queue_tail;
int64_t txn_commit_queue_inserts;
@@ -736,6 +736,7 @@ struct __wt_dsrc_stats {
int64_t cache_eviction_deepen;
int64_t cache_write_lookaside;
int64_t cache_read;
+ int64_t cache_read_deleted;
int64_t cache_read_lookaside;
int64_t cache_pages_requested;
int64_t cache_eviction_pages_seen;
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 9061157ff5a..f077ef164e9 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -251,8 +251,20 @@ static inline bool
__wt_txn_update_needs_timestamp(WT_SESSION_IMPL *session, WT_TXN_OP *op)
{
WT_TXN *txn;
+ wt_timestamp_t *timestamp;
txn = &session->txn;
+
+ /*
+ * The timestamp is in the page deleted structure for truncates, or
+ * in the update for other operations.
+ */
+ if (op->type == WT_TXN_OP_REF_DELETE)
+ timestamp = op->u.ref == NULL || op->u.ref->page_del == NULL ?
+ NULL : &op->u.ref->page_del->timestamp;
+ else
+ timestamp = op->u.upd == NULL ? NULL : &op->u.upd->timestamp;
+
/*
* Updates in the metadata never get timestamps (either now or at
* commit): metadata cannot be read at a point in time, only the most
@@ -260,8 +272,7 @@ __wt_txn_update_needs_timestamp(WT_SESSION_IMPL *session, WT_TXN_OP *op)
*/
return (op->fileid != WT_METAFILE_ID &&
F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
- (op->u.upd == NULL ||
- __wt_timestamp_iszero(&(op->u.upd->timestamp)) ||
+ (timestamp == NULL || __wt_timestamp_iszero(timestamp) ||
F_ISSET(txn, WT_TXN_PREPARE)));
}
#endif
@@ -550,12 +561,13 @@ __wt_txn_visible(
static inline WT_VISIBLE_TYPE
__wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd)
{
- uint8_t upd_state;
+ uint8_t prepare_state, previous_state;
bool upd_visible;
for (;;__wt_yield()) {
- /* Commit is in progress, yield and try again. */
- if ((upd_state = upd->state) == WT_UPDATE_STATE_LOCKED)
+ /* Prepare state change is in progress, yield and try again. */
+ WT_ORDERED_READ(prepare_state, upd->prepare_state);
+ if (prepare_state == WT_PREPARE_LOCKED)
continue;
upd_visible = __wt_txn_visible(
@@ -565,14 +577,17 @@ __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd)
* The visibility check is only valid if the update does not
* change state. If the state does change, recheck visibility.
*/
- if (upd->state == upd_state)
+ previous_state = prepare_state;
+ WT_ORDERED_READ(prepare_state, upd->prepare_state);
+ if (previous_state == prepare_state)
break;
}
if (!upd_visible)
return (WT_VISIBLE_FALSE);
- if (upd_state == WT_UPDATE_STATE_PREPARED)
+ /* Ignore the prepared update, if transaction configuration says so. */
+ if (prepare_state == WT_PREPARE_INPROGRESS)
return (F_ISSET(&session->txn, WT_TXN_IGNORE_PREPARE) ?
WT_VISIBLE_FALSE : WT_VISIBLE_PREPARE);
@@ -620,7 +635,7 @@ __wt_txn_read(WT_SESSION_IMPL *session, WT_UPDATE *upd, WT_UPDATE **updp)
if (upd == NULL && skipped_birthmark)
upd = &tombstone;
- *updp = (upd == NULL || upd->type == WT_UPDATE_BIRTHMARK ? NULL : upd);
+ *updp = upd == NULL || upd->type == WT_UPDATE_BIRTHMARK ? NULL : upd;
return (0);
}
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 1f2a438b8e9..1c3b75ec6ae 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -1139,6 +1139,8 @@ struct __wt_session {
* option leads to an advisory call to an appropriate operating system
* API where available., a string\, chosen from the following options:
* \c "none"\, \c "random"\, \c "sequential"; default \c none.}
+ * @config{app_metadata, application-owned metadata for this object., a
+ * string; default empty.}
* @config{cache_resident, do not ever evict the object's pages from
* cache. Not compatible with LSM tables; see @ref
* tuning_cache_resident for more information., a boolean flag; default
@@ -5102,445 +5104,442 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1104
/*! cache: pages read into cache */
#define WT_STAT_CONN_CACHE_READ 1105
+/*! cache: pages read into cache after truncate */
+#define WT_STAT_CONN_CACHE_READ_DELETED 1106
/*! cache: pages read into cache requiring lookaside entries */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1106
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1107
/*! cache: pages read into cache skipping older lookaside entries */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_SKIPPED 1107
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_SKIPPED 1108
/*!
* cache: pages read into cache with skipped lookaside entries needed
* later
*/
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY 1108
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY 1109
/*! cache: pages requested from the cache */
-#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1109
+#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1110
/*! cache: pages seen by eviction walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1110
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1111
/*! cache: pages selected for eviction unable to be evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1111
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1112
/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 1112
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 1113
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1113
+#define WT_STAT_CONN_CACHE_WRITE 1114
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1114
+#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1115
/*! cache: percentage overhead */
-#define WT_STAT_CONN_CACHE_OVERHEAD 1115
+#define WT_STAT_CONN_CACHE_OVERHEAD 1116
/*! cache: tracked bytes belonging to internal pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1116
+#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1117
/*! cache: tracked bytes belonging to leaf pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_LEAF 1117
+#define WT_STAT_CONN_CACHE_BYTES_LEAF 1118
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1118
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1119
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1119
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1120
/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1120
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1121
/*! connection: auto adjusting condition resets */
-#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1121
+#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1122
/*! connection: auto adjusting condition wait calls */
-#define WT_STAT_CONN_COND_AUTO_WAIT 1122
+#define WT_STAT_CONN_COND_AUTO_WAIT 1123
/*! connection: detected system time went backwards */
-#define WT_STAT_CONN_TIME_TRAVEL 1123
+#define WT_STAT_CONN_TIME_TRAVEL 1124
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1124
+#define WT_STAT_CONN_FILE_OPEN 1125
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1125
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1126
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1126
+#define WT_STAT_CONN_MEMORY_FREE 1127
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1127
+#define WT_STAT_CONN_MEMORY_GROW 1128
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1128
+#define WT_STAT_CONN_COND_WAIT 1129
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1129
+#define WT_STAT_CONN_RWLOCK_READ 1130
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1130
+#define WT_STAT_CONN_RWLOCK_WRITE 1131
/*! connection: total fsync I/Os */
-#define WT_STAT_CONN_FSYNC_IO 1131
+#define WT_STAT_CONN_FSYNC_IO 1132
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1132
+#define WT_STAT_CONN_READ_IO 1133
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1133
+#define WT_STAT_CONN_WRITE_IO 1134
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1134
+#define WT_STAT_CONN_CURSOR_CREATE 1135
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1135
+#define WT_STAT_CONN_CURSOR_INSERT 1136
/*! cursor: cursor modify calls */
-#define WT_STAT_CONN_CURSOR_MODIFY 1136
+#define WT_STAT_CONN_CURSOR_MODIFY 1137
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1137
+#define WT_STAT_CONN_CURSOR_NEXT 1138
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1138
+#define WT_STAT_CONN_CURSOR_PREV 1139
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1139
+#define WT_STAT_CONN_CURSOR_REMOVE 1140
/*! cursor: cursor reserve calls */
-#define WT_STAT_CONN_CURSOR_RESERVE 1140
+#define WT_STAT_CONN_CURSOR_RESERVE 1141
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1141
+#define WT_STAT_CONN_CURSOR_RESET 1142
/*! cursor: cursor restarted searches */
-#define WT_STAT_CONN_CURSOR_RESTART 1142
+#define WT_STAT_CONN_CURSOR_RESTART 1143
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1143
+#define WT_STAT_CONN_CURSOR_SEARCH 1144
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1144
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1145
/*! cursor: cursor sweep buckets */
-#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1145
+#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1146
/*! cursor: cursor sweep cursors closed */
-#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1146
+#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1147
/*! cursor: cursor sweep cursors examined */
-#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1147
+#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1148
/*! cursor: cursor sweeps */
-#define WT_STAT_CONN_CURSOR_SWEEP 1148
+#define WT_STAT_CONN_CURSOR_SWEEP 1149
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1149
+#define WT_STAT_CONN_CURSOR_UPDATE 1150
/*! cursor: cursors cached on close */
-#define WT_STAT_CONN_CURSOR_CACHE 1150
+#define WT_STAT_CONN_CURSOR_CACHE 1151
/*! cursor: cursors reused from cache */
-#define WT_STAT_CONN_CURSOR_REOPEN 1151
+#define WT_STAT_CONN_CURSOR_REOPEN 1152
/*! cursor: truncate calls */
-#define WT_STAT_CONN_CURSOR_TRUNCATE 1152
+#define WT_STAT_CONN_CURSOR_TRUNCATE 1153
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1153
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1154
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1154
+#define WT_STAT_CONN_DH_SWEEP_REF 1155
/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1155
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1156
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1156
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1157
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1157
+#define WT_STAT_CONN_DH_SWEEP_TOD 1158
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1158
+#define WT_STAT_CONN_DH_SWEEPS 1159
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1159
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1160
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1160
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1161
/*! lock: checkpoint lock acquisitions */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1161
+#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1162
/*! lock: checkpoint lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1162
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1163
/*! lock: checkpoint lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1163
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1164
/*!
* lock: commit timestamp queue lock application thread time waiting for
* the dhandle lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_APPLICATION 1164
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_APPLICATION 1165
/*!
* lock: commit timestamp queue lock internal thread time waiting for the
* dhandle lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_INTERNAL 1165
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_INTERNAL 1166
/*! lock: commit timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_READ_COUNT 1166
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_READ_COUNT 1167
/*! lock: commit timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WRITE_COUNT 1167
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WRITE_COUNT 1168
/*!
* lock: dhandle lock application thread time waiting for the dhandle
* lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1168
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1169
/*!
* lock: dhandle lock internal thread time waiting for the dhandle lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1169
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1170
/*! lock: dhandle read lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1170
+#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1171
/*! lock: dhandle write lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1171
+#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1172
/*! lock: metadata lock acquisitions */
-#define WT_STAT_CONN_LOCK_METADATA_COUNT 1172
+#define WT_STAT_CONN_LOCK_METADATA_COUNT 1173
/*! lock: metadata lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1173
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1174
/*! lock: metadata lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1174
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1175
/*!
* lock: read timestamp queue lock application thread time waiting for
* the dhandle lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1175
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1176
/*!
* lock: read timestamp queue lock internal thread time waiting for the
* dhandle lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1176
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1177
/*! lock: read timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1177
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1178
/*! lock: read timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1178
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1179
/*! lock: schema lock acquisitions */
-#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1179
+#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1180
/*! lock: schema lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1180
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1181
/*! lock: schema lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1181
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1182
/*!
* lock: table lock application thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1182
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1183
/*!
* lock: table lock internal thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1183
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1184
/*! lock: table read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1184
+#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1185
/*! lock: table write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1185
+#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1186
/*!
* lock: txn global lock application thread time waiting for the dhandle
* lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1186
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1187
/*!
* lock: txn global lock internal thread time waiting for the dhandle
* lock (usecs)
*/
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1187
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1188
/*! lock: txn global read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1188
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1189
/*! lock: txn global write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1189
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1190
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1190
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1191
/*! log: force checkpoint calls slept */
-#define WT_STAT_CONN_LOG_FORCE_CKPT_SLEEP 1191
+#define WT_STAT_CONN_LOG_FORCE_CKPT_SLEEP 1192
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1192
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1193
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1193
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1194
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1194
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1195
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1195
+#define WT_STAT_CONN_LOG_FLUSH 1196
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1196
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1197
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1197
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1198
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1198
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1199
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1199
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1200
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1200
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1201
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1201
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1202
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1202
+#define WT_STAT_CONN_LOG_SCANS 1203
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1203
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1204
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1204
+#define WT_STAT_CONN_LOG_WRITE_LSN 1205
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1205
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1206
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1206
+#define WT_STAT_CONN_LOG_SYNC 1207
/*! log: log sync time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DURATION 1207
+#define WT_STAT_CONN_LOG_SYNC_DURATION 1208
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1208
+#define WT_STAT_CONN_LOG_SYNC_DIR 1209
/*! log: log sync_dir time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1209
+#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1210
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1210
+#define WT_STAT_CONN_LOG_WRITES 1211
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1211
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1212
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1212
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1213
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1213
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1214
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1214
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1215
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1215
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1216
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1216
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1217
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1217
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1218
/*! log: slot close lost race */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1218
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1219
/*! log: slot close unbuffered waits */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1219
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1220
/*! log: slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1220
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1221
/*! log: slot join atomic update races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1221
+#define WT_STAT_CONN_LOG_SLOT_RACES 1222
/*! log: slot join calls atomic updates raced */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1222
+#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1223
/*! log: slot join calls did not yield */
-#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1223
+#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1224
/*! log: slot join calls found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1224
+#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1225
/*! log: slot join calls slept */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1225
+#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1226
/*! log: slot join calls yielded */
-#define WT_STAT_CONN_LOG_SLOT_YIELD 1226
+#define WT_STAT_CONN_LOG_SLOT_YIELD 1227
/*! log: slot join found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1227
+#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1228
/*! log: slot joins yield time (usecs) */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1228
+#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1229
/*! log: slot transitions unable to find free slot */
-#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1229
+#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1230
/*! log: slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1230
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1231
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1231
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1232
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1232
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1233
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1233
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1234
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1234
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1235
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1235
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1236
/*! perf: file system read latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1236
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1237
/*! perf: file system read latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1237
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1238
/*! perf: file system read latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1238
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1239
/*! perf: file system read latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1239
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1240
/*! perf: file system read latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1240
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1241
/*! perf: file system read latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1241
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1242
/*! perf: file system write latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1242
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1243
/*! perf: file system write latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1243
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1244
/*! perf: file system write latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1244
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1245
/*! perf: file system write latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1245
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1246
/*! perf: file system write latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1246
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1247
/*! perf: file system write latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1247
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1248
/*! perf: operation read latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1248
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1249
/*! perf: operation read latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1249
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1250
/*! perf: operation read latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1250
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1251
/*! perf: operation read latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1251
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1252
/*! perf: operation read latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1252
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1253
/*! perf: operation write latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1253
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1254
/*! perf: operation write latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1254
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1255
/*! perf: operation write latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1255
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1256
/*! perf: operation write latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1256
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1257
/*! perf: operation write latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1257
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1258
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1258
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1259
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1259
+#define WT_STAT_CONN_REC_PAGES 1260
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1260
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1261
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1261
+#define WT_STAT_CONN_REC_PAGE_DELETE 1262
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1262
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1263
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1263
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1264
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1264
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1265
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1265
+#define WT_STAT_CONN_SESSION_OPEN 1266
/*! session: table alter failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1266
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1267
/*! session: table alter successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1267
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1268
/*! session: table alter unchanged and skipped */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1268
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1269
/*! session: table compact failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1269
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1270
/*! session: table compact successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1270
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1271
/*! session: table create failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1271
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1272
/*! session: table create successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1272
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1273
/*! session: table drop failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1273
+#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1274
/*! session: table drop successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1274
+#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1275
/*! session: table rebalance failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1275
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1276
/*! session: table rebalance successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1276
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1277
/*! session: table rename failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1277
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1278
/*! session: table rename successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1278
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1279
/*! session: table salvage failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1279
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1280
/*! session: table salvage successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1280
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1281
/*! session: table truncate failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1281
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1282
/*! session: table truncate successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1282
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1283
/*! session: table verify failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1283
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1284
/*! session: table verify successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1284
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1285
/*! thread-state: active filesystem fsync calls */
-#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1285
+#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1286
/*! thread-state: active filesystem read calls */
-#define WT_STAT_CONN_THREAD_READ_ACTIVE 1286
+#define WT_STAT_CONN_THREAD_READ_ACTIVE 1287
/*! thread-state: active filesystem write calls */
-#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1287
+#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1288
/*! thread-yield: application thread time evicting (usecs) */
-#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1288
+#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1289
/*! thread-yield: application thread time waiting for cache (usecs) */
-#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1289
+#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1290
/*!
* thread-yield: connection close blocked waiting for transaction state
* stabilization
*/
-#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1290
+#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1291
/*! thread-yield: connection close yielded for lsm manager shutdown */
-#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1291
+#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1292
/*! thread-yield: data handle lock yielded */
-#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1292
+#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1293
/*!
* thread-yield: get reference for page index and slot time sleeping
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1293
+#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1294
/*! thread-yield: log server sync yielded for log write */
-#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1294
+#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1295
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1295
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1296
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1296
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1297
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1297
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1298
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1298
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1299
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1299
+#define WT_STAT_CONN_PAGE_SLEEP 1300
/*!
* thread-yield: page delete rollback time sleeping for state change
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1300
+#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1301
/*! thread-yield: page reconciliation yielded due to child modification */
-#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1301
-/*!
- * thread-yield: tree descend one level yielded for split page index
- * update
- */
-#define WT_STAT_CONN_TREE_DESCEND_BLOCKED 1302
+#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1302
/*! transaction: commit timestamp queue insert to empty */
#define WT_STAT_CONN_TXN_COMMIT_QUEUE_EMPTY 1303
/*! transaction: commit timestamp queue inserts to tail */
@@ -5825,220 +5824,222 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2067
/*! cache: pages read into cache */
#define WT_STAT_DSRC_CACHE_READ 2068
+/*! cache: pages read into cache after truncate */
+#define WT_STAT_DSRC_CACHE_READ_DELETED 2069
/*! cache: pages read into cache requiring lookaside entries */
-#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2069
+#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2070
/*! cache: pages requested from the cache */
-#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2070
+#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2071
/*! cache: pages seen by eviction walk */
-#define WT_STAT_DSRC_CACHE_EVICTION_PAGES_SEEN 2071
+#define WT_STAT_DSRC_CACHE_EVICTION_PAGES_SEEN 2072
/*! cache: pages written from cache */
-#define WT_STAT_DSRC_CACHE_WRITE 2072
+#define WT_STAT_DSRC_CACHE_WRITE 2073
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2073
+#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2074
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_DSRC_CACHE_BYTES_DIRTY 2074
+#define WT_STAT_DSRC_CACHE_BYTES_DIRTY 2075
/*! cache: unmodified pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2075
+#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2076
/*!
* cache_walk: Average difference between current eviction generation
* when the page was last considered, only reported if cache_walk or all
* statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_GEN_AVG_GAP 2076
+#define WT_STAT_DSRC_CACHE_STATE_GEN_AVG_GAP 2077
/*!
* cache_walk: Average on-disk page image size seen, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_AVG_WRITTEN_SIZE 2077
+#define WT_STAT_DSRC_CACHE_STATE_AVG_WRITTEN_SIZE 2078
/*!
* cache_walk: Average time in cache for pages that have been visited by
* the eviction server, only reported if cache_walk or all statistics are
* enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_AVG_VISITED_AGE 2078
+#define WT_STAT_DSRC_CACHE_STATE_AVG_VISITED_AGE 2079
/*!
* cache_walk: Average time in cache for pages that have not been visited
* by the eviction server, only reported if cache_walk or all statistics
* are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_AVG_UNVISITED_AGE 2079
+#define WT_STAT_DSRC_CACHE_STATE_AVG_UNVISITED_AGE 2080
/*!
* cache_walk: Clean pages currently in cache, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_PAGES_CLEAN 2080
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_CLEAN 2081
/*!
* cache_walk: Current eviction generation, only reported if cache_walk
* or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_GEN_CURRENT 2081
+#define WT_STAT_DSRC_CACHE_STATE_GEN_CURRENT 2082
/*!
* cache_walk: Dirty pages currently in cache, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_PAGES_DIRTY 2082
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_DIRTY 2083
/*!
* cache_walk: Entries in the root page, only reported if cache_walk or
* all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_ROOT_ENTRIES 2083
+#define WT_STAT_DSRC_CACHE_STATE_ROOT_ENTRIES 2084
/*!
* cache_walk: Internal pages currently in cache, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_PAGES_INTERNAL 2084
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_INTERNAL 2085
/*!
* cache_walk: Leaf pages currently in cache, only reported if cache_walk
* or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_PAGES_LEAF 2085
+#define WT_STAT_DSRC_CACHE_STATE_PAGES_LEAF 2086
/*!
* cache_walk: Maximum difference between current eviction generation
* when the page was last considered, only reported if cache_walk or all
* statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_GEN_MAX_GAP 2086
+#define WT_STAT_DSRC_CACHE_STATE_GEN_MAX_GAP 2087
/*!
* cache_walk: Maximum page size seen, only reported if cache_walk or all
* statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_MAX_PAGESIZE 2087
+#define WT_STAT_DSRC_CACHE_STATE_MAX_PAGESIZE 2088
/*!
* cache_walk: Minimum on-disk page image size seen, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_MIN_WRITTEN_SIZE 2088
+#define WT_STAT_DSRC_CACHE_STATE_MIN_WRITTEN_SIZE 2089
/*!
* cache_walk: Number of pages never visited by eviction server, only
* reported if cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_UNVISITED_COUNT 2089
+#define WT_STAT_DSRC_CACHE_STATE_UNVISITED_COUNT 2090
/*!
* cache_walk: On-disk page image sizes smaller than a single allocation
* unit, only reported if cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_SMALLER_ALLOC_SIZE 2090
+#define WT_STAT_DSRC_CACHE_STATE_SMALLER_ALLOC_SIZE 2091
/*!
* cache_walk: Pages created in memory and never written, only reported
* if cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_MEMORY 2091
+#define WT_STAT_DSRC_CACHE_STATE_MEMORY 2092
/*!
* cache_walk: Pages currently queued for eviction, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_QUEUED 2092
+#define WT_STAT_DSRC_CACHE_STATE_QUEUED 2093
/*!
* cache_walk: Pages that could not be queued for eviction, only reported
* if cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_NOT_QUEUEABLE 2093
+#define WT_STAT_DSRC_CACHE_STATE_NOT_QUEUEABLE 2094
/*!
* cache_walk: Refs skipped during cache traversal, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_REFS_SKIPPED 2094
+#define WT_STAT_DSRC_CACHE_STATE_REFS_SKIPPED 2095
/*!
* cache_walk: Size of the root page, only reported if cache_walk or all
* statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_ROOT_SIZE 2095
+#define WT_STAT_DSRC_CACHE_STATE_ROOT_SIZE 2096
/*!
* cache_walk: Total number of pages currently in cache, only reported if
* cache_walk or all statistics are enabled
*/
-#define WT_STAT_DSRC_CACHE_STATE_PAGES 2096
+#define WT_STAT_DSRC_CACHE_STATE_PAGES 2097
/*! compression: compressed pages read */
-#define WT_STAT_DSRC_COMPRESS_READ 2097
+#define WT_STAT_DSRC_COMPRESS_READ 2098
/*! compression: compressed pages written */
-#define WT_STAT_DSRC_COMPRESS_WRITE 2098
+#define WT_STAT_DSRC_COMPRESS_WRITE 2099
/*! compression: page written failed to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2099
+#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2100
/*! compression: page written was too small to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2100
+#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2101
/*! compression: raw compression call failed, additional data available */
-#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2101
+#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2102
/*! compression: raw compression call failed, no additional data available */
-#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2102
+#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2103
/*! compression: raw compression call succeeded */
-#define WT_STAT_DSRC_COMPRESS_RAW_OK 2103
+#define WT_STAT_DSRC_COMPRESS_RAW_OK 2104
/*! cursor: bulk-loaded cursor-insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2104
+#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2105
/*! cursor: create calls */
-#define WT_STAT_DSRC_CURSOR_CREATE 2105
+#define WT_STAT_DSRC_CURSOR_CREATE 2106
/*! cursor: cursor-insert key and value bytes inserted */
-#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2106
+#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2107
/*! cursor: cursor-remove key bytes removed */
-#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2107
+#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2108
/*! cursor: cursor-update value bytes updated */
-#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2108
+#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2109
/*! cursor: cursors cached on close */
-#define WT_STAT_DSRC_CURSOR_CACHE 2109
+#define WT_STAT_DSRC_CURSOR_CACHE 2110
/*! cursor: cursors reused from cache */
-#define WT_STAT_DSRC_CURSOR_REOPEN 2110
+#define WT_STAT_DSRC_CURSOR_REOPEN 2111
/*! cursor: insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT 2111
+#define WT_STAT_DSRC_CURSOR_INSERT 2112
/*! cursor: modify calls */
-#define WT_STAT_DSRC_CURSOR_MODIFY 2112
+#define WT_STAT_DSRC_CURSOR_MODIFY 2113
/*! cursor: next calls */
-#define WT_STAT_DSRC_CURSOR_NEXT 2113
+#define WT_STAT_DSRC_CURSOR_NEXT 2114
/*! cursor: prev calls */
-#define WT_STAT_DSRC_CURSOR_PREV 2114
+#define WT_STAT_DSRC_CURSOR_PREV 2115
/*! cursor: remove calls */
-#define WT_STAT_DSRC_CURSOR_REMOVE 2115
+#define WT_STAT_DSRC_CURSOR_REMOVE 2116
/*! cursor: reserve calls */
-#define WT_STAT_DSRC_CURSOR_RESERVE 2116
+#define WT_STAT_DSRC_CURSOR_RESERVE 2117
/*! cursor: reset calls */
-#define WT_STAT_DSRC_CURSOR_RESET 2117
+#define WT_STAT_DSRC_CURSOR_RESET 2118
/*! cursor: restarted searches */
-#define WT_STAT_DSRC_CURSOR_RESTART 2118
+#define WT_STAT_DSRC_CURSOR_RESTART 2119
/*! cursor: search calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH 2119
+#define WT_STAT_DSRC_CURSOR_SEARCH 2120
/*! cursor: search near calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2120
+#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2121
/*! cursor: truncate calls */
-#define WT_STAT_DSRC_CURSOR_TRUNCATE 2121
+#define WT_STAT_DSRC_CURSOR_TRUNCATE 2122
/*! cursor: update calls */
-#define WT_STAT_DSRC_CURSOR_UPDATE 2122
+#define WT_STAT_DSRC_CURSOR_UPDATE 2123
/*! reconciliation: dictionary matches */
-#define WT_STAT_DSRC_REC_DICTIONARY 2123
+#define WT_STAT_DSRC_REC_DICTIONARY 2124
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2124
+#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2125
/*!
* reconciliation: internal page key bytes discarded using suffix
* compression
*/
-#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2125
+#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2126
/*! reconciliation: internal page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2126
+#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2127
/*! reconciliation: internal-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2127
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2128
/*! reconciliation: leaf page key bytes discarded using prefix compression */
-#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2128
+#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2129
/*! reconciliation: leaf page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2129
+#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2130
/*! reconciliation: leaf-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2130
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2131
/*! reconciliation: maximum blocks required for a page */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2131
+#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2132
/*! reconciliation: overflow values written */
-#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2132
+#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2133
/*! reconciliation: page checksum matches */
-#define WT_STAT_DSRC_REC_PAGE_MATCH 2133
+#define WT_STAT_DSRC_REC_PAGE_MATCH 2134
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_DSRC_REC_PAGES 2134
+#define WT_STAT_DSRC_REC_PAGES 2135
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_DSRC_REC_PAGES_EVICTION 2135
+#define WT_STAT_DSRC_REC_PAGES_EVICTION 2136
/*! reconciliation: pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE 2136
+#define WT_STAT_DSRC_REC_PAGE_DELETE 2137
/*! session: cached cursor count */
-#define WT_STAT_DSRC_SESSION_CURSOR_CACHED 2137
+#define WT_STAT_DSRC_SESSION_CURSOR_CACHED 2138
/*! session: object compaction */
-#define WT_STAT_DSRC_SESSION_COMPACT 2138
+#define WT_STAT_DSRC_SESSION_COMPACT 2139
/*! session: open cursor count */
-#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2139
+#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2140
/*! transaction: update conflicts */
-#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2140
+#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2141
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/os_posix/os_fs.c b/src/third_party/wiredtiger/src/os_posix/os_fs.c
index 811c0576eef..7875a6be028 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_fs.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_fs.c
@@ -39,6 +39,8 @@ __posix_sync(
WT_DECL_RET;
#if defined(F_FULLFSYNC)
+ static bool fullfsync_error_logged = false;
+
/*
* OS X fsync documentation:
* "Note that while fsync() will flush all data from the host to the
@@ -56,10 +58,16 @@ __posix_sync(
WT_SYSCALL_RETRY(fcntl(fd, F_FULLFSYNC, 0) == -1 ? -1 : 0, ret);
if (ret == 0)
return (0);
+
/*
* Assume F_FULLFSYNC failed because the file system doesn't support it
* and fallback to fsync.
*/
+ if (!fullfsync_error_logged) {
+ fullfsync_error_logged = true;
+ __wt_err(session, ret,
+ "fcntl(F_FULLFSYNC) failed, falling back to fsync");
+ }
#endif
#if defined(HAVE_FDATASYNC)
WT_SYSCALL_RETRY(fdatasync(fd), ret);
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 1c46da9be10..8bc022cd3e3 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -1345,7 +1345,8 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
* globally visible, need to check the update state as well.
*/
if (F_ISSET(r, WT_REC_EVICT) &&
- (upd->state != WT_UPDATE_STATE_READY ||
+ (upd->prepare_state == WT_PREPARE_LOCKED ||
+ upd->prepare_state == WT_PREPARE_INPROGRESS ||
(F_ISSET(r, WT_REC_VISIBLE_ALL) ?
WT_TXNID_LE(r->last_running, txnid) :
!__txn_visible_id(session, txnid)))) {
@@ -1631,10 +1632,12 @@ __rec_child_deleted(WT_SESSION_IMPL *session,
* it holds the transaction ID we care about.
*
* In some cases, there had better not be any updates we can't see.
+ *
+ * A visible update must be in READY state (i.e. not in LOCKED or
+ * PREPARED state) to be truly visible to others.
*/
if (F_ISSET(r, WT_REC_VISIBILITY_ERR) && page_del != NULL &&
- !__wt_txn_visible(session,
- page_del->txnid, WT_TIMESTAMP_NULL(&page_del->timestamp)))
+ __wt_page_del_active(session, ref, false))
WT_PANIC_RET(session, EINVAL,
"reconciliation illegally skipped an update");
@@ -1662,9 +1665,7 @@ __rec_child_deleted(WT_SESSION_IMPL *session,
* read into this part of the name space again, the cache read function
* instantiates an entirely new page.)
*/
- if (ref->addr != NULL &&
- (page_del == NULL || __wt_txn_visible_all(
- session, page_del->txnid, WT_TIMESTAMP_NULL(&page_del->timestamp))))
+ if (ref->addr != NULL && !__wt_page_del_active(session, ref, true))
WT_RET(__wt_ref_block_free(session, ref));
/*
@@ -1709,10 +1710,11 @@ __rec_child_deleted(WT_SESSION_IMPL *session,
* page to reference it from the parent page.
*
* If the delete is not visible in this checkpoint, write the original
- * address normally. Otherwise, we have to write a proxy record.
+ * address normally. Otherwise, we have to write a proxy record.
+ * If the delete state is not ready, then the delete is not visible as
+ * it is in a prepared state.
*/
- if (__wt_txn_visible(
- session, page_del->txnid, WT_TIMESTAMP_NULL(&page_del->timestamp)))
+ if (!__wt_page_del_active(session, ref, false))
*statep = WT_CHILD_PROXY;
return (0);
@@ -1838,6 +1840,11 @@ __rec_child_modify(WT_SESSION_IMPL *session,
*
* This call cannot return split/restart, we have a lock
* on the parent which prevents a child page split.
+ *
+ * Set WT_READ_NO_WAIT because we're only interested in
+ * the WT_REF's final state. Pages in transition might
+ * change WT_REF state during our read, and then return
+ * WT_NOTFOUND to us. In that case, loop and look again.
*/
ret = __wt_page_in(session, ref,
WT_READ_CACHE | WT_READ_NO_EVICT |
diff --git a/src/third_party/wiredtiger/src/schema/schema_alter.c b/src/third_party/wiredtiger/src/schema/schema_alter.c
index a957969e6cc..2ebfcfc5d9d 100644
--- a/src/third_party/wiredtiger/src/schema/schema_alter.c
+++ b/src/third_party/wiredtiger/src/schema/schema_alter.c
@@ -7,41 +7,33 @@
*/
#include "wt_internal.h"
+static int __schema_alter(WT_SESSION_IMPL *, const char *, const char *[]);
/*
- * __wt_alter --
- * Alter a file.
+ * __alter_apply --
+ * Alter an object.
*/
-int
-__wt_alter(WT_SESSION_IMPL *session, const char *newcfg[])
+static int
+__alter_apply(WT_SESSION_IMPL *session,
+ const char *uri, const char *newcfg[], const char *base_config)
{
WT_DECL_RET;
- const char *cfg[4], *filename, *uri;
+ const char *cfg[4];
char *config, *newconfig;
- uri = session->dhandle->name;
- WT_RET(__wt_meta_track_on(session));
-
- /*
- * We know that we have exclusive access to the file. So it will be
- * closed after we're done with it and the next open will see the
- * updated metadata.
- */
- filename = uri;
newconfig = NULL;
- if (!WT_PREFIX_SKIP(filename, "file:"))
- return (__wt_unexpected_object_type(session, uri, "file:"));
/* Find the URI */
WT_RET(__wt_metadata_search(session, uri, &config));
WT_ASSERT(session, newcfg[0] != NULL);
+
/*
* Start with the base configuration because collapse is like
* a projection and if we are reading older metadata, it may not
* have all the components.
*/
- cfg[0] = WT_CONFIG_BASE(session, file_meta);
+ cfg[0] = base_config;
cfg[1] = config;
cfg[2] = newcfg[0];
cfg[3] = NULL;
@@ -63,7 +55,176 @@ err: __wt_free(session, config);
if (ret == WT_NOTFOUND)
ret = ENOENT;
- WT_TRET(__wt_meta_track_off(session, true, ret != 0));
+ return (ret);
+}
+
+/*
+ * __alter_file --
+ * Alter the file named by the session's current data handle.
+ */
+static int
+__alter_file(WT_SESSION_IMPL *session, const char *newcfg[])
+{
+ const char *uri;
+
+ /*
+ * We know that we have exclusive access to the file. So it will be
+ * closed after we're done with it and the next open will see the
+ * updated metadata.
+ */
+ uri = session->dhandle->name;
+ if (!WT_PREFIX_MATCH(uri, "file:")) /* Reject non-file handles. */
+ return (__wt_unexpected_object_type(session, uri, "file:"));
+
+ return (__alter_apply(session,
+ uri, newcfg, WT_CONFIG_BASE(session, file_meta)));
+}
+
+/*
+ * __alter_tree --
+ * Alter an index or colgroup, and the data source it references.
+ */
+static int
+__alter_tree(WT_SESSION_IMPL *session, const char *name, const char *newcfg[])
+{
+ WT_CONFIG_ITEM cval;
+ WT_DECL_ITEM(data_source);
+ WT_DECL_RET;
+ char *value;
+ bool is_colgroup;
+
+ value = NULL;
+
+ is_colgroup = WT_PREFIX_MATCH(name, "colgroup:");
+ if (!is_colgroup && !WT_PREFIX_MATCH(name, "index:"))
+ return (__wt_unexpected_object_type(
+ session, name, "'colgroup:' or 'index:'"));
+
+ /* Read the metadata entry stored under this name. */
+ WT_ERR(__wt_metadata_search(session, name, &value));
+
+ /* Get the data source URI from the entry's source key. */
+ if ((ret = __wt_config_getones(session, value, "source", &cval)) != 0)
+ WT_ERR_MSG(session, EINVAL,
+ "index or column group has no data source: %s", value);
+ WT_ERR(__wt_scr_alloc(session, 0, &data_source));
+ WT_ERR(__wt_buf_fmt(session,
+ data_source, "%.*s", (int)cval.len, cval.str));
+
+ /* Alter the data source first, via the URI-type dispatcher. */
+ WT_ERR(__schema_alter(session, data_source->data, newcfg));
+
+ /* Then alter the index or colgroup entry itself. */
+ if (is_colgroup)
+ WT_ERR(__alter_apply(session,
+ name, newcfg, WT_CONFIG_BASE(session, colgroup_meta)));
+ else
+ WT_ERR(__alter_apply(session,
+ name, newcfg, WT_CONFIG_BASE(session, index_meta)));
+
+err: __wt_scr_free(session, &data_source); /* Reached on success too. */
+ __wt_free(session, value);
+ return (ret);
+}
+
+/*
+ * __alter_table --
+ * Alter a table, its column groups and its indices.
+ */
+static int
+__alter_table(WT_SESSION_IMPL *session, const char *uri, const char *newcfg[])
+{
+ WT_COLGROUP *colgroup;
+ WT_DECL_RET;
+ WT_INDEX *idx;
+ WT_TABLE *table;
+ u_int i;
+ const char *name;
+
+ colgroup = NULL;
+ table = NULL;
+ name = uri;
+ WT_PREFIX_SKIP_REQUIRED(session, name, "table:");
+
+ /*
+ * Open the table so we can alter its column groups and indexes, keeping
+ * the table locked exclusive across the alter.
+ */
+ WT_RET(__wt_schema_get_table_uri(session, uri, true,
+ WT_DHANDLE_EXCLUSIVE, &table));
+ /* Meta tracking needs to be used because alter needs to be atomic. */
+ WT_ASSERT(session, WT_META_TRACKING(session));
+ WT_WITH_DHANDLE(session, &table->iface,
+ ret = __wt_meta_track_handle_lock(session, false));
+ WT_RET(ret); /* NOTE(review): no table release visible here; confirm. */
+
+ /* Alter the column groups, skipping any empty slots. */
+ for (i = 0; i < WT_COLGROUPS(table); i++) {
+ if ((colgroup = table->cgroups[i]) == NULL)
+ continue;
+ WT_RET(__alter_tree(session, colgroup->name, newcfg));
+ }
+
+ /* Open, then alter, the indices, skipping any empty slots. */
+ WT_RET(__wt_schema_open_indices(session, table));
+ for (i = 0; i < table->nindices; i++) {
+ if ((idx = table->indices[i]) == NULL)
+ continue;
+ WT_RET(__alter_tree(session, idx->name, newcfg));
+ }
+
+ /* Finally, alter the table's own metadata entry. */
+ WT_RET(__alter_apply(session,
+ uri, newcfg, WT_CONFIG_BASE(session, table_meta)));
+
+ return (ret);
+}
+
+/*
+ * __schema_alter --
+ * Alter an object, dispatching on the type prefix of its URI.
+ */
+static int
+__schema_alter(WT_SESSION_IMPL *session, const char *uri, const char *newcfg[])
+{
+ uint32_t flags;
+
+ /*
+ * The alter flag is used so LSM can apply some special logic, the
+ * exclusive flag avoids conflicts with other operations and the lock
+ * only flag is required because we don't need to have a handle to
+ * update the metadata and opening the handle causes problems when
+ * meta tracking is enabled.
+ */
+ flags = WT_BTREE_ALTER | WT_DHANDLE_EXCLUSIVE | WT_DHANDLE_LOCK_ONLY;
+ if (WT_PREFIX_MATCH(uri, "file:"))
+ return (__wt_exclusive_handle_operation(
+ session, uri, __alter_file, newcfg, flags));
+ if (WT_PREFIX_MATCH(uri, "colgroup:") ||
+ WT_PREFIX_MATCH(uri, "index:"))
+ return (__alter_tree(session, uri, newcfg));
+ if (WT_PREFIX_MATCH(uri, "lsm:"))
+ return (__wt_lsm_tree_worker(session, uri, __alter_file,
+ NULL, newcfg, flags));
+ if (WT_PREFIX_MATCH(uri, "table:"))
+ return (__alter_table(session, uri, newcfg));
+
+ return (__wt_bad_object_type(session, uri)); /* No prefix matched. */
+}
+
+/*
+ * __wt_schema_alter --
+ * Alter an object, with metadata tracking on for the whole operation.
+ */
+int
+__wt_schema_alter(WT_SESSION_IMPL *session,
+ const char *uri, const char *newcfg[])
+{
+ WT_DECL_RET;
+
+ WT_RET(__wt_meta_track_on(session));
+ ret = __schema_alter(session, uri, newcfg);
+ WT_TRET(__wt_meta_track_off(session, true, ret != 0));
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/schema/schema_worker.c b/src/third_party/wiredtiger/src/schema/schema_worker.c
index 407550bfdba..aa38ad79bee 100644
--- a/src/third_party/wiredtiger/src/schema/schema_worker.c
+++ b/src/third_party/wiredtiger/src/schema/schema_worker.c
@@ -9,6 +9,36 @@
#include "wt_internal.h"
/*
+ * __wt_exclusive_handle_operation --
+ * Get a file's handle (exclusively, if so flagged) and apply a function.
+ */
+int
+__wt_exclusive_handle_operation(WT_SESSION_IMPL *session,
+ const char *uri,
+ int (*file_func)(WT_SESSION_IMPL *, const char *[]),
+ const char *cfg[], uint32_t open_flags)
+{
+ WT_DECL_RET;
+
+ /*
+ * If the operation requires exclusive access, first close
+ * any open file handles, including checkpoints.
+ */
+ if (FLD_ISSET(open_flags, WT_DHANDLE_EXCLUSIVE)) {
+ WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
+ ret = __wt_conn_dhandle_close_all(
+ session, uri, false, false));
+ WT_RET(ret);
+ }
+
+ WT_RET(__wt_session_get_btree_ckpt(session, uri, cfg, open_flags));
+ WT_SAVE_DHANDLE(session, ret = file_func(session, cfg));
+ WT_TRET(__wt_session_release_dhandle(session));
+
+ return (ret);
+}
+
+/*
* __wt_schema_worker --
* Get Btree handles for the object and cycle through calls to an
* underlying worker function with each handle.
@@ -41,25 +71,9 @@ __wt_schema_worker(WT_SESSION_IMPL *session,
/* Get the btree handle(s) and call the underlying function. */
if (WT_PREFIX_MATCH(uri, "file:")) {
- if (file_func != NULL) {
- /*
- * If the operation requires exclusive access, close
- * any open file handles, including checkpoints.
- */
- if (FLD_ISSET(open_flags, WT_DHANDLE_EXCLUSIVE)) {
- WT_WITH_HANDLE_LIST_WRITE_LOCK(session,
- ret = __wt_conn_dhandle_close_all(
- session, uri, false, false));
- WT_ERR(ret);
- }
-
- WT_ERR(__wt_session_get_btree_ckpt(
- session, uri, cfg, open_flags));
- WT_SAVE_DHANDLE(session,
- ret = file_func(session, cfg));
- WT_TRET(__wt_session_release_dhandle(session));
- WT_ERR(ret);
- }
+ if (file_func != NULL)
+ WT_ERR(__wt_exclusive_handle_operation(session,
+ uri, file_func, cfg, open_flags));
} else if (WT_PREFIX_MATCH(uri, "colgroup:")) {
WT_ERR(__wt_schema_get_colgroup(
session, uri, false, NULL, &colgroup));
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index 8db4c5a7615..cd06073a120 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -578,7 +578,7 @@ __wt_open_cursor(WT_SESSION_IMPL *session,
/* We do not cache any subordinate tables/files cursors. */
if (owner == NULL) {
if ((ret = __wt_cursor_cache_get(
- session, uri, cfg, cursorp)) == 0)
+ session, uri, NULL, cfg, cursorp)) == 0)
return (0);
WT_RET_NOTFOUND_OK(ret);
}
@@ -605,35 +605,37 @@ __session_open_cursor(WT_SESSION *wt_session,
session = (WT_SESSION_IMPL *)wt_session;
SESSION_API_CALL(session, open_cursor, config, cfg);
- if (to_dup == NULL) {
+ statjoin = (to_dup != NULL && uri != NULL &&
+ WT_STREQ(uri, "statistics:join"));
+ if (!statjoin) {
+ if ((to_dup == NULL && uri == NULL) ||
+ (to_dup != NULL && uri != NULL))
+ WT_ERR_MSG(session, EINVAL,
+ "should be passed either a URI or a cursor to "
+ "duplicate, but not both");
+
if ((ret = __wt_cursor_cache_get(
- session, uri, cfg, cursorp)) == 0)
+ session, uri, to_dup, cfg, &cursor)) == 0)
goto done;
- WT_RET_NOTFOUND_OK(ret);
- }
+ WT_ERR_NOTFOUND_OK(ret);
- statjoin = (to_dup != NULL && uri != NULL &&
- WT_STREQ(uri, "statistics:join"));
- if ((to_dup == NULL && uri == NULL) ||
- (to_dup != NULL && uri != NULL && !statjoin))
- WT_ERR_MSG(session, EINVAL,
- "should be passed either a URI or a cursor to duplicate, "
- "but not both");
-
- if (to_dup != NULL && !statjoin) {
- uri = to_dup->uri;
- if (!WT_PREFIX_MATCH(uri, "colgroup:") &&
- !WT_PREFIX_MATCH(uri, "index:") &&
- !WT_PREFIX_MATCH(uri, "file:") &&
- !WT_PREFIX_MATCH(uri, "lsm:") &&
- !WT_PREFIX_MATCH(uri, WT_METADATA_URI) &&
- !WT_PREFIX_MATCH(uri, "table:") &&
- __wt_schema_get_source(session, uri) == NULL)
- WT_ERR(__wt_bad_object_type(session, uri));
+ if (to_dup != NULL) {
+ uri = to_dup->uri;
+ if (!WT_PREFIX_MATCH(uri, "colgroup:") &&
+ !WT_PREFIX_MATCH(uri, "index:") &&
+ !WT_PREFIX_MATCH(uri, "file:") &&
+ !WT_PREFIX_MATCH(uri, "lsm:") &&
+ !WT_PREFIX_MATCH(uri, WT_METADATA_URI) &&
+ !WT_PREFIX_MATCH(uri, "table:") &&
+ __wt_schema_get_source(session, uri) == NULL)
+ WT_ERR(__wt_bad_object_type(session, uri));
+ }
}
WT_ERR(__session_open_cursor_int(session, uri, NULL,
statjoin ? to_dup : NULL, cfg, &cursor));
+
+done:
if (to_dup != NULL && !statjoin)
WT_ERR(__wt_cursor_dup_position(to_dup, cursor));
@@ -643,7 +645,6 @@ __session_open_cursor(WT_SESSION *wt_session,
err: if (cursor != NULL)
WT_TRET(cursor->close(cursor));
}
-done:
/*
* Opening a cursor on a non-existent data source will set ret to
* either of ENOENT or WT_NOTFOUND at this point. However,
@@ -687,8 +688,7 @@ __session_alter(WT_SESSION *wt_session, const char *uri, const char *config)
cfg[1] = NULL;
WT_WITH_CHECKPOINT_LOCK(session,
WT_WITH_SCHEMA_LOCK(session,
- ret = __wt_schema_worker(session, uri, __wt_alter, NULL, cfg,
- WT_BTREE_ALTER | WT_DHANDLE_EXCLUSIVE)));
+ ret = __wt_schema_alter(session, uri, cfg)));
err:
if (ret != 0)
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index ae13f7d8abe..c418591d294 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -72,6 +72,7 @@ static const char * const __stats_dsrc_desc[] = {
"cache: page split during eviction deepened the tree",
"cache: page written requiring lookaside records",
"cache: pages read into cache",
+ "cache: pages read into cache after truncate",
"cache: pages read into cache requiring lookaside entries",
"cache: pages requested from the cache",
"cache: pages seen by eviction walk",
@@ -255,6 +256,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->cache_eviction_deepen = 0;
stats->cache_write_lookaside = 0;
stats->cache_read = 0;
+ stats->cache_read_deleted = 0;
stats->cache_read_lookaside = 0;
stats->cache_pages_requested = 0;
stats->cache_eviction_pages_seen = 0;
@@ -435,6 +437,7 @@ __wt_stat_dsrc_aggregate_single(
to->cache_eviction_deepen += from->cache_eviction_deepen;
to->cache_write_lookaside += from->cache_write_lookaside;
to->cache_read += from->cache_read;
+ to->cache_read_deleted += from->cache_read_deleted;
to->cache_read_lookaside += from->cache_read_lookaside;
to->cache_pages_requested += from->cache_pages_requested;
to->cache_eviction_pages_seen += from->cache_eviction_pages_seen;
@@ -633,6 +636,7 @@ __wt_stat_dsrc_aggregate(
to->cache_write_lookaside +=
WT_STAT_READ(from, cache_write_lookaside);
to->cache_read += WT_STAT_READ(from, cache_read);
+ to->cache_read_deleted += WT_STAT_READ(from, cache_read_deleted);
to->cache_read_lookaside += WT_STAT_READ(from, cache_read_lookaside);
to->cache_pages_requested +=
WT_STAT_READ(from, cache_pages_requested);
@@ -844,6 +848,7 @@ static const char * const __stats_connection_desc[] = {
"cache: pages queued for urgent eviction",
"cache: pages queued for urgent eviction during walk",
"cache: pages read into cache",
+ "cache: pages read into cache after truncate",
"cache: pages read into cache requiring lookaside entries",
"cache: pages read into cache skipping older lookaside entries",
"cache: pages read into cache with skipped lookaside entries needed later",
@@ -1040,7 +1045,6 @@ static const char * const __stats_connection_desc[] = {
"thread-yield: page acquire time sleeping (usecs)",
"thread-yield: page delete rollback time sleeping for state change (usecs)",
"thread-yield: page reconciliation yielded due to child modification",
- "thread-yield: tree descend one level yielded for split page index update",
"transaction: commit timestamp queue insert to empty",
"transaction: commit timestamp queue inserts to tail",
"transaction: commit timestamp queue inserts total",
@@ -1233,6 +1237,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_eviction_pages_queued_urgent = 0;
stats->cache_eviction_pages_queued_oldest = 0;
stats->cache_read = 0;
+ stats->cache_read_deleted = 0;
stats->cache_read_lookaside = 0;
stats->cache_read_lookaside_skipped = 0;
stats->cache_read_lookaside_delay = 0;
@@ -1429,7 +1434,6 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->page_sleep = 0;
stats->page_del_rollback_blocked = 0;
stats->child_modify_blocked_page = 0;
- stats->tree_descend_blocked = 0;
stats->txn_commit_queue_empty = 0;
stats->txn_commit_queue_tail = 0;
stats->txn_commit_queue_inserts = 0;
@@ -1658,6 +1662,7 @@ __wt_stat_connection_aggregate(
to->cache_eviction_pages_queued_oldest +=
WT_STAT_READ(from, cache_eviction_pages_queued_oldest);
to->cache_read += WT_STAT_READ(from, cache_read);
+ to->cache_read_deleted += WT_STAT_READ(from, cache_read_deleted);
to->cache_read_lookaside += WT_STAT_READ(from, cache_read_lookaside);
to->cache_read_lookaside_skipped +=
WT_STAT_READ(from, cache_read_lookaside_skipped);
@@ -1944,7 +1949,6 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, page_del_rollback_blocked);
to->child_modify_blocked_page +=
WT_STAT_READ(from, child_modify_blocked_page);
- to->tree_descend_blocked += WT_STAT_READ(from, tree_descend_blocked);
to->txn_commit_queue_empty +=
WT_STAT_READ(from, txn_commit_queue_empty);
to->txn_commit_queue_tail +=
diff --git a/src/third_party/wiredtiger/src/support/thread_group.c b/src/third_party/wiredtiger/src/support/thread_group.c
index 50abe64bbe6..4597d26496d 100644
--- a/src/third_party/wiredtiger/src/support/thread_group.c
+++ b/src/third_party/wiredtiger/src/support/thread_group.c
@@ -81,8 +81,8 @@ __thread_group_shrink(
WT_ASSERT(session, thread->tid.created);
__wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Stopping utility thread: %p:%" PRIu32,
- (void *)group, thread->id);
+ "Stopping utility thread: %s:%" PRIu32,
+ group->name, thread->id);
if (F_ISSET(thread, WT_THREAD_ACTIVE))
--group->current_threads;
F_CLR(thread, WT_THREAD_ACTIVE | WT_THREAD_RUN);
@@ -143,9 +143,9 @@ __thread_group_resize(
thread = NULL;
__wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Resize thread group: %p, from min: %" PRIu32 " -> %" PRIu32
+ "Resize thread group: %s, from min: %" PRIu32 " -> %" PRIu32
" from max: %" PRIu32 " -> %" PRIu32,
- (void *)group, group->min, new_min, group->max, new_max);
+ group->name, group->min, new_min, group->max, new_max);
WT_ASSERT(session,
group->current_threads <= group->alloc &&
@@ -155,7 +155,10 @@ __thread_group_resize(
return (0);
if (new_min > new_max)
- return (EINVAL);
+ WT_RET_MSG(session, EINVAL,
+ "Illegal thread group resize: %s, from min: %" PRIu32
+ " -> %" PRIu32 " from max: %" PRIu32 " -> %" PRIu32,
+ group->name, group->min, new_min, group->max, new_max);
/*
* Call shrink to reduce the number of thread structures and running
@@ -205,8 +208,8 @@ __thread_group_resize(
* number later.
*/
__wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Starting utility thread: %p:%" PRIu32,
- (void *)group, thread->id);
+ "Starting utility thread: %s:%" PRIu32,
+ group->name, thread->id);
F_SET(thread, WT_THREAD_RUN);
WT_ERR(__wt_thread_create(thread->session,
&thread->tid, __thread_run, thread));
@@ -285,8 +288,8 @@ __wt_thread_group_create(
cond_alloced = false;
- __wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Creating thread group: %p", (void *)group);
+ __wt_verbose(session,
+ WT_VERB_THREAD_GROUP, "Creating thread group: %s", name);
WT_RET(__wt_rwlock_init(session, &group->lock));
WT_ERR(__wt_cond_alloc(
@@ -321,7 +324,7 @@ __wt_thread_group_destroy(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group)
WT_DECL_RET;
__wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Destroying thread group: %p", (void *)group);
+ "Destroying thread group: %s", group->name);
WT_ASSERT(session, __wt_rwlock_islocked(session, &group->lock));
@@ -364,8 +367,8 @@ __wt_thread_group_start_one(
thread = group->threads[group->current_threads++];
WT_ASSERT(session, thread != NULL);
__wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Activating utility thread: %p:%" PRIu32,
- (void *)group, thread->id);
+ "Activating utility thread: %s:%" PRIu32,
+ group->name, thread->id);
WT_ASSERT(session, !F_ISSET(thread, WT_THREAD_ACTIVE));
F_SET(thread, WT_THREAD_ACTIVE);
__wt_cond_signal(session, thread->pause_cond);
@@ -391,8 +394,8 @@ __wt_thread_group_stop_one(WT_SESSION_IMPL *session, WT_THREAD_GROUP *group)
if (group->current_threads > group->min) {
thread = group->threads[--group->current_threads];
__wt_verbose(session, WT_VERB_THREAD_GROUP,
- "Pausing utility thread: %p:%" PRIu32,
- (void *)group, thread->id);
+ "Pausing utility thread: %s:%" PRIu32,
+ group->name, thread->id);
WT_ASSERT(session, F_ISSET(thread, WT_THREAD_ACTIVE));
F_CLR(thread, WT_THREAD_ACTIVE);
__wt_cond_signal(session, thread->pause_cond);
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 3a9b3755ff5..b2952cbec46 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -677,7 +677,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
WT_UPDATE **updp;
wt_timestamp_t prev_commit_timestamp, ts;
uint32_t previous_state;
- bool update_timestamp;
+ bool prepared_transaction, update_timestamp;
#endif
txn = &session->txn;
@@ -698,8 +698,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
if (cval.len != 0) {
#ifdef HAVE_TIMESTAMPS
WT_ERR(__wt_txn_parse_timestamp(session, "commit", &ts, &cval));
- WT_ERR(__wt_timestamp_validate(session,
- "commit", &ts, &cval, true, true, true));
+ WT_ERR(__wt_timestamp_validate(session, "commit", &ts, &cval));
__wt_timestamp_set(&txn->commit_timestamp, &ts);
__wt_txn_set_commit_timestamp(session);
#else
@@ -794,6 +793,9 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
/* Note: we're going to commit: nothing can fail after this point. */
+#ifdef HAVE_TIMESTAMPS
+ prepared_transaction = F_ISSET(txn, WT_TXN_PREPARE);
+#endif
/* Process and free updates. */
for (i = 0, op = txn->mod; i < txn->mod_count; i++, op++) {
switch (op->type) {
@@ -827,7 +829,7 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
if (!__wt_txn_update_needs_timestamp(session, op))
break;
- if (F_ISSET(txn, WT_TXN_PREPARE)) {
+ if (prepared_transaction) {
/*
* In case of a prepared transaction, the order
* of modification of the prepare timestamp to
@@ -839,10 +841,12 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
* As updating timestamp might not be an atomic
* operation, we will manage using state.
*/
- upd->state = WT_UPDATE_STATE_LOCKED;
+ upd->prepare_state = WT_PREPARE_LOCKED;
+ WT_WRITE_BARRIER();
__wt_timestamp_set(
&upd->timestamp, &txn->commit_timestamp);
- upd->state = WT_UPDATE_STATE_READY;
+ WT_PUBLISH(upd->prepare_state,
+ WT_PREPARE_RESOLVED);
} else
__wt_timestamp_set(
&upd->timestamp, &txn->commit_timestamp);
@@ -855,8 +859,21 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
break;
ref = op->u.ref;
- __wt_timestamp_set(
- &ref->page_del->timestamp, &txn->commit_timestamp);
+ if (prepared_transaction) {
+ /*
+ * Updating the timestamp might not be an atomic
+ * operation, so bracket it with prepare-state changes.
+ */
+ ref->page_del->prepare_state =
+ WT_PREPARE_LOCKED;
+ WT_WRITE_BARRIER();
+ __wt_timestamp_set(&ref->page_del->timestamp,
+ &txn->commit_timestamp);
+ WT_PUBLISH(ref->page_del->prepare_state,
+ WT_PREPARE_RESOLVED);
+ } else
+ __wt_timestamp_set(&ref->page_del->timestamp,
+ &txn->commit_timestamp);
/*
* The page-deleted list can be discarded by eviction,
@@ -872,11 +889,35 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
break;
}
- if ((updp = ref->page_del->update_list) != NULL)
- for (; *updp != NULL; ++updp)
+ if ((updp = ref->page_del->update_list) == NULL) {
+ /*
+ * Publish to ensure we don't let the page be
+ * evicted and the updates discarded before
+ * being written.
+ */
+ WT_PUBLISH(ref->state, previous_state);
+ break;
+ }
+
+ for (; *updp != NULL; ++updp) {
+ if (prepared_transaction) {
+ /*
+ * As ref state is LOCKED, timestamp
+ * and prepare state are updated in
+ * exclusive access, hence no need for
+ * temporary state WT_PREPARE_LOCKED
+ * and BARRIER.
+ */
__wt_timestamp_set(
&(*updp)->timestamp,
&txn->commit_timestamp);
+ (*updp)->prepare_state =
+ WT_PREPARE_RESOLVED;
+ } else
+ __wt_timestamp_set(
+ &(*updp)->timestamp,
+ &txn->commit_timestamp);
+ }
/*
* Publish to ensure we don't let the page be evicted
@@ -980,7 +1021,6 @@ int
__wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[])
{
#ifdef HAVE_TIMESTAMPS
- WT_CONFIG_ITEM cval;
WT_TXN *txn;
WT_TXN_OP *op;
WT_UPDATE *upd;
@@ -990,22 +1030,14 @@ __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[])
txn = &session->txn;
WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
- /* Transaction should not have a commit timestamp set. */
- WT_ASSERT(session, !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT));
WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || txn->mod_count == 0);
/* Transaction should not have updated any of the logged tables. */
WT_ASSERT(session, txn->logrec == NULL);
WT_RET(__wt_txn_context_check(session, true));
- /* Look for a prepare timestamp. */
- WT_RET(
- __wt_config_gets_def(session, cfg, "prepare_timestamp", 0, &cval));
- if (cval.len == 0)
- WT_RET_MSG(session, EINVAL, "prepare timestamp is required");
-
- /* TODO : Validate prepare timestamp. */
- WT_RET(__wt_txn_parse_timestamp(session, "prepare", &ts, &cval));
+ /* Parse and validate the prepare timestamp. */
+ WT_RET(__wt_txn_parse_prepare_timestamp(session, cfg, &ts));
__wt_timestamp_set(&txn->prepare_timestamp, &ts);
/*
@@ -1051,11 +1083,13 @@ __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[])
/* Set prepare timestamp. */
__wt_timestamp_set(&upd->timestamp, &ts);
- upd->state = WT_UPDATE_STATE_PREPARED;
+ WT_PUBLISH(upd->prepare_state, WT_PREPARE_INPROGRESS);
break;
case WT_TXN_OP_REF_DELETE:
__wt_timestamp_set(
&op->u.ref->page_del->timestamp, &ts);
+ WT_PUBLISH(op->u.ref->page_del->prepare_state,
+ WT_PREPARE_INPROGRESS);
break;
case WT_TXN_OP_TRUNCATE_COL:
case WT_TXN_OP_TRUNCATE_ROW:
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index 2266a9cd6f5..6fd82db5917 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -567,13 +567,13 @@ set: __wt_writelock(session, &txn_global->rwlock);
#ifdef HAVE_TIMESTAMPS
/*
* __wt_timestamp_validate --
- * Validate a timestamp to be not older than the global oldest and/or
- * global stable and/or running transaction commit timestamp.
+ * Validate that a timestamp is not older than the global oldest
+ * timestamp, the global stable timestamp, the running transaction's
+ * first commit timestamp or the running transaction's prepare timestamp.
*/
int
__wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name,
- wt_timestamp_t *ts, WT_CONFIG_ITEM *cval,
- bool cmp_oldest, bool cmp_stable, bool cmp_commit)
+ wt_timestamp_t *ts, WT_CONFIG_ITEM *cval)
{
WT_TXN *txn = &session->txn;
WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
@@ -596,16 +596,14 @@ __wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name,
if ((has_stable_ts = txn_global->has_stable_timestamp))
__wt_timestamp_set(&stable_ts, &txn_global->stable_timestamp));
- if (cmp_oldest && has_oldest_ts &&
- __wt_timestamp_cmp(ts, &oldest_ts) < 0) {
+ if (has_oldest_ts && __wt_timestamp_cmp(ts, &oldest_ts) < 0) {
WT_RET(__wt_timestamp_to_hex_string(session, hex_timestamp,
&oldest_ts));
WT_RET_MSG(session, EINVAL,
"%s timestamp %.*s older than oldest timestamp %s",
name, (int)cval->len, cval->str, hex_timestamp);
}
- if (cmp_stable && has_stable_ts &&
- __wt_timestamp_cmp(ts, &stable_ts) < 0) {
+ if (has_stable_ts && __wt_timestamp_cmp(ts, &stable_ts) < 0) {
WT_RET(__wt_timestamp_to_hex_string(session, hex_timestamp,
&stable_ts));
WT_RET_MSG(session, EINVAL,
@@ -618,7 +616,7 @@ __wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name,
* Return an error if the given timestamp is older than the first
* commit timestamp.
*/
- if (cmp_commit && F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
+ if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
__wt_timestamp_cmp(ts, &txn->first_commit_timestamp) < 0) {
WT_RET(__wt_timestamp_to_hex_string(
session, hex_timestamp, &txn->first_commit_timestamp));
@@ -628,6 +626,21 @@ __wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name,
name, (int)cval->len, cval->str, hex_timestamp);
}
+ /*
+ * Compare against the prepare timestamp of the current transaction.
+ * Return an error if the given timestamp is older than the prepare
+ * timestamp.
+ */
+ if (F_ISSET(txn, WT_TXN_PREPARE) &&
+ __wt_timestamp_cmp(ts, &txn->prepare_timestamp) < 0) {
+ WT_RET(__wt_timestamp_to_hex_string(
+ session, hex_timestamp, &txn->prepare_timestamp));
+ WT_RET_MSG(session, EINVAL,
+ "%s timestamp %.*s older than the prepare timestamp %s "
+ "for this transaction",
+ name, (int)cval->len, cval->str, hex_timestamp);
+ }
+
return (0);
}
#endif
@@ -651,8 +664,7 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
WT_TRET(__wt_txn_context_check(session, true));
WT_RET(__wt_txn_parse_timestamp(session, "commit", &ts, &cval));
- WT_RET(__wt_timestamp_validate(session,
- "commit", &ts, &cval, true, true, true));
+ WT_RET(__wt_timestamp_validate(session, "commit", &ts, &cval));
__wt_timestamp_set(&txn->commit_timestamp, &ts);
__wt_txn_set_commit_timestamp(session);
#else
@@ -669,6 +681,83 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
}
/*
+ * __wt_txn_parse_prepare_timestamp --
+ * Parse a request to set a transaction's prepare_timestamp.
+ */
+int
+__wt_txn_parse_prepare_timestamp(
+ WT_SESSION_IMPL *session, const char *cfg[], wt_timestamp_t *timestamp)
+{
+ WT_CONFIG_ITEM cval;
+
+ WT_RET(__wt_config_gets_def(session,
+ cfg, "prepare_timestamp", 0, &cval));
+ if (cval.len > 0) {
+#ifdef HAVE_TIMESTAMPS
+ WT_TXN *prev;
+ WT_TXN_GLOBAL *txn_global;
+ wt_timestamp_t oldest_ts;
+ char hex_timestamp[2 * WT_TIMESTAMP_SIZE + 1];
+
+ txn_global = &S2C(session)->txn_global;
+
+ if (F_ISSET(&session->txn, WT_TXN_HAS_TS_COMMIT))
+ WT_RET_MSG(session, EINVAL,
+ "commit timestamp should not have been set before "
+ "prepare transaction");
+
+ WT_RET(__wt_txn_parse_timestamp(
+ session, "prepare", timestamp, &cval));
+
+ /*
+ * Prepare timestamp must be later/greater than latest active
+ * read timestamp.
+ */
+ __wt_readlock(session, &txn_global->read_timestamp_rwlock);
+ prev = TAILQ_LAST(&txn_global->read_timestamph,
+ __wt_txn_rts_qh);
+ if (prev != NULL &&
+ __wt_timestamp_cmp(&prev->read_timestamp, timestamp) >= 0) {
+ __wt_readunlock(session,
+ &txn_global->read_timestamp_rwlock);
+ WT_RET(__wt_timestamp_to_hex_string(session,
+ hex_timestamp, &prev->read_timestamp));
+ WT_RET_MSG(session, EINVAL,
+ "prepare timestamp %.*s not later than an active "
+ "read timestamp %s ", (int)cval.len, cval.str,
+ hex_timestamp);
+ }
+ __wt_readunlock(session, &txn_global->read_timestamp_rwlock);
+
+ /*
+ * If there are no active readers, prepare timestamp must not
+ * be older than oldest timestamp.
+ */
+ if (prev == NULL) {
+ WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
+ __wt_timestamp_set(&oldest_ts,
+ &txn_global->oldest_timestamp));
+
+ if (__wt_timestamp_cmp(timestamp, &oldest_ts) < 0) {
+ WT_RET(__wt_timestamp_to_hex_string(session,
+ hex_timestamp, &oldest_ts));
+ WT_RET_MSG(session, EINVAL,
+ "prepare timestamp %.*s is older than the "
+ "oldest timestamp %s ", (int)cval.len,
+ cval.str, hex_timestamp);
+ }
+ }
+#else
+ WT_UNUSED(timestamp);
+ WT_RET_MSG(session, EINVAL, "prepare_timestamp requires a "
+ "version of WiredTiger built with timestamp support");
+#endif
+ } else
+ WT_RET_MSG(session, EINVAL, "prepare timestamp is required");
+
+ return (0);
+}
+/*
* __wt_txn_parse_read_timestamp --
* Parse a request to set a transaction's read_timestamp.
*/
@@ -691,10 +780,17 @@ __wt_txn_parse_read_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
txn_global = &S2C(session)->txn_global;
WT_RET(__wt_txn_parse_timestamp(session, "read", &ts, &cval));
- /* Read timestamps imply / require snapshot isolation. */
+ /*
+ * Read timestamps imply / require snapshot isolation.
+ *
+ * If we already have a snapshot, it may be too early
+ * to match the timestamp. Get a new one.
+ */
if (!F_ISSET(txn, WT_TXN_RUNNING))
txn->isolation = WT_ISO_SNAPSHOT;
- else if (txn->isolation != WT_ISO_SNAPSHOT)
+ else if (txn->isolation == WT_ISO_SNAPSHOT)
+ __wt_txn_get_snapshot(session);
+ else
WT_RET_MSG(session, EINVAL, "setting a read_timestamp"
" requires a transaction running at snapshot"
" isolation");
diff --git a/src/third_party/wiredtiger/src/utilities/util_load.c b/src/third_party/wiredtiger/src/utilities/util_load.c
index dab24930fe6..2b210419c78 100644
--- a/src/third_party/wiredtiger/src/utilities/util_load.c
+++ b/src/third_party/wiredtiger/src/utilities/util_load.c
@@ -486,7 +486,7 @@ config_rename(WT_SESSION *session, char **urip, const char *name)
return (util_err(session, errno, NULL));
/*
- * Find the separating colon characters, but not the trailing one may
+ * Find the separating colon characters, but note the trailing one may
* not be there.
*/
if ((p = strchr(*urip, ':')) == NULL) {
diff --git a/src/third_party/wiredtiger/test/format/config.c b/src/third_party/wiredtiger/test/format/config.c
index d46b0868887..0677b3b753c 100644
--- a/src/third_party/wiredtiger/test/format/config.c
+++ b/src/third_party/wiredtiger/test/format/config.c
@@ -159,9 +159,8 @@ config_setup(void)
/*
* Periodically, run single-threaded so we can compare the results to
* a Berkeley DB copy, as long as the thread-count isn't nailed down.
- * Don't do it on the first run, all our smoke tests would hit it.
*/
- if (!g.replay && g.run_cnt % 20 == 19 && !config_is_perm("threads"))
+ if (!config_is_perm("threads") && mmrand(NULL, 1, 20) == 1)
g.c_threads = 1;
config_checkpoint();
@@ -191,12 +190,8 @@ config_setup(void)
/*
* Turn off truncate for LSM runs (some configurations with truncate
* always results in a timeout).
- *
- * WiredTiger doesn't currently support truncate and prepare at the
- * same time, see WT-3922. For now, pick one on each run.
*/
- if (!config_is_perm("truncate"))
- if (DATASOURCE("lsm") || mmrand(NULL, 0, 1) == 1)
+ if (!config_is_perm("truncate") && DATASOURCE("lsm"))
config_single("truncate=off", 0);
/* Give Helium configuration a final review. */
@@ -629,10 +624,10 @@ config_pct(void)
/*
* If the delete percentage isn't nailed down, periodically set it to
- * 0 so salvage gets run. Don't do it on the first run, all our smoke
- * tests would hit it.
+ * 0 so salvage gets run and so we can perform stricter sanity checks
+ * on key ordering.
*/
- if (!config_is_perm("delete_pct") && !g.replay && g.run_cnt % 10 == 9) {
+ if (!config_is_perm("delete_pct") && mmrand(NULL, 1, 10) == 1) {
list[CONFIG_DELETE_ENTRY].order = 0;
*list[CONFIG_DELETE_ENTRY].vp = 0;
}
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index d277fb1a915..4eac7a5eb8e 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -122,6 +122,8 @@ typedef struct {
WT_RAND_STATE rnd; /* Global RNG state */
+ pthread_rwlock_t prepare_lock; /* Prepare running */
+
uint64_t timestamp; /* Counter for timestamps */
uint64_t truncate_cnt; /* Counter for truncation */
@@ -290,6 +292,8 @@ typedef struct {
uint64_t last; /* truncate range */
WT_ITEM *lastkey, _lastkey;
+ WT_ITEM *tbuf, _tbuf; /* temporary buffer */
+
#define TINFO_RUNNING 1 /* Running */
#define TINFO_COMPLETE 2 /* Finished */
#define TINFO_JOINED 3 /* Resolved */
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index 596d952dcc6..54aa6d2b766 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -512,6 +512,12 @@ begin_transaction(TINFO *tinfo, WT_SESSION *session, u_int *iso_configp)
config = "isolation=snapshot";
if (g.c_txn_timestamps) {
/*
+ * Avoid starting a new reader when a prepare is in
+ * progress.
+ */
+ (void)pthread_rwlock_rdlock(&g.prepare_lock);
+
+ /*
* Set the thread's read timestamp to the current value
* before allocating a new read timestamp. This
* guarantees the oldest timestamp won't move past the
@@ -531,6 +537,9 @@ begin_transaction(TINFO *tinfo, WT_SESSION *session, u_int *iso_configp)
testutil_check(session->begin_transaction(session, config));
+ if (v == ISOLATION_SNAPSHOT && g.c_txn_timestamps)
+ (void)pthread_rwlock_unlock(&g.prepare_lock);
+
/*
* It's OK for the oldest timestamp to move past a running query, clear
* the thread's read timestamp, it no longer needs to be pinned.
@@ -617,6 +626,7 @@ rollback_transaction(TINFO *tinfo, WT_SESSION *session)
static int
prepare_transaction(TINFO *tinfo, WT_SESSION *session)
{
+ WT_DECL_RET;
uint64_t ts;
char config_buf[64];
@@ -635,10 +645,23 @@ prepare_transaction(TINFO *tinfo, WT_SESSION *session)
*/
++tinfo->prepare;
+ /*
+ * Synchronize prepare call with begin transaction to prevent a new
+ * reader creeping in.
+ *
+ * Prepare will return an error if the prepare timestamp is not later
+ * than the latest active read timestamp.
+ */
+ (void)pthread_rwlock_wrlock(&g.prepare_lock);
+
ts = set_commit_timestamp(tinfo);
testutil_check(__wt_snprintf(
config_buf, sizeof(config_buf), "prepare_timestamp=%" PRIx64, ts));
- return (session->prepare_transaction(session, config_buf));
+ ret = session->prepare_transaction(session, config_buf);
+
+ (void)pthread_rwlock_unlock(&g.prepare_lock);
+
+ return (ret);
}
/*
@@ -690,6 +713,7 @@ ops(void *arg)
val_gen_init(tinfo->value);
tinfo->lastkey = &tinfo->_lastkey;
key_gen_init(tinfo->lastkey);
+ tinfo->tbuf = &tinfo->_tbuf;
/* Set the first operation where we'll create sessions and cursors. */
cursor = NULL;
@@ -1072,9 +1096,8 @@ update_instead_of_chosen_op:
/*
* Prepare the transaction 10% of the time.
- * Currently doesn't work with truncation, see WT-3922.
*/
- if (g.c_truncate == 0 && mmrand(&tinfo->rnd, 1, 10) == 1) {
+ if (mmrand(&tinfo->rnd, 1, 10) == 1) {
ret = prepare_transaction(tinfo, session);
testutil_assert(ret == 0 || ret == WT_PREPARE_CONFLICT);
if (ret == WT_PREPARE_CONFLICT)
@@ -1113,6 +1136,7 @@ deadlock: ++tinfo->deadlock;
key_gen_teardown(tinfo->key);
val_gen_teardown(tinfo->value);
key_gen_teardown(tinfo->lastkey);
+ free(tinfo->tbuf->mem);
tinfo->state = TINFO_COMPLETE;
return (WT_THREAD_RET_VALUE);
@@ -1291,11 +1315,11 @@ nextprev(TINFO *tinfo, WT_CURSOR *cursor, bool next)
{
WT_DECL_RET;
WT_ITEM key, value;
- uint64_t keyno;
+ uint64_t keyno, keyno_prev;
uint8_t bitfield;
int cmp;
const char *which;
- bool incrementing;
+ bool incrementing, record_gaps;
keyno = 0;
which = next ? "WT_CURSOR.next" : "WT_CURSOR.prev";
@@ -1332,41 +1356,85 @@ nextprev(TINFO *tinfo, WT_CURSOR *cursor, bool next)
if (DATASOURCE("lsm"))
break;
+ /*
+ * Compare the returned key with the previously returned key,
+ * and assert the order is correct. If not deleting keys, and
+ * the rows aren't in the column-store insert name space, also
+ * assert we don't skip groups of records (that's a page-split
+ * bug symptom).
+ */
+ record_gaps = g.c_delete_pct != 0;
switch (g.type) {
case FIX:
case VAR:
- testutil_assertfmt(
- !next || tinfo->keyno < keyno,
- "%s returned %" PRIu64 " then %" PRIu64,
- which, tinfo->keyno, keyno);
- testutil_assertfmt(
- next || tinfo->keyno > keyno,
- "%s returned %" PRIu64 " then %" PRIu64,
- which, tinfo->keyno, keyno);
+ if (tinfo->keyno > g.c_rows || keyno > g.c_rows)
+ record_gaps = true;
+ if (!next) {
+ if (tinfo->keyno < keyno ||
+ (!record_gaps && keyno != tinfo->keyno - 1))
+ goto order_error_col;
+ } else
+ if (tinfo->keyno > keyno ||
+ (!record_gaps && keyno != tinfo->keyno + 1))
+ goto order_error_col;
+ if (0) {
+order_error_col:
+ testutil_die(0,
+ "%s returned %" PRIu64 " then %" PRIu64,
+ which, tinfo->keyno, keyno);
+ }
tinfo->keyno = keyno;
break;
case ROW:
- cmp = memcmp(tinfo->key->data, key.data,
- WT_MIN(tinfo->key->size, key.size));
incrementing =
(next && !g.c_reverse) || (!next && g.c_reverse);
- testutil_assertfmt(
- !incrementing ||
- cmp < 0 ||
- (cmp == 0 && tinfo->key->size < key.size),
- "%s returned {%.*s} then {%.*s}",
- which,
- (int)tinfo->key->size, tinfo->key->data,
- (int)key.size, key.data);
- testutil_assertfmt(
- incrementing ||
- cmp > 0 ||
- (cmp == 0 && tinfo->key->size > key.size),
- "%s returned {%.*s} then {%.*s}",
- which,
- (int)tinfo->key->size, tinfo->key->data,
- (int)key.size, key.data);
+ cmp = memcmp(tinfo->key->data, key.data,
+ WT_MIN(tinfo->key->size, key.size));
+ if (incrementing) {
+ if (cmp > 0 ||
+ (cmp == 0 && tinfo->key->size < key.size))
+ goto order_error_row;
+ } else
+ if (cmp < 0 ||
+ (cmp == 0 && tinfo->key->size > key.size))
+ goto order_error_row;
+ if (!record_gaps) {
+ /*
+ * Convert the keys to record numbers and then
+ * compare less-than-or-equal. (Not less-than,
+ * row-store inserts new rows in-between rows
+ * by appending a new suffix to the row's key.)
+ */
+ testutil_check(__wt_buf_fmt(
+ (WT_SESSION_IMPL *)cursor->session,
+ tinfo->tbuf, "%.*s",
+ (int)tinfo->key->size,
+ (char *)tinfo->key->data));
+ keyno_prev =
+ strtoul(tinfo->tbuf->data, NULL, 10);
+ testutil_check(__wt_buf_fmt(
+ (WT_SESSION_IMPL *)cursor->session,
+ tinfo->tbuf, "%.*s",
+ (int)key.size, (char *)key.data));
+ keyno = strtoul(tinfo->tbuf->data, NULL, 10);
+ if (incrementing) {
+ if (keyno_prev != keyno &&
+ keyno_prev + 1 != keyno)
+ goto order_error_row;
+ } else
+ if (keyno_prev != keyno &&
+ keyno_prev - 1 != keyno)
+ goto order_error_row;
+ }
+ if (0) {
+order_error_row:
+ testutil_die(0,
+ "%s returned {%.*s} then {%.*s}",
+ which,
+ (int)tinfo->key->size, tinfo->key->data,
+ (int)key.size, key.data);
+ }
testutil_check(__wt_buf_set((WT_SESSION_IMPL *)
cursor->session, tinfo->key, key.data, key.size));
diff --git a/src/third_party/wiredtiger/test/format/t.c b/src/third_party/wiredtiger/test/format/t.c
index 6077a67a541..d7b9add1f14 100644
--- a/src/third_party/wiredtiger/test/format/t.c
+++ b/src/third_party/wiredtiger/test/format/t.c
@@ -170,6 +170,7 @@ main(int argc, char *argv[])
testutil_check(pthread_rwlock_init(&g.append_lock, NULL));
testutil_check(pthread_rwlock_init(&g.backup_lock, NULL));
testutil_check(pthread_rwlock_init(&g.death_lock, NULL));
+ testutil_check(pthread_rwlock_init(&g.prepare_lock, NULL));
printf("%s: process %" PRIdMAX "\n", progname, (intmax_t)getpid());
while (++g.run_cnt <= g.c_runs || g.c_runs == 0 ) {
@@ -267,6 +268,7 @@ main(int argc, char *argv[])
testutil_check(pthread_rwlock_destroy(&g.append_lock));
testutil_check(pthread_rwlock_destroy(&g.backup_lock));
testutil_check(pthread_rwlock_destroy(&g.death_lock));
+ testutil_check(pthread_rwlock_destroy(&g.prepare_lock));
config_clear();
diff --git a/src/third_party/wiredtiger/test/suite/test_alter03.py b/src/third_party/wiredtiger/test/suite/test_alter03.py
new file mode 100644
index 00000000000..78d1481f778
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_alter03.py
@@ -0,0 +1,82 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2018 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+from wtscenario import make_scenarios
+
+# test_alter03.py
+# Check if app_metadata can be altered.
+class test_alter03(wttest.WiredTigerTestCase):
+ name = "alter03"
+
+ def verify_metadata(self, metastr):
+ if metastr == '':
+ return
+ cursor = self.session.open_cursor('metadata:', None, None)
+ #
+ # Walk through all the metadata looking for the entries that are
+ # the URIs for the named object.
+ #
+ found = False
+ while True:
+ ret = cursor.next()
+ if ret != 0:
+ break
+ key = cursor.get_key()
+ if key.find(self.name) != -1:
+ value = cursor[key]
+ found = True
+ self.assertTrue(value.find(metastr) != -1)
+ cursor.close()
+ self.assertTrue(found == True)
+
+ # Alter: Change the app_metadata and verify
+ def test_alter03_app_metadata(self):
+ uri = "table:" + self.name
+ entries = 100
+ create_params = 'key_format=i,value_format=i,'
+ app_meta_orig = 'app_metadata="meta_data_1",'
+ app_meta_new = 'app_metadata="meta_data_2",'
+
+ self.session.create(uri, create_params + app_meta_orig)
+
+ # Put some data in table.
+ c = self.session.open_cursor(uri, None)
+ for k in range(entries):
+ c[k+1] = 1
+ c.close()
+
+ # Verify the string in the metadata
+ self.verify_metadata(app_meta_orig)
+
+ # Alter app metadata and verify
+ self.session.alter(uri, app_meta_new)
+ self.verify_metadata(app_meta_new)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_cursor13.py b/src/third_party/wiredtiger/test/suite/test_cursor13.py
index 35a841ed78d..9392f7a99d8 100644
--- a/src/third_party/wiredtiger/test/suite/test_cursor13.py
+++ b/src/third_party/wiredtiger/test/suite/test_cursor13.py
@@ -41,6 +41,7 @@ class test_cursor13_base(wttest.WiredTigerTestCase):
stat_cursor_cache = 0
stat_cursor_reopen = 0
+ # Returns a list: [cursor_cached, cursor_reopened]
def caching_stats(self):
stat_cursor = self.session.open_cursor('statistics:', None, None)
cache = stat_cursor[stat.conn.cursor_cache][2]
@@ -48,6 +49,8 @@ class test_cursor13_base(wttest.WiredTigerTestCase):
stat_cursor.close()
return [cache, reopen]
+ # Returns a list: [cursor_sweep, cursor_sweep_buckets,
+ # cursor_sweep_examined, cursor_sweep_closed]
def sweep_stats(self):
stat_cursor = self.session.open_cursor('statistics:', None, None)
sweep = stat_cursor[stat.conn.cursor_sweep][2]
@@ -527,3 +530,25 @@ class test_cursor13_sweep(test_cursor13_big_base):
# by approximately the number of swept cursors, but it's less
# predictable.
self.assertGreater(end_stats[1] - begin_stats[1], 0)
+
+class test_cursor13_dup(test_cursor13_base):
+ def test_dup(self):
+ self.cursor_stats_init()
+ uri = 'table:test_cursor13_dup'
+ self.session.create(uri, 'key_format=S,value_format=S')
+ cursor = self.session.open_cursor(uri)
+ cursor['A'] = 'B'
+ cursor.close()
+
+ # Get a cursor and position it.
+ # An unpositioned cursor cannot be duplicated.
+ c1 = self.session.open_cursor(uri, None)
+ c1.next()
+
+ for notused in range(0, 100):
+ self.session.breakpoint()
+ c2 = self.session.open_cursor(None, c1, None)
+ c2.close()
+ stats = self.caching_stats()
+        self.assertGreaterEqual(stats[0], 100) # cursor_cached >= 100
+        self.assertGreaterEqual(stats[1], 100) # cursor_reopened >= 100
diff --git a/src/third_party/wiredtiger/test/suite/test_las.py b/src/third_party/wiredtiger/test/suite/test_las01.py
index f38b11138d2..fd4dea87c35 100644
--- a/src/third_party/wiredtiger/test/suite/test_las.py
+++ b/src/third_party/wiredtiger/test/suite/test_las01.py
@@ -33,9 +33,9 @@ from wtdataset import SimpleDataSet
def timestamp_str(t):
return '%x' % t
-# test_las.py
+# test_las01.py
# Smoke tests to ensure lookaside tables are working.
-class test_las(wttest.WiredTigerTestCase):
+class test_las01(wttest.WiredTigerTestCase):
# Force a small cache.
def conn_config(self):
return 'cache_size=50MB'
@@ -93,7 +93,7 @@ class test_las(wttest.WiredTigerTestCase):
def test_las(self):
# Create a small table.
- uri = "table:test_las"
+ uri = "table:test_las01"
nrows = 100
ds = SimpleDataSet(self, uri, nrows, key_format="S", value_format='u')
ds.populate()
diff --git a/src/third_party/wiredtiger/test/suite/test_las02.py b/src/third_party/wiredtiger/test/suite/test_las02.py
new file mode 100644
index 00000000000..af089d6c19e
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_las02.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2018 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+from helper import copy_wiredtiger_home
+import wiredtiger, wttest
+from wtdataset import SimpleDataSet
+
+def timestamp_str(t):
+ return '%x' % t
+
+# test_las02.py
+# Test that truncate with lookaside entries and timestamps gives expected results.
+class test_las02(wttest.WiredTigerTestCase):
+ # Force a small cache.
+ def conn_config(self):
+ return 'cache_size=50MB,log=(enabled)'
+
+ def large_updates(self, uri, value, ds, nrows, commit_ts):
+ # Update a large number of records; we'll hang if the lookaside table isn't working.
+ session = self.session
+ cursor = session.open_cursor(uri)
+ for i in range(1, nrows + 1):
+ session.begin_transaction()
+ cursor[ds.key(i)] = value
+ session.commit_transaction('commit_timestamp=' + timestamp_str(commit_ts))
+ cursor.close()
+
+ def check(self, check_value, uri, nrows, read_ts):
+ session = self.session
+ session.begin_transaction('read_timestamp=' + timestamp_str(read_ts))
+ cursor = session.open_cursor(uri)
+ count = 0
+ for k, v in cursor:
+ self.assertEqual(v, check_value)
+ count += 1
+ session.rollback_transaction()
+ self.assertEqual(count, nrows)
+
+ def test_las(self):
+ nrows = 10000
+
+ # Create a table without logging to ensure we get "skew_newest" lookaside eviction behavior.
+ uri = "table:las02_main"
+ ds = SimpleDataSet(
+ self, uri, 0, key_format="S", value_format="S", config='log=(enabled=false)')
+ ds.populate()
+
+ uri2 = "table:las02_extra"
+ ds2 = SimpleDataSet(self, uri2, 0, key_format="S", value_format="S")
+ ds2.populate()
+
+ # Pin oldest and stable to timestamp 1.
+ self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(1) +
+ ',stable_timestamp=' + timestamp_str(1))
+
+ bigvalue = "aaaaa" * 100
+ self.large_updates(uri, bigvalue, ds, nrows / 3, 1)
+
+ # Check that all updates are seen
+ self.check(bigvalue, uri, nrows / 3, 1)
+
+ # Check to see lookaside working with old timestamp
+ bigvalue2 = "ddddd" * 100
+ self.large_updates(uri, bigvalue2, ds, nrows, 100)
+
+ # Check that the new updates are only seen after the update timestamp
+ self.check(bigvalue, uri, nrows / 3, 1)
+ self.check(bigvalue2, uri, nrows, 100)
+
+ # Force out most of the pages by updating a different tree
+ self.large_updates(uri2, bigvalue, ds2, nrows, 100)
+
+ # Now truncate half of the records
+ self.session.begin_transaction()
+ end = self.session.open_cursor(uri)
+ end.set_key(ds.key(nrows / 2))
+ self.session.truncate(None, None, end)
+ end.close()
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(200))
+
+ # Check that the truncate is visible after commit
+ self.check(bigvalue2, uri, nrows / 2, 200)
+
+ # Repeat earlier checks
+ self.check(bigvalue, uri, nrows / 3, 1)
+ self.check(bigvalue2, uri, nrows, 100)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_prepare01.py b/src/third_party/wiredtiger/test/suite/test_prepare01.py
index 0039e9106f0..20615ab836c 100644
--- a/src/third_party/wiredtiger/test/suite/test_prepare01.py
+++ b/src/third_party/wiredtiger/test/suite/test_prepare01.py
@@ -111,7 +111,6 @@ class test_prepare01(wttest.WiredTigerTestCase):
cursor = self.session.open_cursor(self.uri, None)
self.check(cursor, 0, 0)
- # Currently ignore_prepare is not realized yet, hence no effect.
self.session.begin_transaction("ignore_prepare=false")
for i in xrange(self.nentries):
if i > 0 and i % (self.nentries / 37) == 0:
@@ -165,11 +164,13 @@ class test_read_committed_default(wttest.WiredTigerTestCase):
self.assertEqual(self.cursor_count(cursor), 1)
s.prepare_transaction("prepare_timestamp=4a")
- s.commit_transaction("commit_timestamp=5a")
+ # commit timestamp can be same as prepare timestamp
+ s.commit_transaction("commit_timestamp=4a")
s.begin_transaction()
self.assertEqual(self.cursor_count(cursor), 1)
s.prepare_transaction("prepare_timestamp=7a")
+ # commit timestamp can be greater than prepare timestamp
s.commit_transaction("commit_timestamp=8a")
s.close()
diff --git a/src/third_party/wiredtiger/test/suite/test_prepare02.py b/src/third_party/wiredtiger/test/suite/test_prepare02.py
index e2971ee4ca5..b44362ca951 100644
--- a/src/third_party/wiredtiger/test/suite/test_prepare02.py
+++ b/src/third_party/wiredtiger/test/suite/test_prepare02.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
#
# test_prepare02.py
-# Prepare : check post conditions to prepare operation
+# Prepare : Session API usage generates expected error in prepared state.
#
from suite_subprocess import suite_subprocess
diff --git a/src/third_party/wiredtiger/test/suite/test_prepare03.py b/src/third_party/wiredtiger/test/suite/test_prepare03.py
index d9838ae7f82..143d1766bda 100644
--- a/src/third_party/wiredtiger/test/suite/test_prepare03.py
+++ b/src/third_party/wiredtiger/test/suite/test_prepare03.py
@@ -30,7 +30,7 @@ import wiredtiger, wttest
from wtscenario import make_scenarios
# test_prepre03.py
-# Prepare transaction check post conditions for cursor operations
+# Prepare: Cursor API usage generates expected error in prepared state.
# Pattern of test script is to invoke cursor operations in prepared transaction
# state to ensure they fail and to repeat same operations in non-prepared state
diff --git a/src/third_party/wiredtiger/test/suite/test_prepare04.py b/src/third_party/wiredtiger/test/suite/test_prepare04.py
index af5dd12b1e5..cd4fe924293 100644
--- a/src/third_party/wiredtiger/test/suite/test_prepare04.py
+++ b/src/third_party/wiredtiger/test/suite/test_prepare04.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
#
# test_prepare04.py
-# Prepare: prepare conflict with update and read operations
+# Prepare: Update and read operations generate prepared conflict error.
#
import random
diff --git a/src/third_party/wiredtiger/test/suite/test_prepare05.py b/src/third_party/wiredtiger/test/suite/test_prepare05.py
new file mode 100644
index 00000000000..3b283dd8102
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_prepare05.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2018 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_prepare05.py
+# Prepare: Timestamp validation for the prepare APIs
+#
+
+from suite_subprocess import suite_subprocess
+import wiredtiger, wttest
+
+def timestamp_str(t):
+ return '%x' % t
+
+class test_prepare05(wttest.WiredTigerTestCase, suite_subprocess):
+ tablename = 'test_prepare05'
+ uri = 'table:' + tablename
+
+ def test_timestamp_api(self):
+ if not wiredtiger.timestamp_build():
+ self.skipTest('requires a timestamp build')
+
+ self.session.create(self.uri, 'key_format=i,value_format=i')
+ c = self.session.open_cursor(self.uri)
+
+ # It is illegal to set a prepare timestamp older than oldest timestamp.
+ self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(2))
+ self.session.begin_transaction()
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.prepare_transaction(
+ 'prepare_timestamp=' + timestamp_str(1)),
+ "/older than the oldest timestamp/")
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(3))
+
+ # Check that a prepare timestamp equal to the oldest timestamp is valid.
+ self.session.begin_transaction()
+ self.session.prepare_transaction('prepare_timestamp=' + timestamp_str(2))
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(3))
+
+ # In a single transaction it is illegal to set a commit timestamp
+ # before invoking prepare for this transaction.
+ # Note: the values are not important; setting a commit timestamp
+ # before prepare is itself illegal.
+ self.session.begin_transaction()
+ self.session.timestamp_transaction(
+ 'commit_timestamp=' + timestamp_str(3))
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.prepare_transaction(
+ 'prepare_timestamp=' + timestamp_str(2)),
+ "/should not have been set before/")
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(3))
+
+ # It is illegal to set a prepare timestamp same as or earlier than an
+ # active read timestamp.
+ # Start a new reader to have an active read timestamp.
+ s_reader = self.conn.open_session()
+ s_reader.begin_transaction('read_timestamp=' + timestamp_str(4))
+ self.session.begin_transaction()
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.prepare_transaction(
+ 'prepare_timestamp=' + timestamp_str(4)),
+ "/not later than an active read timestamp/")
+ self.session.rollback_transaction()
+
+ # Check that setting the prepare timestamp later than the active read
+ # timestamp is valid.
+ self.session.begin_transaction()
+ c[1] = 1
+ self.session.prepare_transaction(
+ 'prepare_timestamp=' + timestamp_str(5))
+ # Resolve the reader transaction started earlier.
+ s_reader.rollback_transaction()
+ self.session.rollback_transaction()
+
+ # It is illegal to set a commit timestamp older than prepare
+ # timestamp of a transaction.
+ self.session.begin_transaction()
+ c[1] = 1
+ self.session.prepare_transaction(
+ 'prepare_timestamp=' + timestamp_str(5))
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.session.commit_transaction(
+ 'commit_timestamp=' + timestamp_str(4)),
+ "/older than the prepare timestamp/")
+
+ # It is legal to set a commit timestamp equal to the prepare
+ # timestamp.
+ self.session.begin_transaction()
+ c[1] = 1
+ self.session.prepare_transaction(
+ 'prepare_timestamp=' + timestamp_str(5))
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(5))
+
+if __name__ == '__main__':
+ wttest.run()