summary | refs | log | tree | commit | diff
path: root/src/third_party/wiredtiger/src
diff options
context:
space:
mode:
author: Luke Chen <luke.chen@mongodb.com> 2019-04-16 14:36:51 +1000
committer: Luke Chen <luke.chen@mongodb.com> 2019-04-16 14:45:56 +1000
commit: a654dcf592ea7ed65426a0de96b4079ff4fc6716 (patch)
tree: a5256edad1bb219e6af72fd7e7525f58e235a307 /src/third_party/wiredtiger/src
parent: 19b622ebfb42a525f38e278c09f440eb47b12f1e (diff)
download: mongo-a654dcf592ea7ed65426a0de96b4079ff4fc6716.tar.gz
Import wiredtiger: 9416282c42d40328dfb7ff0f28831f639f98d3cb from branch mongodb-4.2
ref: 1768d66613..9416282c42
for: 4.1.11

WT-4317 Read checksum error in test_wt4156_metadata_salvage
WT-4579 Track the newest durable timestamp for each page
WT-4585 Add WT_WITH_HOTBACKUP_LOCK macro
WT-4598 Enable the assertion that the durable_timestamp is newer than or equals the commit timestamp.
WT-4640 Remove round_to_oldest in favour of roundup_timestamps
WT-4695 Python3: allow most tests to run with Python3 with small changes
WT-4696 Python3: change dist scripts to run under Python3
WT-4698 Python3: fix modify related tests
WT-4699 Python3: fix test_jsondump02.py
WT-4700 Python3: run with same source as Python2
WT-4703 Extend test/checkpoint to do removes and online checking
WT-4704 Add statistic tracking oldest active read timestamp
WT-4705 column-store no longer needs to handle WT_COL page offsets of 0
WT-4707 Failure in verifying cells with copied values
WT-4708 Coverity reported copy-paste error in WiredTiger error message
WT-4711 Python formatting errors reported while running "s_all"
WT-4714 Use the durable timestamp to determine if a page should stay dirty
WT-4724 Syntax error in wtperf_ckpt.sh when running 'dash' as default shell
Diffstat (limited to 'src/third_party/wiredtiger/src')
-rw-r--r-- src/third_party/wiredtiger/src/block/block_write.c | 6
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curnext.c | 3
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_curprev.c | 3
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_cursor.c | 4
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_debug.c | 13
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_rebalance.c | 51
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_slvg.c | 24
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_split.c | 4
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_stat.c | 19
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_vrfy.c | 82
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c | 53
-rw-r--r-- src/third_party/wiredtiger/src/cache/cache_las.c | 9
-rw-r--r-- src/third_party/wiredtiger/src/config/config_def.c | 11
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_log.c | 80
-rw-r--r-- src/third_party/wiredtiger/src/cursor/cur_backup.c | 19
-rw-r--r-- src/third_party/wiredtiger/src/cursor/cur_index.c | 4
-rw-r--r-- src/third_party/wiredtiger/src/cursor/cur_json.c | 34
-rw-r--r-- src/third_party/wiredtiger/src/cursor/cur_table.c | 4
-rwxr-xr-x src/third_party/wiredtiger/src/docs/tools/doxypy.py | 24
-rwxr-xr-x src/third_party/wiredtiger/src/docs/tools/fixlinks.py | 4
-rw-r--r-- src/third_party/wiredtiger/src/include/btmem.h | 7
-rw-r--r-- src/third_party/wiredtiger/src/include/cell.i | 65
-rw-r--r-- src/third_party/wiredtiger/src/include/connection.h | 10
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h | 6
-rw-r--r-- src/third_party/wiredtiger/src/include/meta.h | 2
-rw-r--r-- src/third_party/wiredtiger/src/include/schema.h | 73
-rw-r--r-- src/third_party/wiredtiger/src/include/session.h | 36
-rw-r--r-- src/third_party/wiredtiger/src/include/stat.h | 2
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.h | 6
-rw-r--r-- src/third_party/wiredtiger/src/include/wiredtiger.in | 23
-rw-r--r-- src/third_party/wiredtiger/src/log/log.c | 89
-rw-r--r-- src/third_party/wiredtiger/src/meta/meta_ckpt.c | 14
-rw-r--r-- src/third_party/wiredtiger/src/os_common/os_fhandle.c | 50
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_write.c | 302
-rw-r--r-- src/third_party/wiredtiger/src/schema/schema_util.c | 47
-rw-r--r-- src/third_party/wiredtiger/src/support/stat.c | 8
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn.c | 16
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_ckpt.c | 181
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_timestamp.c | 64
39 files changed, 807 insertions, 645 deletions
diff --git a/src/third_party/wiredtiger/src/block/block_write.c b/src/third_party/wiredtiger/src/block/block_write.c
index 9edc4e0108b..55f9d4ca57c 100644
--- a/src/third_party/wiredtiger/src/block/block_write.c
+++ b/src/third_party/wiredtiger/src/block/block_write.c
@@ -43,10 +43,8 @@ __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len)
* more targeted solution at some point.
*/
if (!conn->hot_backup) {
- __wt_readlock(session, &conn->hot_backup_lock);
- if (!conn->hot_backup)
- ret = __wt_ftruncate(session, block->fh, len);
- __wt_readunlock(session, &conn->hot_backup_lock);
+ WT_WITH_HOTBACKUP_READ_LOCK(session,
+ ret = __wt_ftruncate(session, block->fh, len), NULL);
}
/*
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index f504bdeddf4..6a85ccf6c17 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -244,8 +244,7 @@ restart_read: /* Find the matching WT_COL slot. */
* information.
*/
if (cbt->cip_saved != cip) {
- if ((cell = WT_COL_PTR(page, cip)) == NULL)
- continue;
+ cell = WT_COL_PTR(page, cip);
__wt_cell_unpack(session, page, cell, &unpack);
if (unpack.type == WT_CELL_DEL) {
if ((rle = __wt_cell_rle(&unpack)) == 1)
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index 22effc47553..1b8df0008b9 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -390,8 +390,7 @@ restart_read: /* Find the matching WT_COL slot. */
* information.
*/
if (cbt->cip_saved != cip) {
- if ((cell = WT_COL_PTR(page, cip)) == NULL)
- continue;
+ cell = WT_COL_PTR(page, cip);
__wt_cell_unpack(session, page, cell, &unpack);
if (unpack.type == WT_CELL_DEL) {
if (__wt_cell_rle(&unpack) == 1)
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index a6645608150..e75432f7836 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -328,8 +328,8 @@ __wt_cursor_valid(WT_CURSOR_BTREE *cbt, WT_UPDATE **updp, bool *valid)
* when read.
*/
cip = &page->pg_var[cbt->slot];
- if ((cell = WT_COL_PTR(page, cip)) == NULL ||
- __wt_cell_type(cell) == WT_CELL_DEL)
+ cell = WT_COL_PTR(page, cip);
+ if (__wt_cell_type(cell) == WT_CELL_DEL)
return (0);
break;
case BTREE_ROW:
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index 8d1ed01377c..86d00c18300 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -993,13 +993,9 @@ __debug_page_col_var(WT_DBG *ds, WT_REF *ref)
recno = ref->ref_recno;
WT_COL_FOREACH(page, cip, i) {
- if ((cell = WT_COL_PTR(page, cip)) == NULL) {
- unpack = NULL;
- rle = 1;
- } else {
- __wt_cell_unpack(ds->session, page, cell, unpack);
- rle = __wt_cell_rle(unpack);
- }
+ cell = WT_COL_PTR(page, cip);
+ __wt_cell_unpack(ds->session, page, cell, unpack);
+ rle = __wt_cell_rle(unpack);
WT_RET(__wt_snprintf(
tag, sizeof(tag), "%" PRIu64 " %" PRIu64, recno, rle));
WT_RET(
@@ -1339,7 +1335,8 @@ __debug_cell(WT_DBG *ds, const WT_PAGE_HEADER *dsk, WT_CELL_UNPACK *unpack)
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
__wt_timestamp_to_string(unpack->oldest_start_ts, ts_string[0]);
- __wt_timestamp_to_string(unpack->newest_start_ts, ts_string[1]);
+ __wt_timestamp_to_string(
+ unpack->newest_durable_ts, ts_string[1]);
__wt_timestamp_to_string(unpack->newest_stop_ts, ts_string[2]);
WT_RET(ds->f(ds,
", ts %s,%s,%s", ts_string[0], ts_string[1], ts_string[2]));
diff --git a/src/third_party/wiredtiger/src/btree/bt_rebalance.c b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
index 46dc96aedce..c04135ee82d 100644
--- a/src/third_party/wiredtiger/src/btree/bt_rebalance.c
+++ b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
@@ -57,9 +57,9 @@ __rebalance_discard(WT_SESSION_IMPL *session, WT_REBALANCE_STUFF *rs)
* Add a new entry to the list of leaf pages.
*/
static int
-__rebalance_leaf_append(WT_SESSION_IMPL *session,
- const uint8_t *key, size_t key_len,
- WT_CELL_UNPACK *unpack, WT_REBALANCE_STUFF *rs)
+__rebalance_leaf_append(WT_SESSION_IMPL *session, wt_timestamp_t durable_ts,
+ const uint8_t *key, size_t key_len, WT_CELL_UNPACK *unpack,
+ WT_REBALANCE_STUFF *rs)
{
WT_ADDR *copy_addr;
WT_REF *copy;
@@ -80,7 +80,7 @@ __rebalance_leaf_append(WT_SESSION_IMPL *session,
WT_RET(__wt_calloc_one(session, &copy_addr));
copy->addr = copy_addr;
copy_addr->oldest_start_ts = unpack->oldest_start_ts;
- copy_addr->newest_start_ts = unpack->newest_start_ts;
+ copy_addr->newest_durable_ts = durable_ts;
copy_addr->newest_stop_ts = unpack->newest_stop_ts;
WT_RET(__wt_memdup(
session, unpack->data, unpack->size, &copy_addr->addr));
@@ -194,8 +194,8 @@ __rebalance_free_original(WT_SESSION_IMPL *session, WT_REBALANCE_STUFF *rs)
* Walk a column-store page and its descendants.
*/
static int
-__rebalance_col_walk(
- WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs)
+__rebalance_col_walk(WT_SESSION_IMPL *session, wt_timestamp_t durable_ts,
+ const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs)
{
WT_BTREE *btree;
WT_CELL_UNPACK unpack;
@@ -221,7 +221,8 @@ __rebalance_col_walk(
/* An internal page: read it and recursively walk it. */
WT_ERR(__wt_bt_read(
session, buf, unpack.data, unpack.size));
- WT_ERR(__rebalance_col_walk(session, buf->data, rs));
+ WT_ERR(__rebalance_col_walk(
+ session, unpack.newest_durable_ts, buf->data, rs));
__wt_verbose(session, WT_VERB_REBALANCE,
"free-list append internal page: %s",
__wt_addr_string(
@@ -232,7 +233,7 @@ __rebalance_col_walk(
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
WT_ERR(__rebalance_leaf_append(
- session, NULL, 0, &unpack, rs));
+ session, durable_ts, NULL, 0, &unpack, rs));
break;
WT_ILLEGAL_VALUE_ERR(session, unpack.type);
}
@@ -273,8 +274,8 @@ __rebalance_row_leaf_key(WT_SESSION_IMPL *session,
* Walk a row-store page and its descendants.
*/
static int
-__rebalance_row_walk(
- WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs)
+__rebalance_row_walk(WT_SESSION_IMPL *session, wt_timestamp_t durable_ts,
+ const WT_PAGE_HEADER *dsk, WT_REBALANCE_STUFF *rs)
{
WT_BTREE *btree;
WT_CELL_UNPACK key, unpack;
@@ -347,7 +348,8 @@ __rebalance_row_walk(
/* Read and recursively walk the page. */
WT_ERR(__wt_bt_read(
session, buf, unpack.data, unpack.size));
- WT_ERR(__rebalance_row_walk(session, buf->data, rs));
+ WT_ERR(__rebalance_row_walk(
+ session, unpack.newest_durable_ts, buf->data, rs));
break;
case WT_CELL_ADDR_LEAF:
case WT_CELL_ADDR_LEAF_NO:
@@ -376,7 +378,7 @@ __rebalance_row_walk(
len = key.size;
}
WT_ERR(__rebalance_leaf_append(
- session, p, len, &unpack, rs));
+ session, durable_ts, p, len, &unpack, rs));
first_cell = false;
break;
@@ -399,17 +401,19 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[])
WT_BTREE *btree;
WT_DECL_RET;
WT_REBALANCE_STUFF *rs, _rstuff;
+ WT_REF *ref;
WT_UNUSED(cfg);
btree = S2BT(session);
+ ref = &btree->root;
/*
* If the tree has never been written to disk, we're done, rebalance
* walks disk images, not in-memory pages. For the same reason, the
* tree has to be clean.
*/
- if (btree->root.page->dsk == NULL)
+ if (ref->page->dsk == NULL)
return (0);
if (btree->modified)
WT_RET_MSG(session, EINVAL,
@@ -422,17 +426,22 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_scr_alloc(session, 0, &rs->tmp2));
/* Set the internal page tree type. */
- rs->type = btree->root.page->type;
+ rs->type = ref->page->type;
- /* Recursively walk the tree. */
+ /*
+ * Recursively walk the tree. We start with a durable timestamp, but
+ * it should never be used (we'll accumulate durable timestamps from
+ * all the internal pages in our final write), so set it to something
+ * impossible.
+ */
switch (rs->type) {
case WT_PAGE_ROW_INT:
- WT_ERR(
- __rebalance_row_walk(session, btree->root.page->dsk, rs));
+ WT_ERR(__rebalance_row_walk(
+ session, WT_TS_MAX, ref->page->dsk, rs));
break;
case WT_PAGE_COL_INT:
- WT_ERR(
- __rebalance_col_walk(session, btree->root.page->dsk, rs));
+ WT_ERR(__rebalance_col_walk(
+ session, WT_TS_MAX, ref->page->dsk, rs));
break;
WT_ILLEGAL_VALUE_ERR(session, rs->type);
}
@@ -450,8 +459,8 @@ __wt_bt_rebalance(WT_SESSION_IMPL *session, const char *cfg[])
* Swap the old root page for our newly built root page, writing the new
* root page as part of a checkpoint will finish the rebalance.
*/
- __wt_page_out(session, &btree->root.page);
- btree->root.page = rs->root;
+ __wt_page_out(session, &ref->page);
+ ref->page = rs->root;
rs->root = NULL;
err: /* Discard any leftover root page we created. */
diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c
index a03cfb6405d..08f7c424d6c 100644
--- a/src/third_party/wiredtiger/src/btree/bt_slvg.c
+++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c
@@ -75,7 +75,6 @@ struct __wt_track {
#define trk_addr shared->addr.addr
#define trk_addr_size shared->addr.size
#define trk_oldest_start_ts shared->addr.oldest_start_ts
-#define trk_newest_start_ts shared->addr.newest_start_ts
#define trk_newest_stop_ts shared->addr.newest_stop_ts
#define trk_gen shared->gen
#define trk_ovfl_addr shared->ovfl_addr
@@ -505,10 +504,9 @@ __slvg_trk_init(WT_SESSION_IMPL *session,
trk->trk_addr_size = (uint8_t)addr_size;
trk->trk_size = dsk->mem_size;
trk->trk_oldest_start_ts = WT_TS_MAX;
- trk->trk_newest_start_ts = trk->trk_newest_stop_ts = WT_TS_NONE;
+ trk->trk_newest_stop_ts = WT_TS_NONE;
if (!__wt_process.page_version_ts || dsk->type == WT_PAGE_COL_FIX) {
- trk->trk_oldest_start_ts =
- trk->trk_newest_start_ts = WT_TS_NONE;
+ trk->trk_oldest_start_ts = WT_TS_NONE;
trk->trk_newest_stop_ts = WT_TS_MAX;
}
trk->trk_gen = dsk->write_gen;
@@ -665,8 +663,6 @@ __slvg_trk_leaf_ts(WT_TRACK *trk, WT_CELL_UNPACK *unpack)
{
trk->trk_oldest_start_ts =
WT_MIN(unpack->start_ts, trk->trk_oldest_start_ts);
- trk->trk_newest_start_ts =
- WT_MAX(unpack->start_ts, trk->trk_newest_start_ts);
trk->trk_newest_stop_ts =
WT_MAX(unpack->stop_ts, trk->trk_newest_stop_ts);
}
@@ -1070,8 +1066,6 @@ merge:
*/
a_trk->trk_oldest_start_ts = b_trk->trk_oldest_start_ts =
WT_MIN(a_trk->trk_oldest_start_ts, b_trk->trk_oldest_start_ts);
- a_trk->trk_newest_start_ts = b_trk->trk_newest_start_ts =
- WT_MAX(a_trk->trk_newest_start_ts, b_trk->trk_newest_start_ts);
a_trk->trk_newest_stop_ts = b_trk->trk_newest_stop_ts =
WT_MAX(a_trk->trk_newest_stop_ts, b_trk->trk_newest_stop_ts);
__wt_verbose(session, WT_VERB_SALVAGE,
@@ -1203,9 +1197,13 @@ __slvg_col_build_internal(
ref->home = page;
ref->page = NULL;
+ /*
+ * Salvage doesn't read tree internal pages, so all pages are
+ * immediately durable, regardless of the leaf page timestamps.
+ */
WT_ERR(__wt_calloc_one(session, &addr));
addr->oldest_start_ts = trk->trk_oldest_start_ts;
- addr->newest_start_ts = trk->trk_newest_start_ts;
+ addr->newest_durable_ts = WT_TS_NONE;
addr->newest_stop_ts = trk->trk_newest_stop_ts;
WT_ERR(__wt_memdup(
session, trk->trk_addr, trk->trk_addr_size, &addr->addr));
@@ -1726,8 +1724,6 @@ merge:
*/
a_trk->trk_oldest_start_ts = b_trk->trk_oldest_start_ts =
WT_MIN(a_trk->trk_oldest_start_ts, b_trk->trk_oldest_start_ts);
- a_trk->trk_newest_start_ts = b_trk->trk_newest_start_ts =
- WT_MAX(a_trk->trk_newest_start_ts, b_trk->trk_newest_start_ts);
a_trk->trk_newest_stop_ts = b_trk->trk_newest_stop_ts =
WT_MAX(a_trk->trk_newest_stop_ts, b_trk->trk_newest_stop_ts);
__wt_verbose(session, WT_VERB_SALVAGE,
@@ -1875,9 +1871,13 @@ __slvg_row_build_internal(
ref->home = page;
ref->page = NULL;
+ /*
+ * Salvage doesn't read tree internal pages, so all pages are
+ * immediately durable, regardless of the leaf page timestamps.
+ */
WT_ERR(__wt_calloc_one(session, &addr));
addr->oldest_start_ts = trk->trk_oldest_start_ts;
- addr->newest_start_ts = trk->trk_newest_start_ts;
+ addr->newest_durable_ts = WT_TS_NONE;
addr->newest_stop_ts = trk->trk_newest_stop_ts;
WT_ERR(__wt_memdup(
session, trk->trk_addr, trk->trk_addr_size, &addr->addr));
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index f0407ce71b1..127c307b9ab 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -264,7 +264,7 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
session, from_home, (WT_CELL *)ref_addr, &unpack);
WT_RET(__wt_calloc_one(session, &addr));
addr->oldest_start_ts = unpack.oldest_start_ts;
- addr->newest_start_ts = unpack.newest_start_ts;
+ addr->newest_durable_ts = unpack.newest_durable_ts;
addr->newest_stop_ts = unpack.newest_stop_ts;
WT_ERR(__wt_memdup(
session, unpack.data, unpack.size, &addr->addr));
@@ -1675,7 +1675,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
WT_RET(__wt_calloc_one(session, &addr));
ref->addr = addr;
addr->oldest_start_ts = multi->addr.oldest_start_ts;
- addr->newest_start_ts = multi->addr.newest_start_ts;
+ addr->newest_durable_ts = multi->addr.newest_durable_ts;
addr->newest_stop_ts = multi->addr.newest_stop_ts;
WT_RET(__wt_memdup(session,
multi->addr.addr, multi->addr.size, &addr->addr));
diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c
index c201d9af73a..976a771a233 100644
--- a/src/third_party/wiredtiger/src/btree/bt_stat.c
+++ b/src/third_party/wiredtiger/src/btree/bt_stat.c
@@ -160,21 +160,18 @@ __stat_page_col_var(
* we see.
*/
WT_COL_FOREACH(page, cip, i) {
- if ((cell = WT_COL_PTR(page, cip)) == NULL) {
+ cell = WT_COL_PTR(page, cip);
+ __wt_cell_unpack(session, page, cell, unpack);
+ if (unpack->type == WT_CELL_DEL) {
orig_deleted = true;
- ++deleted_cnt;
+ deleted_cnt += __wt_cell_rle(unpack);
} else {
orig_deleted = false;
- __wt_cell_unpack(session, page, cell, unpack);
- if (unpack->type == WT_CELL_DEL)
- orig_deleted = true;
- else {
- entry_cnt += __wt_cell_rle(unpack);
- rle_cnt += __wt_cell_rle(unpack) - 1;
- }
- if (unpack->ovfl)
- ++ovfl_cnt;
+ entry_cnt += __wt_cell_rle(unpack);
}
+ rle_cnt += __wt_cell_rle(unpack) - 1;
+ if (unpack->ovfl)
+ ++ovfl_cnt;
/*
* Walk the insert list, checking for changes. For each insert
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index 1a412ace8f9..f85389bbe81 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -237,7 +237,7 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[])
*/
memset(&addr_unpack, 0, sizeof(addr_unpack));
addr_unpack.oldest_start_ts = ckpt->oldest_start_ts;
- addr_unpack.newest_start_ts = ckpt->newest_start_ts;
+ addr_unpack.newest_durable_ts = ckpt->newest_durable_ts;
addr_unpack.newest_stop_ts = ckpt->newest_stop_ts;
addr_unpack.raw = WT_CELL_ADDR_INT;
@@ -331,14 +331,9 @@ __verify_addr_ts(WT_SESSION_IMPL *session,
"internal page reference at %s has a newest stop "
"timestamp of 0",
__wt_page_addr_string(session, ref, vs->tmp1));
- if (unpack->oldest_start_ts > unpack->newest_start_ts)
+ if (unpack->oldest_start_ts > unpack->newest_stop_ts)
WT_RET_MSG(session, WT_ERROR,
"internal page reference at %s has an oldest start "
- "timestamp newer than its newest start timestamp",
- __wt_page_addr_string(session, ref, vs->tmp1));
- if (unpack->newest_start_ts > unpack->newest_stop_ts)
- WT_RET_MSG(session, WT_ERROR,
- "internal page reference at %s has a newest start "
"timestamp newer than its newest stop timestamp",
__wt_page_addr_string(session, ref, vs->tmp1));
return (0);
@@ -448,13 +443,11 @@ recno_chk: if (recno != vs->record_total + 1)
break;
case WT_PAGE_COL_VAR:
recno = 0;
- WT_COL_FOREACH(page, cip, i)
- if ((cell = WT_COL_PTR(page, cip)) == NULL)
- ++recno;
- else {
- __wt_cell_unpack(session, page, cell, unpack);
- recno += __wt_cell_rle(unpack);
- }
+ WT_COL_FOREACH(page, cip, i) {
+ cell = WT_COL_PTR(page, cip);
+ __wt_cell_unpack(session, page, cell, unpack);
+ recno += __wt_cell_rle(unpack);
+ }
vs->record_total += recno;
break;
}
@@ -745,7 +738,7 @@ __verify_ts_addr_cmp(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t cell_num,
bool gt, WT_VSTUFF *vs)
{
const char *ts1_bp, *ts2_bp;
- char ts1_buf[32], ts2_buf[32];
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
if (gt && ts1 >= ts2)
return (0);
@@ -760,9 +753,8 @@ __verify_ts_addr_cmp(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t cell_num,
ts1_bp = "WT_TS_NONE";
break;
default:
- WT_RET(
- __wt_snprintf(ts1_buf, sizeof(ts1_buf), "%" PRIu64, ts1));
- ts1_bp = ts1_buf;
+ __wt_timestamp_to_string(ts1, ts_string[0]);
+ ts1_bp = ts_string[0];
break;
}
switch (ts2) {
@@ -773,14 +765,13 @@ __verify_ts_addr_cmp(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t cell_num,
ts2_bp = "WT_TS_NONE";
break;
default:
- WT_RET(
- __wt_snprintf(ts2_buf, sizeof(ts2_buf), "%" PRIu64, ts2));
- ts2_bp = ts2_buf;
+ __wt_timestamp_to_string(ts2, ts_string[1]);
+ ts2_bp = ts_string[1];
break;
}
WT_RET_MSG(session, WT_ERROR,
"cell %" PRIu32 " on page at %s failed verification with %s "
- "time of %s, %s the parent's %s time of %s",
+ "timestamp of %s, %s the parent's %s timestamp of %s",
cell_num,
__wt_page_addr_string(session, ref, vs->tmp1),
ts1_name, ts1_bp,
@@ -801,6 +792,7 @@ __verify_page_cell(WT_SESSION_IMPL *session,
WT_DECL_RET;
const WT_PAGE_HEADER *dsk;
uint32_t cell_num;
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
bool found_ovfl;
/*
@@ -851,30 +843,27 @@ __verify_page_cell(WT_SESSION_IMPL *session,
cell_num - 1,
__wt_page_addr_string(
session, ref, vs->tmp1));
- if (unpack.oldest_start_ts > unpack.newest_start_ts)
+ if (unpack.oldest_start_ts > unpack.newest_stop_ts) {
+ __wt_timestamp_to_string(
+ unpack.oldest_start_ts, ts_string[0]);
+ __wt_timestamp_to_string(
+ unpack.newest_stop_ts, ts_string[1]);
WT_RET_MSG(session, WT_ERROR,
"cell %" PRIu32 " on page at %s has an "
- "oldest start timestamp newer than its "
- "newest start timestamp",
- cell_num - 1,
- __wt_page_addr_string(
- session, ref, vs->tmp1));
- if (unpack.newest_start_ts > unpack.newest_stop_ts)
- WT_RET_MSG(session, WT_ERROR,
- "cell %" PRIu32 " on page at %s has a "
- "newest start timestamp newer than its "
- "newest stop timestamp",
+ "oldest start timestamp %s newer than "
+ "its newest stop timestamp %s",
cell_num - 1,
- __wt_page_addr_string(
- session, ref, vs->tmp1));
+ __wt_page_addr_string(session,
+ ref, vs->tmp1), ts_string[0], ts_string[1]);
+ }
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1,
"oldest start", unpack.oldest_start_ts,
"oldest start", addr_unpack->oldest_start_ts,
true, vs));
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1,
- "newest start", unpack.newest_start_ts,
- "newest start", addr_unpack->newest_start_ts,
+ "newest durable", unpack.newest_durable_ts,
+ "newest durable", addr_unpack->newest_durable_ts,
false, vs));
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1,
"newest stop", unpack.newest_stop_ts,
@@ -893,24 +882,25 @@ __verify_page_cell(WT_SESSION_IMPL *session,
cell_num - 1,
__wt_page_addr_string(
session, ref, vs->tmp1));
- if (unpack.start_ts > unpack.stop_ts)
+ if (unpack.start_ts > unpack.stop_ts) {
+ __wt_timestamp_to_string(
+ unpack.start_ts, ts_string[0]);
+ __wt_timestamp_to_string(
+ unpack.stop_ts, ts_string[1]);
WT_RET_MSG(session, WT_ERROR,
"cell %" PRIu32 " on page at %s has a "
- "start timestamp newer than its stop "
- "timestamp ",
+ "start timestamp %s newer than its stop "
+ "timestamp %s",
cell_num - 1,
- __wt_page_addr_string(
- session, ref, vs->tmp1));
+ __wt_page_addr_string(session,
+ ref, vs->tmp1), ts_string[0], ts_string[1]);
+ }
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1,
"start", unpack.start_ts,
"oldest start", addr_unpack->oldest_start_ts,
true, vs));
WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1,
- "start", unpack.start_ts,
- "newest start", addr_unpack->newest_start_ts,
- false, vs));
- WT_RET(__verify_ts_addr_cmp(session, ref, cell_num - 1,
"stop", unpack.stop_ts,
"newest stop", addr_unpack->newest_stop_ts,
false, vs));
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
index ee6cd904aec..24d6d22f1ef 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy_dsk.c
@@ -222,7 +222,7 @@ __verify_dsk_ts_addr_cmp(WT_SESSION_IMPL *session, uint32_t cell_num,
bool gt, const char *tag)
{
const char *ts1_bp, *ts2_bp;
- char ts1_buf[32], ts2_buf[32];
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
if (gt && ts1 >= ts2)
return (0);
@@ -237,9 +237,8 @@ __verify_dsk_ts_addr_cmp(WT_SESSION_IMPL *session, uint32_t cell_num,
ts1_bp = "WT_TS_NONE";
break;
default:
- WT_RET(
- __wt_snprintf(ts1_buf, sizeof(ts1_buf), "%" PRIu64, ts1));
- ts1_bp = ts1_buf;
+ __wt_timestamp_to_string(ts1, ts_string[0]);
+ ts1_bp = ts_string[0];
break;
}
switch (ts2) {
@@ -250,14 +249,13 @@ __verify_dsk_ts_addr_cmp(WT_SESSION_IMPL *session, uint32_t cell_num,
ts2_bp = "WT_TS_NONE";
break;
default:
- WT_RET(
- __wt_snprintf(ts2_buf, sizeof(ts2_buf), "%" PRIu64, ts2));
- ts2_bp = ts2_buf;
+ __wt_timestamp_to_string(ts2, ts_string[1]);
+ ts2_bp = ts_string[1];
break;
}
WT_RET_MSG(session, WT_ERROR,
"cell %" PRIu32 " on page at %s failed verification with %s "
- "time of %s, %s the parent's %s time of %s",
+ "timestamp of %s, %s the parent's %s timestamp of %s",
cell_num, tag,
ts1_name, ts1_bp,
gt ? "less than" : "greater than",
@@ -272,6 +270,8 @@ static int
__verify_dsk_ts(WT_SESSION_IMPL *session,
WT_CELL_UNPACK *unpack, uint32_t cell_num, WT_ADDR *addr, const char *tag)
{
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+
/*
* Check timestamp order, and optionally, against a parent address.
* Timestamps in the parent address aren't necessarily an exact match,
@@ -291,17 +291,17 @@ __verify_dsk_ts(WT_SESSION_IMPL *session,
"cell %" PRIu32 " on page at %s has a newest stop "
"timestamp of 0",
cell_num - 1, tag);
- if (unpack->oldest_start_ts > unpack->newest_start_ts)
+ if (unpack->oldest_start_ts > unpack->newest_stop_ts) {
+ __wt_timestamp_to_string(
+ unpack->oldest_start_ts, ts_string[0]);
+ __wt_timestamp_to_string(
+ unpack->newest_stop_ts, ts_string[1]);
WT_RET_VRFY(session,
"cell %" PRIu32 " on page at %s has an oldest "
- "start timestamp newer than its newest start "
- "timestamp",
- cell_num - 1, tag);
- if (unpack->newest_start_ts > unpack->newest_stop_ts)
- WT_RET_VRFY(session,
- "cell %" PRIu32 " on page at %s has a newest start "
- "timestamp newer than its newest stop timestamp",
- cell_num - 1, tag);
+ "start timestamp %s newer than its newest stop "
+ "timestamp %s",
+ cell_num - 1, tag, ts_string[0], ts_string[1]);
+ }
if (addr == NULL)
break;
@@ -310,8 +310,8 @@ __verify_dsk_ts(WT_SESSION_IMPL *session,
"oldest start", addr->oldest_start_ts,
true, tag));
WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1,
- "newest start", unpack->newest_start_ts,
- "newest start", addr->newest_start_ts,
+ "newest durable", unpack->newest_durable_ts,
+ "newest durable", addr->newest_durable_ts,
false, tag));
WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1,
"newest stop", unpack->newest_stop_ts,
@@ -329,11 +329,16 @@ __verify_dsk_ts(WT_SESSION_IMPL *session,
"cell %" PRIu32 " on page at %s has a stop "
"timestamp of 0",
cell_num - 1, tag);
- if (unpack->start_ts > unpack->stop_ts)
+ if (unpack->start_ts > unpack->stop_ts) {
+ __wt_timestamp_to_string(
+ unpack->start_ts, ts_string[0]);
+ __wt_timestamp_to_string(
+ unpack->stop_ts, ts_string[1]);
WT_RET_VRFY(session,
"cell %" PRIu32 " on page at %s has a start "
- "timestamp newer than its stop timestamp ",
- cell_num - 1, tag);
+ "timestamp %s newer than its stop timestamp %s",
+ cell_num - 1, tag, ts_string[0], ts_string[1]);
+ }
if (addr == NULL)
break;
if (addr == NULL)
break;
@@ -342,10 +347,6 @@ __verify_dsk_ts(WT_SESSION_IMPL *session,
"oldest start", addr->oldest_start_ts,
true, tag));
WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1,
- "start", unpack->start_ts,
- "newest start", addr->newest_start_ts,
- false, tag));
- WT_RET(__verify_dsk_ts_addr_cmp(session, cell_num - 1,
"stop", unpack->stop_ts,
"newest stop", addr->newest_stop_ts,
false, tag));
diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c
index 905c24dceae..06e0056613c 100644
--- a/src/third_party/wiredtiger/src/cache/cache_las.c
+++ b/src/third_party/wiredtiger/src/cache/cache_las.c
@@ -1156,12 +1156,9 @@ __wt_las_sweep(WT_SESSION_IMPL *session)
(prepare_state != WT_PREPARE_INPROGRESS ||
durable_timestamp == 0));
- /*
- * FIXME Disable this assertion until fixed by WT-4598.
- * WT_ASSERT(session,
- * (prepare_state == WT_PREPARE_INPROGRESS ||
- * durable_timestamp >= las_timestamp));
- */
+ WT_ASSERT(session,
+ (prepare_state == WT_PREPARE_INPROGRESS ||
+ durable_timestamp >= las_timestamp));
/*
* There are several conditions that need to be met
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 09a52300f3b..e56e7f29004 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -287,7 +287,6 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_begin_transaction[] = {
{ "name", "string", NULL, NULL, NULL, 0 },
{ "priority", "int", NULL, "min=-100,max=100", NULL, 0 },
{ "read_timestamp", "string", NULL, NULL, NULL, 0 },
- { "round_to_oldest", "boolean", NULL, NULL, NULL, 0 },
{ "roundup_timestamps", "category",
NULL, NULL,
confchk_WT_SESSION_begin_transaction_roundup_timestamps_subconfigs, 2 },
@@ -533,7 +532,6 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_timestamp_transaction[] = {
{ "durable_timestamp", "string", NULL, NULL, NULL, 0 },
{ "prepare_timestamp", "string", NULL, NULL, NULL, 0 },
{ "read_timestamp", "string", NULL, NULL, NULL, 0 },
- { "round_to_oldest", "boolean", NULL, NULL, NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
@@ -1371,9 +1369,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "WT_SESSION.begin_transaction",
"ignore_prepare=false,isolation=,name=,priority=0,read_timestamp="
- ",round_to_oldest=false,roundup_timestamps=(prepared=false,"
- "read=false),snapshot=,sync=",
- confchk_WT_SESSION_begin_transaction, 9
+ ",roundup_timestamps=(prepared=false,read=false),snapshot=,sync=",
+ confchk_WT_SESSION_begin_transaction, 8
},
{ "WT_SESSION.checkpoint",
"drop=,force=false,name=,target=,use_timestamp=true",
@@ -1482,8 +1479,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "WT_SESSION.timestamp_transaction",
"commit_timestamp=,durable_timestamp=,prepare_timestamp=,"
- "read_timestamp=,round_to_oldest=false",
- confchk_WT_SESSION_timestamp_transaction, 5
+ "read_timestamp=",
+ confchk_WT_SESSION_timestamp_transaction, 4
},
{ "WT_SESSION.transaction_sync",
"timeout_ms=1200000",
diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c
index 8bc111346c5..81f5724663f 100644
--- a/src/third_party/wiredtiger/src/conn/conn_log.c
+++ b/src/third_party/wiredtiger/src/conn/conn_log.c
@@ -343,6 +343,28 @@ __wt_logmgr_reconfig(WT_SESSION_IMPL *session, const char **cfg)
}
/*
+ * __log_archive_once_int --
+ * Helper for __log_archive_once. Intended to be called while holding the
+ * hot backup read lock.
+ */
+static int
+__log_archive_once_int(WT_SESSION_IMPL *session,
+ char **logfiles, u_int logcount, uint32_t min_lognum)
+{
+ uint32_t lognum;
+ u_int i;
+
+ for (i = 0; i < logcount; i++) {
+ WT_RET(__wt_log_extract_lognum(session, logfiles[i], &lognum));
+ if (lognum < min_lognum)
+ WT_RET(__wt_log_remove(
+ session, WT_LOG_FILENAME, lognum));
+ }
+
+ return (0);
+}
+
+/*
* __log_archive_once --
* Perform one iteration of log archiving. Must be called with the
* log archive lock held.
@@ -353,15 +375,13 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_LOG *log;
- uint32_t lognum, min_lognum;
- u_int i, logcount;
+ uint32_t min_lognum;
+ u_int logcount;
char **logfiles;
- bool locked;
conn = S2C(session);
log = conn->log;
logcount = 0;
- locked = false;
logfiles = NULL;
/*
@@ -386,22 +406,18 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
session, conn->log_path, WT_LOG_FILENAME, &logfiles, &logcount));
/*
- * We can only archive files if a hot backup is not in progress or
- * if we are the backup.
+ * If backup_file is non-zero we know we're coming from an incremental
+ * backup cursor. In that case just perform the archive operation
+ * without the lock.
*/
- __wt_readlock(session, &conn->hot_backup_lock);
- locked = true;
- if (!conn->hot_backup || backup_file != 0) {
- for (i = 0; i < logcount; i++) {
- WT_ERR(__wt_log_extract_lognum(
- session, logfiles[i], &lognum));
- if (lognum < min_lognum)
- WT_ERR(__wt_log_remove(
- session, WT_LOG_FILENAME, lognum));
- }
- }
- __wt_readunlock(session, &conn->hot_backup_lock);
- locked = false;
+ if (backup_file != 0)
+ ret = __log_archive_once_int(
+ session, logfiles, logcount, min_lognum);
+ else
+ WT_WITH_HOTBACKUP_READ_LOCK(session,
+ ret = __log_archive_once_int(
+ session, logfiles, logcount, min_lognum), NULL);
+ WT_ERR(ret);
/*
* Indicate what is our new earliest LSN. It is the start
@@ -411,8 +427,6 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
if (0)
err: __wt_err(session, ret, "log archive server error");
- if (locked)
- __wt_readunlock(session, &conn->hot_backup_lock);
WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
return (ret);
}
@@ -594,18 +608,15 @@ __log_file_server(void *arg)
* truncate: both are OK, it's just more work
* during cursor traversal.
*/
- if (!conn->hot_backup) {
- __wt_readlock(
- session, &conn->hot_backup_lock);
- if (!conn->hot_backup &&
- conn->log_cursors == 0)
- WT_ERR_ERROR_OK(
- __wt_ftruncate(session,
+ if (!conn->hot_backup &&
+ conn->log_cursors == 0) {
+ WT_WITH_HOTBACKUP_READ_LOCK(session,
+ WT_ERR_ERROR_OK(
+ __wt_ftruncate(
+ session,
close_fh,
close_end_lsn.l.offset),
- ENOTSUP);
- __wt_readunlock(
- session, &conn->hot_backup_lock);
+ ENOTSUP), NULL);
}
WT_SET_LSN(&close_end_lsn,
close_end_lsn.l.file + 1, 0);
@@ -976,11 +987,8 @@ __log_server(void *arg)
* agreed not to rename or remove any files in
* the database directory.
*/
- __wt_readlock(session, &conn->hot_backup_lock);
- if (!conn->hot_backup)
- ret = __log_prealloc_once(session);
- __wt_readunlock(
- session, &conn->hot_backup_lock);
+ WT_WITH_HOTBACKUP_READ_LOCK(session,
+ ret = __log_prealloc_once(session), NULL);
WT_ERR(ret);
}
diff --git a/src/third_party/wiredtiger/src/cursor/cur_backup.c b/src/third_party/wiredtiger/src/cursor/cur_backup.c
index 04882e527ce..9a279ca3970 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_backup.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_backup.c
@@ -265,10 +265,8 @@ __backup_start(WT_SESSION_IMPL *session,
* operations will not see the backup file list until it is
* complete and valid.
*/
- __wt_writelock(session, &conn->hot_backup_lock);
- conn->hot_backup = true;
- conn->hot_backup_list = NULL;
- __wt_writeunlock(session, &conn->hot_backup_lock);
+ WT_WITH_HOTBACKUP_WRITE_LOCK(session,
+ WT_CONN_HOTBACKUP_START(conn));
/* We're the lock holder, we own cleanup. */
F_SET(cb, WT_CURBACKUP_LOCKER);
@@ -368,9 +366,8 @@ err: /* Close the hot backup file. */
ret = __wt_sync_and_rename(session,
&cb->bfs, WT_BACKUP_TMP, dest);
if (ret == 0) {
- __wt_writelock(session, &conn->hot_backup_lock);
- conn->hot_backup_list = cb->list;
- __wt_writeunlock(session, &conn->hot_backup_lock);
+ WT_WITH_HOTBACKUP_WRITE_LOCK(session,
+ conn->hot_backup_list = cb->list);
F_SET(session, WT_SESSION_BACKUP_CURSOR);
}
/*
@@ -399,18 +396,14 @@ __backup_stop(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
WT_ASSERT(session, !F_ISSET(cb, WT_CURBACKUP_DUP));
/* If it's not a dup backup cursor, make sure one isn't open. */
WT_ASSERT(session, !F_ISSET(session, WT_SESSION_BACKUP_DUP));
- __wt_writelock(session, &conn->hot_backup_lock);
- conn->hot_backup_list = NULL;
- __wt_writeunlock(session, &conn->hot_backup_lock);
+ WT_WITH_HOTBACKUP_WRITE_LOCK(session, conn->hot_backup_list = NULL);
__backup_free(session, cb);
/* Remove any backup specific file. */
WT_TRET(__wt_backup_file_remove(session));
/* Checkpoint deletion and next hot backup can proceed. */
- __wt_writelock(session, &conn->hot_backup_lock);
- conn->hot_backup = false;
- __wt_writeunlock(session, &conn->hot_backup_lock);
+ WT_WITH_HOTBACKUP_WRITE_LOCK(session, conn->hot_backup = false);
F_CLR(session, WT_SESSION_BACKUP_CURSOR);
return (ret);
diff --git a/src/third_party/wiredtiger/src/cursor/cur_index.c b/src/third_party/wiredtiger/src/cursor/cur_index.c
index baabcd0182c..ee0d57037eb 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_index.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_index.c
@@ -544,8 +544,8 @@ __wt_curindex_open(WT_SESSION_IMPL *session,
WT_ERR(__curindex_open_colgroups(session, cindex, cfg));
if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
- __wt_json_column_init(cursor, uri, table->key_format,
- &idx->colconf, &table->colconf);
+ WT_ERR(__wt_json_column_init(cursor, uri, table->key_format,
+ &idx->colconf, &table->colconf));
if (0) {
err: WT_TRET(__curindex_close(cursor));
diff --git a/src/third_party/wiredtiger/src/cursor/cur_json.c b/src/third_party/wiredtiger/src/cursor/cur_json.c
index 21716005c27..f540775180e 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_json.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_json.c
@@ -309,6 +309,8 @@ __wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
if ((json = (WT_CURSOR_JSON *)cursor->json_private) != NULL) {
__wt_free(session, json->key_buf);
__wt_free(session, json->value_buf);
+ __wt_free(session, json->key_names.str);
+ __wt_free(session, json->value_names.str);
__wt_free(session, json);
}
}
@@ -373,21 +375,26 @@ __wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode)
* Set json_key_names, json_value_names to comma separated lists
* of column names.
*/
-void
+int
__wt_json_column_init(WT_CURSOR *cursor, const char *uri, const char *keyformat,
const WT_CONFIG_ITEM *idxconf, const WT_CONFIG_ITEM *colconf)
{
WT_CURSOR_JSON *json;
+ WT_SESSION_IMPL *session;
+ size_t len;
uint32_t keycnt, nkeys;
const char *beginkey, *end, *lparen, *p;
json = (WT_CURSOR_JSON *)cursor->json_private;
+ session = (WT_SESSION_IMPL *)cursor->session;
beginkey = colconf->str;
end = beginkey + colconf->len;
if (idxconf != NULL) {
- json->key_names.str = idxconf->str;
- json->key_names.len = idxconf->len;
+ len = idxconf->len;
+ WT_RET(__wt_strndup(session, idxconf->str, len,
+ &json->key_names.str));
+ json->key_names.len = len;
} else if (colconf->len > 0 && *beginkey == '(') {
beginkey++;
if (end[-1] == ')')
@@ -407,20 +414,25 @@ __wt_json_column_init(WT_CURSOR *cursor, const char *uri, const char *keyformat,
}
if ((lparen = strchr(uri, '(')) != NULL) {
/* This cursor is a projection. */
- json->value_names.str = lparen;
- json->value_names.len = strlen(lparen) - 1;
- WT_ASSERT((WT_SESSION_IMPL *)cursor->session,
- json->value_names.str[json->value_names.len] == ')');
+ len = strlen(lparen) - 1;
+ WT_ASSERT(session, lparen[len] == ')');
+ WT_RET(__wt_strndup(session, lparen, len,
+ &json->value_names.str));
+ json->value_names.len = len;
} else {
- json->value_names.str = p;
- json->value_names.len = WT_PTRDIFF(end, p);
+ len = WT_PTRDIFF(end, p);
+ WT_RET(__wt_strndup(session, p, len, &json->value_names.str));
+ json->value_names.len = len;
}
if (idxconf == NULL) {
if (p > beginkey)
p--;
- json->key_names.str = beginkey;
- json->key_names.len = WT_PTRDIFF(p, beginkey);
+ len = WT_PTRDIFF(p, beginkey);
+ WT_RET(__wt_strndup(session, beginkey, len,
+ &json->key_names.str));
+ json->key_names.len = len;
}
+ return (0);
}
#define MATCH_KEYWORD(session, in, result, keyword, matchval) do { \
diff --git a/src/third_party/wiredtiger/src/cursor/cur_table.c b/src/third_party/wiredtiger/src/cursor/cur_table.c
index 77c6018778c..3198a15bd13 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_table.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_table.c
@@ -1050,8 +1050,8 @@ __wt_curtable_open(WT_SESSION_IMPL *session,
cursor, cursor->internal_uri, owner, cfg, cursorp));
if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON))
- __wt_json_column_init(
- cursor, uri, table->key_format, NULL, &table->colconf);
+ WT_ERR(__wt_json_column_init(
+ cursor, uri, table->key_format, NULL, &table->colconf));
/*
* Open the colgroup cursors immediately: we're going to need them for
diff --git a/src/third_party/wiredtiger/src/docs/tools/doxypy.py b/src/third_party/wiredtiger/src/docs/tools/doxypy.py
index 54fef5f03a5..f05a597ed6e 100755
--- a/src/third_party/wiredtiger/src/docs/tools/doxypy.py
+++ b/src/third_party/wiredtiger/src/docs/tools/doxypy.py
@@ -1,5 +1,7 @@
#!/usr/bin/env python
+from __future__ import print_function
+
__applicationName__ = "doxypy"
__blurb__ = """
doxypy is an input filter for Doxygen. It preprocesses python
@@ -86,7 +88,7 @@ class FSM(object):
self.current_input = input
self.current_transition = transition
if options.debug:
- print >>sys.stderr, "# FSM: executing (%s -> %s) for line '%s'" % (from_state, to_state, input)
+ print("# FSM: executing (%s -> %s) for line '%s'" % (from_state, to_state, input), file=sys.stderr)
callback(match)
return
@@ -208,8 +210,8 @@ class Doxypy(object):
if self.output:
try:
if options.debug:
- print >>sys.stderr, "# OUTPUT: ", self.output
- print >>self.outstream, "\n".join(self.output)
+ print("# OUTPUT: ", self.output, file=sys.stderr)
+ print("\n".join(self.output), file=self.outstream)
self.outstream.flush()
except IOError:
# Fix for FS#33. Catches "broken pipe" when doxygen closes
@@ -228,7 +230,7 @@ class Doxypy(object):
Closes the current commentblock and starts a new comment search.
"""
if options.debug:
- print >>sys.stderr, "# CALLBACK: resetCommentSearch"
+ print("# CALLBACK: resetCommentSearch" , file=sys.stderr)
self.__closeComment()
self.startCommentSearch(match)
@@ -239,7 +241,7 @@ class Doxypy(object):
the current indentation.
"""
if options.debug:
- print >>sys.stderr, "# CALLBACK: startCommentSearch"
+ print("# CALLBACK: startCommentSearch", file=sys.stderr)
self.defclass = [self.fsm.current_input]
self.comment = []
self.indent = match.group(1)
@@ -251,7 +253,7 @@ class Doxypy(object):
appends the current line to the output.
"""
if options.debug:
- print >>sys.stderr, "# CALLBACK: stopCommentSearch"
+ print("# CALLBACK: stopCommentSearch" , file=sys.stderr)
self.__closeComment()
self.defclass = []
@@ -263,7 +265,7 @@ class Doxypy(object):
Closes the open comment block, resets it and appends the current line.
"""
if options.debug:
- print >>sys.stderr, "# CALLBACK: appendFileheadLine"
+ print("# CALLBACK: appendFileheadLine" , file=sys.stderr)
self.__closeComment()
self.comment = []
self.output.append(self.fsm.current_input)
@@ -275,7 +277,7 @@ class Doxypy(object):
well as singleline comments.
"""
if options.debug:
- print >>sys.stderr, "# CALLBACK: appendCommentLine"
+ print("# CALLBACK: appendCommentLine" , file=sys.stderr)
(from_state, to_state, condition, callback) = self.fsm.current_transition
# single line comment
@@ -312,13 +314,13 @@ class Doxypy(object):
def appendNormalLine(self, match):
"""Appends a line to the output."""
if options.debug:
- print >>sys.stderr, "# CALLBACK: appendNormalLine"
+ print("# CALLBACK: appendNormalLine" , file=sys.stderr)
self.output.append(self.fsm.current_input)
def appendDefclassLine(self, match):
"""Appends a line to the triggering block."""
if options.debug:
- print >>sys.stderr, "# CALLBACK: appendDefclassLine"
+ print("# CALLBACK: appendDefclassLine" , file=sys.stderr)
self.defclass.append(self.fsm.current_input)
def makeCommentBlock(self):
@@ -397,7 +399,7 @@ def optParse():
(options, filename) = parser.parse_args()
if not filename:
- print >>sys.stderr, "No filename given."
+ print("No filename given.", file=sys.stderr)
sys.exit(-1)
return filename[0]
diff --git a/src/third_party/wiredtiger/src/docs/tools/fixlinks.py b/src/third_party/wiredtiger/src/docs/tools/fixlinks.py
index 59f8494ada3..1532118cd21 100755
--- a/src/third_party/wiredtiger/src/docs/tools/fixlinks.py
+++ b/src/third_party/wiredtiger/src/docs/tools/fixlinks.py
@@ -59,8 +59,8 @@ def process(source):
(m.group(0), m.group(1), m.group(1), m.group(2))), source)
# Replace "self, handle" with "self" -- these are typedef'ed away
- source = re.sub(r'(\s+#.*self),
- (?:connection|cursor|session)', r'\1', source)
+ source = re.sub(r'(\s+#.*self),' +
+ r'(?:connection|cursor|session)', r'\1', source)
return source
if __name__ == '__main__':
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index dc1bdc07419..c8f2221daa2 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -133,7 +133,7 @@ __wt_page_header_byteswap(WT_PAGE_HEADER *dsk)
*/
struct __wt_addr {
wt_timestamp_t oldest_start_ts; /* Aggregated timestamp information */
- wt_timestamp_t newest_start_ts;
+ wt_timestamp_t newest_durable_ts;
wt_timestamp_t newest_stop_ts;
uint8_t *addr; /* Block-manager's cookie */
@@ -990,8 +990,6 @@ struct __wt_col {
* of a base pointer. The on-page data is a WT_CELL (same as row-store
* pages).
*
- * If the value is 0, it's a single, deleted record.
- *
* Obscure the field name, code shouldn't use WT_COL->__col_value, the
* public interface is WT_COL_PTR and WT_COL_PTR_SET.
*/
@@ -1004,8 +1002,7 @@ struct __wt_col {
* not exist on the page, return a NULL.)
*/
#define WT_COL_PTR(page, cip) \
- ((cip)->__col_value == 0 ? \
- NULL : WT_PAGE_REF_OFFSET(page, (cip)->__col_value))
+ WT_PAGE_REF_OFFSET(page, (cip)->__col_value)
#define WT_COL_PTR_SET(cip, value) \
(cip)->__col_value = (value)
diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i
index 0bbe3283dee..260e2304034 100644
--- a/src/third_party/wiredtiger/src/include/cell.i
+++ b/src/third_party/wiredtiger/src/include/cell.i
@@ -147,7 +147,7 @@ struct __wt_cell_unpack {
/* Start/stop timestamps for a value */
wt_timestamp_t start_ts, stop_ts;
/* Aggregated timestamp information */
- wt_timestamp_t oldest_start_ts, newest_start_ts, newest_stop_ts;
+ wt_timestamp_t oldest_start_ts, newest_durable_ts, newest_stop_ts;
/*
* !!!
@@ -219,37 +219,50 @@ __cell_pack_timestamp_value(WT_SESSION_IMPL *session,
*/
static inline void
__wt_timestamp_addr_check(WT_SESSION_IMPL *session,
- wt_timestamp_t oldest_start_ts,
- wt_timestamp_t newest_start_ts, wt_timestamp_t newest_stop_ts)
+ wt_timestamp_t oldest_start_ts, wt_timestamp_t newest_stop_ts)
{
+#ifdef HAVE_DIAGNOSTIC
+ char ts_string[2][WT_TS_INT_STRING_SIZE];
+
+ if (newest_stop_ts == WT_TS_NONE) {
+ __wt_errx(session, "newest stop timestamp of 0");
+ WT_ASSERT(session, newest_stop_ts != WT_TS_NONE);
+ }
+ if (oldest_start_ts > newest_stop_ts) {
+ __wt_timestamp_to_string(oldest_start_ts, ts_string[0]);
+ __wt_timestamp_to_string(newest_stop_ts, ts_string[1]);
+ __wt_errx(session,
+ "an oldest start timestamp %s newer than its newest "
+ "stop timestamp %s",
+ ts_string[0], ts_string[1]);
+ WT_ASSERT(session, oldest_start_ts <= newest_stop_ts);
+ }
+#else
+ WT_UNUSED(session);
WT_UNUSED(oldest_start_ts);
- WT_UNUSED(newest_start_ts);
WT_UNUSED(newest_stop_ts);
-
- WT_ASSERT(session, newest_stop_ts != WT_TS_NONE);
- WT_ASSERT(session, oldest_start_ts <= newest_start_ts);
- WT_ASSERT(session, newest_start_ts <= newest_stop_ts);
+#endif
}
/*
* __cell_pack_timestamp_addr --
- * Pack a oldest_start, newest_start, newest_stop timestamp triplet for an
- * address.
+ * Pack a oldest_start, newest_durable_ts, newest_stop timestamp triplet
+ * for an address.
*/
static inline void
__cell_pack_timestamp_addr(WT_SESSION_IMPL *session,
uint8_t **pp, wt_timestamp_t oldest_start_ts,
- wt_timestamp_t newest_start_ts, wt_timestamp_t newest_stop_ts)
+ wt_timestamp_t newest_durable_ts, wt_timestamp_t newest_stop_ts)
{
- __wt_timestamp_addr_check(session,
- oldest_start_ts, newest_start_ts, newest_stop_ts);
+ __wt_timestamp_addr_check(session, oldest_start_ts, newest_stop_ts);
++*pp;
if (__wt_process.page_version_ts) {
/* Store differences, not absolutes. */
(void)__wt_vpack_uint(pp, 0, oldest_start_ts);
- (void)__wt_vpack_uint(pp, 0, newest_start_ts - oldest_start_ts);
- (void)__wt_vpack_uint(pp, 0, newest_stop_ts - newest_start_ts);
+ (void)__wt_vpack_uint(
+ pp, 0, newest_durable_ts - oldest_start_ts);
+ (void)__wt_vpack_uint(pp, 0, newest_stop_ts - oldest_start_ts);
}
}
@@ -260,8 +273,8 @@ __cell_pack_timestamp_addr(WT_SESSION_IMPL *session,
static inline size_t
__wt_cell_pack_addr(WT_SESSION_IMPL *session,
WT_CELL *cell, u_int cell_type, uint64_t recno,
- wt_timestamp_t oldest_start_ts,
- wt_timestamp_t newest_start_ts, wt_timestamp_t newest_stop_ts, size_t size)
+ wt_timestamp_t oldest_start_ts, wt_timestamp_t newest_durable_ts,
+ wt_timestamp_t newest_stop_ts, size_t size)
{
uint8_t *p;
@@ -270,7 +283,7 @@ __wt_cell_pack_addr(WT_SESSION_IMPL *session,
*p = '\0';
__cell_pack_timestamp_addr(session,
- &p, oldest_start_ts, newest_start_ts, newest_stop_ts);
+ &p, oldest_start_ts, newest_durable_ts, newest_stop_ts);
if (recno == WT_RECNO_OOB)
cell->__chunk[0] = (uint8_t)cell_type; /* Type */
@@ -728,10 +741,11 @@ __wt_cell_unpack_safe(WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk,
* the copied cell must be available from unpack after we return, as our
* caller has no way to find the copied cell).
*/
- WT_CELL_LEN_CHK(cell, 0);
unpack->cell = cell;
restart:
+ WT_CELL_LEN_CHK(cell, 0);
+
/*
* This path is performance critical for read-only trees, we're parsing
* on-page structures. For that reason we don't clear the unpacked cell
@@ -742,7 +756,7 @@ restart:
unpack->v = 0;
unpack->start_ts = WT_TS_NONE;
unpack->stop_ts = WT_TS_MAX;
- unpack->oldest_start_ts = unpack->newest_start_ts = WT_TS_NONE;
+ unpack->oldest_start_ts = unpack->newest_durable_ts = WT_TS_NONE;
unpack->newest_stop_ts = WT_TS_MAX;
unpack->raw = (uint8_t)__wt_cell_type_raw(cell);
unpack->type = (uint8_t)__wt_cell_type(cell);
@@ -798,15 +812,14 @@ restart:
WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 :
WT_PTRDIFF(end, p), &unpack->oldest_start_ts));
WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 :
- WT_PTRDIFF(end, p), &unpack->newest_start_ts));
- unpack->newest_start_ts += unpack->oldest_start_ts;
+ WT_PTRDIFF(end, p), &unpack->newest_durable_ts));
+ unpack->newest_durable_ts += unpack->oldest_start_ts;
WT_RET(__wt_vunpack_uint(&p, end == NULL ? 0 :
WT_PTRDIFF(end, p), &unpack->newest_stop_ts));
- unpack->newest_stop_ts += unpack->newest_start_ts;
+ unpack->newest_stop_ts += unpack->oldest_start_ts;
__wt_timestamp_addr_check(session,
- unpack->oldest_start_ts,
- unpack->newest_start_ts, unpack->newest_stop_ts);
+ unpack->oldest_start_ts, unpack->newest_stop_ts);
break;
case WT_CELL_DEL:
case WT_CELL_VALUE:
@@ -950,7 +963,7 @@ __wt_cell_unpack_dsk(WT_SESSION_IMPL *session,
* somewhere.
*
unpack->oldest_start_ts
- unpack->newest_start_ts
+ unpack->newest_durable_ts
unpack->newest_stop_ts
*/
unpack->data = "";
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index b6100ae134d..1f461a06137 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -148,6 +148,16 @@ struct __wt_named_extractor {
} while (0)
/*
+ * WT_CONN_HOTBACKUP_START --
+ * Macro to set connection data appropriately for when we commence hot
+ * backup.
+ */
+#define WT_CONN_HOTBACKUP_START(conn) do { \
+ conn->hot_backup = true; \
+ conn->hot_backup_list = NULL; \
+} while (0)
+
+/*
* WT_CONNECTION_IMPL --
* Implementation of WT_CONNECTION
*/
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 1ca81b0b4d9..d93deb0a361 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -318,7 +318,7 @@ extern int __wt_curjoin_join(WT_SESSION_IMPL *session, WT_CURSOR_JOIN *cjoin, WT
extern int __wt_json_alloc_unpack(WT_SESSION_IMPL *session, const void *buffer, size_t size, const char *fmt, WT_CURSOR_JSON *json, bool iskey, va_list ap) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_json_close(WT_SESSION_IMPL *session, WT_CURSOR *cursor);
extern size_t __wt_json_unpack_char(u_char ch, u_char *buf, size_t bufsz, bool force_unicode) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
-extern void __wt_json_column_init(WT_CURSOR *cursor, const char *uri, const char *keyformat, const WT_CONFIG_ITEM *idxconf, const WT_CONFIG_ITEM *colconf);
+extern int __wt_json_column_init(WT_CURSOR *cursor, const char *uri, const char *keyformat, const WT_CONFIG_ITEM *idxconf, const WT_CONFIG_ITEM *colconf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_json_token(WT_SESSION *wt_session, const char *src, int *toktype, const char **tokstart, size_t *toklen) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern const char *__wt_json_tokname(int toktype) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
extern int __wt_json_to_item(WT_SESSION_IMPL *session, const char *jstr, const char *format, WT_CURSOR_JSON *json, bool iskey, WT_ITEM *item) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -572,6 +572,7 @@ extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp) WT_GCC_FUNC_DECL_AT
extern bool __wt_fsync_background_chk(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_fsync_background(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_close_connection_close(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_file_zero(WT_SESSION_IMPL *session, WT_FH *fh, wt_off_t start_off, wt_off_t size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_os_inmemory(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_fopen(WT_SESSION_IMPL *session, const char *name, uint32_t open_flags, uint32_t flags, WT_FSTREAM **fstrp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_os_stdio(WT_SESSION_IMPL *session);
@@ -828,7 +829,7 @@ extern int __wt_verbose_dump_txn(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTR
extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_checkpoint_progress(WT_SESSION_IMPL *session, bool closing);
extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[], bool waiting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern void __wt_checkpoint_tree_reconcile_update(WT_SESSION_IMPL *session, wt_timestamp_t oldest_start_ts, wt_timestamp_t newest_start_ts, wt_timestamp_t newest_stop_ts);
+extern void __wt_checkpoint_tree_reconcile_update(WT_SESSION_IMPL *session, wt_timestamp_t oldest_start_ts, wt_timestamp_t newest_durable_ts, wt_timestamp_t newest_stop_ts);
extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_checkpoint_sync(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -857,6 +858,7 @@ extern void __wt_timestamp_to_hex_string(wt_timestamp_t ts, char *hex_timestamp)
extern void __wt_verbose_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t ts, const char *msg);
extern int __wt_txn_parse_timestamp_raw(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_parse_timestamp(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t *timestamp, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_get_pinned_timestamp(WT_SESSION_IMPL *session, wt_timestamp_t *tsp, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_query_timestamp(WT_SESSION_IMPL *session, char *hex_timestamp, const char *cfg[], bool global_txn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/third_party/wiredtiger/src/include/meta.h b/src/third_party/wiredtiger/src/include/meta.h
index b1f9a557934..23c30e9a031 100644
--- a/src/third_party/wiredtiger/src/include/meta.h
+++ b/src/third_party/wiredtiger/src/include/meta.h
@@ -77,7 +77,7 @@ struct __wt_ckpt {
uint64_t write_gen; /* Write generation */
wt_timestamp_t oldest_start_ts; /* Aggregated timestamp information */
- wt_timestamp_t newest_start_ts;
+ wt_timestamp_t newest_durable_ts;
wt_timestamp_t newest_stop_ts;
void *bpriv; /* Block manager private */
diff --git a/src/third_party/wiredtiger/src/include/schema.h b/src/third_party/wiredtiger/src/include/schema.h
index 74af41132f2..cd217fe9c51 100644
--- a/src/third_party/wiredtiger/src/include/schema.h
+++ b/src/third_party/wiredtiger/src/include/schema.h
@@ -83,6 +83,9 @@ struct __wt_table {
#define WT_SESSION_LOCKED_TABLE \
(WT_SESSION_LOCKED_TABLE_READ | \
WT_SESSION_LOCKED_TABLE_WRITE)
+#define WT_SESSION_LOCKED_HOTBACKUP \
+ (WT_SESSION_LOCKED_HOTBACKUP_READ | \
+ WT_SESSION_LOCKED_HOTBACKUP_WRITE)
/*
* WT_WITH_LOCK_WAIT --
@@ -257,6 +260,76 @@ struct __wt_table {
} while (0)
/*
+ * WT_WITH_HOTBACKUP_READ_LOCK --
+ * Acquire the hot backup read lock and perform an operation provided that
+ * there is no hot backup in progress. The skipp parameter can be used to
+ * check whether the operation got skipped or not.
+ */
+#define WT_WITH_HOTBACKUP_READ_LOCK(session, op, skipp) do { \
+ WT_CONNECTION_IMPL *__conn = S2C(session); \
+ if ((skipp) != (bool *)NULL) \
+ *(bool *)(skipp) = true; \
+ if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP)) { \
+ if (!__conn->hot_backup) { \
+ if ((skipp) != (bool *)NULL) \
+ *(bool *)(skipp) = false; \
+ op; \
+ } \
+ } else { \
+ __wt_readlock(session, &__conn->hot_backup_lock); \
+ F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
+ if (!__conn->hot_backup) { \
+ if ((skipp) != (bool *)NULL) \
+ *(bool *)(skipp) = false; \
+ op; \
+ } \
+ F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
+ __wt_readunlock(session, &__conn->hot_backup_lock); \
+ } \
+} while (0)
+
+/*
+ * WT_WITH_HOTBACKUP_WRITE_LOCK --
+ * Acquire the hot backup write lock and perform an operation.
+ */
+#define WT_WITH_HOTBACKUP_WRITE_LOCK(session, op) do { \
+ WT_CONNECTION_IMPL *__conn = S2C(session); \
+ if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE)) { \
+ op; \
+ } else { \
+ WT_ASSERT(session, \
+ !F_ISSET( \
+ session, WT_SESSION_LOCKED_HOTBACKUP_READ)); \
+ __wt_writelock(session, &__conn->hot_backup_lock); \
+ F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \
+ op; \
+ F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_WRITE); \
+ __wt_writeunlock(session, &__conn->hot_backup_lock); \
+ } \
+} while (0)
+
+/*
+ * WT_WITH_HOTBACKUP_READ_LOCK_UNCOND --
+ * Acquire the hot backup read lock and perform an operation
+ * unconditionally. This is a specialized macro for a few isolated cases.
+ * Code that wishes to acquire the read lock should default to using
+ * WT_WITH_HOTBACKUP_READ_LOCK which checks that there is no hot backup in
+ * progress.
+ */
+#define WT_WITH_HOTBACKUP_READ_LOCK_UNCOND(session, op) do { \
+ WT_CONNECTION_IMPL *__conn = S2C(session); \
+ if (F_ISSET(session, WT_SESSION_LOCKED_HOTBACKUP)) { \
+ op; \
+ } else { \
+ __wt_readlock(session, &__conn->hot_backup_lock); \
+ F_SET(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
+ op; \
+ F_CLR(session, WT_SESSION_LOCKED_HOTBACKUP_READ); \
+ __wt_readunlock(session, &__conn->hot_backup_lock); \
+ } \
+} while (0)
+
+/*
* WT_WITHOUT_LOCKS --
* Drop the handle, table and/or schema locks, perform an operation,
* re-acquire the lock(s).
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index 0d99b4cc6e0..c7ae31b4e54 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -172,23 +172,25 @@ struct __wt_session_impl {
#define WT_SESSION_LOCKED_CHECKPOINT 0x0000040u
#define WT_SESSION_LOCKED_HANDLE_LIST_READ 0x0000080u
#define WT_SESSION_LOCKED_HANDLE_LIST_WRITE 0x0000100u
-#define WT_SESSION_LOCKED_METADATA 0x0000200u
-#define WT_SESSION_LOCKED_PASS 0x0000400u
-#define WT_SESSION_LOCKED_SCHEMA 0x0000800u
-#define WT_SESSION_LOCKED_SLOT 0x0001000u
-#define WT_SESSION_LOCKED_TABLE_READ 0x0002000u
-#define WT_SESSION_LOCKED_TABLE_WRITE 0x0004000u
-#define WT_SESSION_LOCKED_TURTLE 0x0008000u
-#define WT_SESSION_LOGGING_INMEM 0x0010000u
-#define WT_SESSION_LOOKASIDE_CURSOR 0x0020000u
-#define WT_SESSION_NO_DATA_HANDLES 0x0040000u
-#define WT_SESSION_NO_LOGGING 0x0080000u
-#define WT_SESSION_NO_RECONCILE 0x0100000u
-#define WT_SESSION_NO_SCHEMA_LOCK 0x0200000u
-#define WT_SESSION_QUIET_CORRUPT_FILE 0x0400000u
-#define WT_SESSION_READ_WONT_NEED 0x0800000u
-#define WT_SESSION_SCHEMA_TXN 0x1000000u
-#define WT_SESSION_SERVER_ASYNC 0x2000000u
+#define WT_SESSION_LOCKED_HOTBACKUP_READ 0x0000200u
+#define WT_SESSION_LOCKED_HOTBACKUP_WRITE 0x0000400u
+#define WT_SESSION_LOCKED_METADATA 0x0000800u
+#define WT_SESSION_LOCKED_PASS 0x0001000u
+#define WT_SESSION_LOCKED_SCHEMA 0x0002000u
+#define WT_SESSION_LOCKED_SLOT 0x0004000u
+#define WT_SESSION_LOCKED_TABLE_READ 0x0008000u
+#define WT_SESSION_LOCKED_TABLE_WRITE 0x0010000u
+#define WT_SESSION_LOCKED_TURTLE 0x0020000u
+#define WT_SESSION_LOGGING_INMEM 0x0040000u
+#define WT_SESSION_LOOKASIDE_CURSOR 0x0080000u
+#define WT_SESSION_NO_DATA_HANDLES 0x0100000u
+#define WT_SESSION_NO_LOGGING 0x0200000u
+#define WT_SESSION_NO_RECONCILE 0x0400000u
+#define WT_SESSION_NO_SCHEMA_LOCK 0x0800000u
+#define WT_SESSION_QUIET_CORRUPT_FILE 0x1000000u
+#define WT_SESSION_READ_WONT_NEED 0x2000000u
+#define WT_SESSION_SCHEMA_TXN 0x4000000u
+#define WT_SESSION_SERVER_ASYNC 0x8000000u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint32_t flags;
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index f5d1075581b..e2b2aae3d33 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -708,7 +708,9 @@ struct __wt_connection_stats {
int64_t txn_pinned_snapshot_range;
int64_t txn_pinned_timestamp;
int64_t txn_pinned_timestamp_checkpoint;
+ int64_t txn_pinned_timestamp_reader;
int64_t txn_pinned_timestamp_oldest;
+ int64_t txn_timestamp_oldest_active_read;
int64_t txn_sync;
int64_t txn_commit;
int64_t txn_rollback;
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 7fed51cc76b..c60b1772fe9 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -23,6 +23,12 @@
#define WT_TXN_OLDEST_WAIT 0x2u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
+/* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_TXN_TS_ALREADY_LOCKED 0x1u
+#define WT_TXN_TS_INCLUDE_CKPT 0x2u
+#define WT_TXN_TS_INCLUDE_OLDEST 0x4u
+/* AUTOMATIC FLAG VALUE GENERATION STOP */
+
/*
* Transaction ID comparison dealing with edge cases.
*
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 5a091db45a0..890fbb26a74 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -1791,9 +1791,6 @@ struct __wt_session {
* @config{read_timestamp, read using the specified timestamp. The
* supplied value must not be older than the current oldest timestamp.
* See @ref transaction_timestamps., a string; default empty.}
- * @config{round_to_oldest, if read timestamp is earlier than oldest
- * timestamp\, read timestamp will be rounded to oldest timestamp., a
- * boolean flag; default \c false.}
* @config{roundup_timestamps = (, round up timestamps of the
* transaction. This setting alters the visibility expected in a
* transaction. See @ref transaction_timestamps., a set of related
@@ -1929,9 +1926,6 @@ struct __wt_session {
* supplied value must not be older than the current oldest timestamp.
* This can only be set once for a transaction. See @ref
* transaction_timestamps., a string; default empty.}
- * @config{round_to_oldest, if read timestamp is earlier than oldest
- * timestamp\, read timestamp will be rounded to oldest timestamp., a
- * boolean flag; default \c false.}
* @configend
* @errors
*/
@@ -5805,17 +5799,24 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1383
/*!
* transaction: transaction range of timestamps pinned by the oldest
+ * active read timestamp
+ */
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_READER 1384
+/*!
+ * transaction: transaction range of timestamps pinned by the oldest
* timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1384
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1385
+/*! transaction: transaction read timestamp of the oldest active reader */
+#define WT_STAT_CONN_TXN_TIMESTAMP_OLDEST_ACTIVE_READ 1386
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1385
+#define WT_STAT_CONN_TXN_SYNC 1387
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1386
+#define WT_STAT_CONN_TXN_COMMIT 1388
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1387
+#define WT_STAT_CONN_TXN_ROLLBACK 1389
/*! transaction: update conflicts */
-#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1388
+#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1390
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index 10b52246987..1dc6c60a137 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -629,68 +629,6 @@ err: WT_TRET(__wt_fs_directory_list_free(session, &logfiles, logcount));
}
/*
- * __log_zero --
- * Zero a log file.
- */
-static int
-__log_zero(WT_SESSION_IMPL *session,
- WT_FH *fh, wt_off_t start_off, wt_off_t len)
-{
- WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(zerobuf);
- WT_DECL_RET;
- WT_LOG *log;
- uint32_t allocsize, bufsz, off, partial, wrlen;
-
- conn = S2C(session);
- log = conn->log;
- allocsize = log->allocsize;
- zerobuf = NULL;
- if (allocsize < WT_MEGABYTE)
- bufsz = WT_MEGABYTE;
- else
- bufsz = allocsize;
- /*
- * If they're using smaller log files, cap it at the file size.
- */
- if (conn->log_file_max < bufsz)
- bufsz = (uint32_t)conn->log_file_max;
- WT_RET(__wt_scr_alloc(session, bufsz, &zerobuf));
- memset(zerobuf->mem, 0, zerobuf->memsize);
- WT_STAT_CONN_INCR(session, log_zero_fills);
-
- /*
- * Read in a chunk starting at the end of the file. Keep going until
- * we reach the beginning or we find a chunk that contains any non-zero
- * bytes. Compare against a known zero byte chunk.
- */
- off = (uint32_t)start_off;
- while (off < (uint32_t)len) {
- /*
- * Typically we start to zero the file after the log header
- * and the bufsz is a sector-aligned size. So we want to
- * align our writes when we can.
- */
- partial = off % bufsz;
- if (partial != 0)
- wrlen = bufsz - partial;
- else
- wrlen = bufsz;
- /*
- * Check if we're writing a partial amount at the end too.
- */
- if ((uint32_t)len - off < bufsz)
- wrlen = (uint32_t)len - off;
- __wt_capacity_throttle(session, wrlen, WT_THROTTLE_LOG);
- WT_ERR(__wt_write(session,
- fh, (wt_off_t)off, wrlen, zerobuf->mem));
- off += wrlen;
- }
-err: __wt_scr_free(session, &zerobuf);
- return (ret);
-}
-
-/*
* __log_prealloc --
* Pre-allocate a log file.
*/
@@ -710,7 +648,7 @@ __log_prealloc(WT_SESSION_IMPL *session, WT_FH *fh)
* and zero the log file based on what is available.
*/
if (FLD_ISSET(conn->log_flags, WT_CONN_LOG_ZERO_FILL))
- return (__log_zero(session, fh,
+ return (__wt_file_zero(session, fh,
log->first_record, conn->log_file_max));
/* If configured to not extend the file, we're done. */
@@ -1235,7 +1173,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created)
WT_LOG *log;
WT_LSN end_lsn, logrec_lsn;
u_int yield_cnt;
- bool create_log;
+ bool create_log, skipp;
conn = S2C(session);
log = conn->log;
@@ -1284,13 +1222,11 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created)
*/
create_log = true;
if (conn->log_prealloc > 0 && !conn->hot_backup) {
- __wt_readlock(session, &conn->hot_backup_lock);
- if (conn->hot_backup)
- __wt_readunlock(session, &conn->hot_backup_lock);
- else {
- ret = __log_alloc_prealloc(session, log->fileid);
- __wt_readunlock(session, &conn->hot_backup_lock);
+ WT_WITH_HOTBACKUP_READ_LOCK(session,
+ ret = __log_alloc_prealloc(session, log->fileid),
+ &skipp);
+ if (!skipp) {
/*
* If ret is 0 it means we found a pre-allocated file.
* If ret is WT_NOTFOUND, create the new log file and
@@ -1517,24 +1453,23 @@ __log_truncate_file(WT_SESSION_IMPL *session, WT_FH *log_fh, wt_off_t offset)
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_LOG *log;
+ bool skipp;
conn = S2C(session);
log = conn->log;
if (!F_ISSET(log, WT_LOG_TRUNCATE_NOTSUP) && !conn->hot_backup) {
- __wt_readlock(session, &conn->hot_backup_lock);
- if (conn->hot_backup)
- __wt_readunlock(session, &conn->hot_backup_lock);
- else {
- ret = __wt_ftruncate(session, log_fh, offset);
- __wt_readunlock(session, &conn->hot_backup_lock);
+ WT_WITH_HOTBACKUP_READ_LOCK(session,
+ ret = __wt_ftruncate(
+ session, log_fh, offset), &skipp);
+ if (!skipp) {
if (ret != ENOTSUP)
return (ret);
F_SET(log, WT_LOG_TRUNCATE_NOTSUP);
}
}
- return (__log_zero(session, log_fh, offset, conn->log_file_max));
+ return (__wt_file_zero(session, log_fh, offset, conn->log_file_max));
}
/*
diff --git a/src/third_party/wiredtiger/src/meta/meta_ckpt.c b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
index 58711cc4e92..c4eb01b2d39 100644
--- a/src/third_party/wiredtiger/src/meta/meta_ckpt.c
+++ b/src/third_party/wiredtiger/src/meta/meta_ckpt.c
@@ -352,16 +352,16 @@ __ckpt_load(WT_SESSION_IMPL *session,
WT_RET_NOTFOUND_OK(ret);
ckpt->oldest_start_ts =
ret == WT_NOTFOUND || a.len == 0 ? WT_TS_NONE : (uint64_t)a.val;
- ret = __wt_config_subgets(session, v, "newest_start_ts", &a);
+ ret = __wt_config_subgets(session, v, "newest_durable_ts", &a);
WT_RET_NOTFOUND_OK(ret);
- ckpt->newest_start_ts =
+ ckpt->newest_durable_ts =
ret == WT_NOTFOUND || a.len == 0 ? WT_TS_NONE : (uint64_t)a.val;
ret = __wt_config_subgets(session, v, "newest_stop_ts", &a);
WT_RET_NOTFOUND_OK(ret);
ckpt->newest_stop_ts =
ret == WT_NOTFOUND || a.len == 0 ? WT_TS_MAX : (uint64_t)a.val;
__wt_timestamp_addr_check(session,
- ckpt->oldest_start_ts, ckpt->newest_start_ts, ckpt->newest_stop_ts);
+ ckpt->oldest_start_ts, ckpt->newest_stop_ts);
WT_RET(__wt_config_subgets(session, v, "write_gen", &a));
if (a.len == 0)
@@ -433,8 +433,8 @@ __wt_meta_ckptlist_set(WT_SESSION_IMPL *session,
__wt_seconds(session, &ckpt->sec);
}
- __wt_timestamp_addr_check(session, ckpt->oldest_start_ts,
- ckpt->newest_start_ts, ckpt->newest_stop_ts);
+ __wt_timestamp_addr_check(session,
+ ckpt->oldest_start_ts, ckpt->newest_stop_ts);
WT_ERR(__wt_buf_catfmt(session, buf, "%s%s", sep, ckpt->name));
sep = ",";
@@ -452,7 +452,7 @@ __wt_meta_ckptlist_set(WT_SESSION_IMPL *session,
",time=%" PRIu64
",size=%" PRId64
",oldest_start_ts=%" PRId64
- ",newest_start_ts=%" PRId64
+ ",newest_durable_ts=%" PRId64
",newest_stop_ts=%" PRId64
",write_gen=%" PRId64 ")",
(int)ckpt->addr.size, (char *)ckpt->addr.data,
@@ -460,7 +460,7 @@ __wt_meta_ckptlist_set(WT_SESSION_IMPL *session,
ckpt->sec,
(int64_t)ckpt->size,
(int64_t)ckpt->oldest_start_ts,
- (int64_t)ckpt->newest_start_ts,
+ (int64_t)ckpt->newest_durable_ts,
(int64_t)ckpt->newest_stop_ts,
(int64_t)ckpt->write_gen));
}
diff --git a/src/third_party/wiredtiger/src/os_common/os_fhandle.c b/src/third_party/wiredtiger/src/os_common/os_fhandle.c
index df67508c4fe..ca2fe730444 100644
--- a/src/third_party/wiredtiger/src/os_common/os_fhandle.c
+++ b/src/third_party/wiredtiger/src/os_common/os_fhandle.c
@@ -500,3 +500,53 @@ __wt_close_connection_close(WT_SESSION_IMPL *session)
} WT_TAILQ_SAFE_REMOVE_END
return (ret);
}
+
+/*
+ * __wt_file_zero --
+ * Zero the file from start_off up to, but not including, offset size.
+ */
+int
+__wt_file_zero(WT_SESSION_IMPL *session,
+ WT_FH *fh, wt_off_t start_off, wt_off_t size)
+{
+ WT_DECL_ITEM(zerobuf);
+ WT_DECL_RET;
+ WT_THROTTLE_TYPE type;
+ uint64_t bufsz, off, partial, wrlen;
+
+ zerobuf = NULL;
+ bufsz = WT_MIN((uint64_t)size, WT_MEGABYTE);
+ /*
+ * For now logging is the only type and statistic. This needs
+ * updating if block manager decides to use this function.
+ */
+ type = WT_THROTTLE_LOG;
+ WT_STAT_CONN_INCR(session, log_zero_fills);
+ WT_RET(__wt_scr_alloc(session, bufsz, &zerobuf));
+ memset(zerobuf->mem, 0, zerobuf->memsize);
+ off = (uint64_t)start_off;
+ while (off < (uint64_t)size) {
+ /*
+ * Align our writes when we can: log files typically start zeroing
+ * just past the log header and bufsz is a sector-aligned size, so
+ * shorten the first write to end on a bufsz boundary; the writes
+ * that follow are then bufsz-aligned.
+ */
+ partial = off % bufsz;
+ if (partial != 0)
+ wrlen = bufsz - partial;
+ else
+ wrlen = bufsz;
+ /*
+ * Check if we're writing a partial amount at the end too.
+ */
+ if ((uint64_t)size - off < bufsz)
+ wrlen = (uint64_t)size - off;
+ __wt_capacity_throttle(session, wrlen, type);
+ WT_ERR(__wt_write(session,
+ fh, (wt_off_t)off, (size_t)wrlen, zerobuf->mem));
+ off += wrlen;
+ }
+err: __wt_scr_free(session, &zerobuf);
+ return (ret);
+}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index f25ada93885..90db828b1a5 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -151,7 +151,7 @@ typedef struct {
uint64_t recno;
WT_ITEM key;
wt_timestamp_t oldest_start_ts;
- wt_timestamp_t newest_start_ts;
+ wt_timestamp_t newest_durable_ts;
wt_timestamp_t newest_stop_ts;
/* Saved minimum split-size boundary information. */
@@ -159,7 +159,7 @@ typedef struct {
uint64_t min_recno;
WT_ITEM min_key;
wt_timestamp_t min_oldest_start_ts;
- wt_timestamp_t min_newest_start_ts;
+ wt_timestamp_t min_newest_durable_ts;
wt_timestamp_t min_newest_stop_ts;
size_t min_offset; /* byte offset */
@@ -279,7 +279,7 @@ typedef struct {
WT_UPDATE *upd; /* Update to write (or NULL) */
uint64_t txnid; /* Transaction ID, timestamps */
- wt_timestamp_t start_ts, stop_ts;
+ wt_timestamp_t start_ts, durable_ts, stop_ts;
bool upd_saved; /* Updates saved to list */
@@ -318,7 +318,7 @@ static int __rec_las_wrapup_err(WT_SESSION_IMPL *, WT_RECONCILE *);
static int __rec_root_write(WT_SESSION_IMPL *, WT_PAGE *, uint32_t);
static int __rec_row_int(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
static int __rec_row_leaf(WT_SESSION_IMPL *,
- WT_RECONCILE *, WT_PAGE *, WT_SALVAGE_COOKIE *);
+ WT_RECONCILE *, WT_REF *, WT_SALVAGE_COOKIE *);
static int __rec_row_leaf_insert(
WT_SESSION_IMPL *, WT_RECONCILE *, WT_INSERT *);
static int __rec_row_merge(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
@@ -469,7 +469,7 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
ret = __rec_row_int(session, r, page));
break;
case WT_PAGE_ROW_LEAF:
- ret = __rec_row_leaf(session, r, page, salvage);
+ ret = __rec_row_leaf(session, r, ref, salvage);
break;
default:
ret = __wt_illegal_value(session, page->type);
@@ -1186,6 +1186,7 @@ __rec_append_orig_value(WT_SESSION_IMPL *session,
append->txnid = upd->txnid;
append->start_ts = upd->start_ts;
append->durable_ts = upd->durable_ts;
+ append->stop_ts = upd->stop_ts;
append->next = upd->next;
}
@@ -1396,15 +1397,16 @@ __rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins,
* TIMESTAMP-FIXME
* This is waiting on the WT_UPDATE structure's start/stop
* timestamp work. For now, if we don't have a timestamp,
- * just pretend it's durable, otherwise pretend the start
- * and stop timestamps are the same.
+ * just pretend it's durable, otherwise pretend the durable,
+ * start and stop timestamps are all the same.
*
*/
if (upd_select->upd->start_ts == WT_TS_NONE) {
- upd_select->start_ts = WT_TS_NONE;
+ upd_select->start_ts =
+ upd_select->durable_ts = WT_TS_NONE;
upd_select->stop_ts = WT_TS_MAX;
} else
- upd_select->start_ts =
+ upd_select->start_ts = upd_select->durable_ts =
upd_select->stop_ts = upd_select->upd->start_ts;
/*
@@ -1453,7 +1455,7 @@ __rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins,
* order), so we track the maximum transaction ID and the newest update
* with a timestamp (if any).
*/
- timestamp = first_ts_upd == NULL ? 0 : first_ts_upd->start_ts;
+ timestamp = first_ts_upd == NULL ? 0 : first_ts_upd->durable_ts;
all_visible = upd == first_txn_upd && !(uncommitted || prepared) &&
(F_ISSET(r, WT_REC_VISIBLE_ALL) ?
__wt_txn_visible_all(session, max_txn, timestamp) :
@@ -1515,14 +1517,11 @@ __rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins,
if (WT_TXNID_LT(r->unstable_txn, first_upd->txnid))
r->unstable_txn = first_upd->txnid;
if (first_ts_upd != NULL) {
- /*
- * FIXME Disable this assertion until fixed by WT-4598.
- * WT_ASSERT(session,
- * first_ts_upd->prepare_state ==
- * WT_PREPARE_INPROGRESS ||
- * first_ts_upd->start_ts <=
- * first_ts_upd->durable_ts);
- */
+ WT_ASSERT(session,
+ first_ts_upd->prepare_state ==
+ WT_PREPARE_INPROGRESS ||
+ first_ts_upd->start_ts <= first_ts_upd->durable_ts);
+
if (r->unstable_timestamp < first_ts_upd->start_ts)
r->unstable_timestamp = first_ts_upd->start_ts;
@@ -1545,12 +1544,10 @@ __rec_upd_select(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins,
* to use the two independently and be confident both
* will be set.
*/
- /*
- * FIXME Disable this assertion until fixed by WT-4598.
- * WT_ASSERT(session,
- * upd->prepare_state == WT_PREPARE_INPROGRESS ||
- * upd->durable_ts >= upd->start_ts);
- */
+ WT_ASSERT(session,
+ upd->prepare_state == WT_PREPARE_INPROGRESS ||
+ upd->durable_ts >= upd->start_ts);
+
if (upd->start_ts < r->unstable_timestamp)
r->unstable_timestamp = upd->start_ts;
/*
@@ -2244,7 +2241,7 @@ __wt_split_page_size(int split_pct, uint32_t maxpagesize, uint32_t allocsize)
*/
static void
__rec_addr_ts_init(WT_RECONCILE *r, wt_timestamp_t *oldest_start_tsp,
- wt_timestamp_t *newest_start_tsp, wt_timestamp_t *newest_stop_tsp)
+ wt_timestamp_t *newest_durable_ts, wt_timestamp_t *newest_stop_tsp)
{
/*
* If the page format supports address timestamps (and not fixed-length
@@ -2254,9 +2251,9 @@ __rec_addr_ts_init(WT_RECONCILE *r, wt_timestamp_t *oldest_start_tsp,
* the oldest/newest timestamps to simple durability.
*/
*oldest_start_tsp = WT_TS_MAX;
- *newest_start_tsp = *newest_stop_tsp = WT_TS_NONE;
+ *newest_durable_ts = *newest_stop_tsp = WT_TS_NONE;
if (!__wt_process.page_version_ts || r->page->type == WT_PAGE_COL_FIX) {
- *oldest_start_tsp = *newest_start_tsp = WT_TS_NONE;
+ *oldest_start_tsp = *newest_durable_ts = WT_TS_NONE;
*newest_stop_tsp = WT_TS_MAX;
}
}
@@ -2267,12 +2264,12 @@ __rec_addr_ts_init(WT_RECONCILE *r, wt_timestamp_t *oldest_start_tsp,
*/
static inline void
__rec_addr_ts_update(WT_RECONCILE *r, wt_timestamp_t oldest_start_ts,
- wt_timestamp_t newest_start_ts, wt_timestamp_t newest_stop_ts)
+ wt_timestamp_t newest_durable_ts, wt_timestamp_t newest_stop_ts)
{
r->cur_ptr->oldest_start_ts =
WT_MIN(oldest_start_ts, r->cur_ptr->oldest_start_ts);
- r->cur_ptr->newest_start_ts =
- WT_MAX(newest_start_ts, r->cur_ptr->newest_start_ts);
+ r->cur_ptr->newest_durable_ts =
+ WT_MAX(newest_durable_ts, r->cur_ptr->newest_durable_ts);
r->cur_ptr->newest_stop_ts =
WT_MAX(newest_stop_ts, r->cur_ptr->newest_stop_ts);
}
@@ -2290,14 +2287,14 @@ __rec_split_chunk_init(
chunk->key.size = 0;
chunk->entries = 0;
__rec_addr_ts_init(r, &chunk->oldest_start_ts,
- &chunk->newest_start_ts, &chunk->newest_stop_ts);
+ &chunk->newest_durable_ts, &chunk->newest_stop_ts);
chunk->min_recno = WT_RECNO_OOB;
/* Don't touch the key item memory, that memory is reused. */
chunk->min_key.size = 0;
chunk->min_entries = 0;
__rec_addr_ts_init(r, &chunk->min_oldest_start_ts,
- &chunk->min_newest_start_ts, &chunk->min_newest_stop_ts);
+ &chunk->min_newest_durable_ts, &chunk->min_newest_stop_ts);
chunk->min_offset = 0;
/*
@@ -2760,7 +2757,8 @@ __rec_split_crossing_bnd(
WT_RET(__rec_split_row_promote(
session, r, &r->cur_ptr->min_key, r->page->type));
r->cur_ptr->min_oldest_start_ts = r->cur_ptr->oldest_start_ts;
- r->cur_ptr->min_newest_start_ts = r->cur_ptr->newest_start_ts;
+ r->cur_ptr->min_newest_durable_ts =
+ r->cur_ptr->newest_durable_ts;
r->cur_ptr->min_newest_stop_ts = r->cur_ptr->newest_stop_ts;
/* Assert we're not re-entering this code. */
@@ -2818,8 +2816,9 @@ __rec_split_finish_process_prev(WT_SESSION_IMPL *session, WT_RECONCILE *r)
prev_ptr->entries += cur_ptr->entries;
prev_ptr->oldest_start_ts =
WT_MIN(prev_ptr->oldest_start_ts, cur_ptr->oldest_start_ts);
- prev_ptr->newest_start_ts =
- WT_MAX(prev_ptr->newest_start_ts, cur_ptr->newest_start_ts);
+ prev_ptr->newest_durable_ts =
+ WT_MAX(prev_ptr->newest_durable_ts,
+ cur_ptr->newest_durable_ts);
prev_ptr->newest_stop_ts =
WT_MAX(prev_ptr->newest_stop_ts, cur_ptr->newest_stop_ts);
dsk = r->cur_ptr->image.mem;
@@ -2873,15 +2872,16 @@ __rec_split_finish_process_prev(WT_SESSION_IMPL *session, WT_RECONCILE *r)
prev_ptr->min_key.data, prev_ptr->min_key.size));
cur_ptr->oldest_start_ts =
WT_MIN(prev_ptr->oldest_start_ts, cur_ptr->oldest_start_ts);
- cur_ptr->newest_start_ts =
- WT_MAX(prev_ptr->newest_start_ts, cur_ptr->newest_start_ts);
+ cur_ptr->newest_durable_ts =
+ WT_MAX(prev_ptr->newest_durable_ts,
+ cur_ptr->newest_durable_ts);
cur_ptr->newest_stop_ts =
WT_MAX(prev_ptr->newest_stop_ts, cur_ptr->newest_stop_ts);
cur_ptr->image.size += len_to_move;
prev_ptr->entries = prev_ptr->min_entries;
prev_ptr->oldest_start_ts = prev_ptr->min_oldest_start_ts;
- prev_ptr->newest_start_ts = prev_ptr->min_newest_start_ts;
+ prev_ptr->newest_durable_ts = prev_ptr->min_newest_durable_ts;
prev_ptr->newest_stop_ts = prev_ptr->min_newest_stop_ts;
prev_ptr->image.size -= len_to_move;
}
@@ -3040,11 +3040,10 @@ done: if (F_ISSET(r, WT_REC_LOOKASIDE)) {
multi->page_las.unstable_txn = r->unstable_txn;
WT_ASSERT(session, r->unstable_txn != WT_TXN_NONE);
multi->page_las.max_timestamp = r->max_timestamp;
- /*
- * FIXME Disable this assertion until fixed by WT-4598.
- * WT_ASSERT(session, r->all_upd_prepare_in_prog == true ||
- * r->unstable_durable_timestamp >= r->unstable_timestamp);
- */
+
+ WT_ASSERT(session, r->all_upd_prepare_in_prog == true ||
+ r->unstable_durable_timestamp >= r->unstable_timestamp);
+
multi->page_las.unstable_timestamp = r->unstable_timestamp;
multi->page_las.unstable_durable_timestamp =
r->unstable_durable_timestamp;
@@ -3297,7 +3296,7 @@ __rec_split_write(WT_SESSION_IMPL *session, WT_RECONCILE *r,
/* Initialize the address (set the addr type for the parent). */
multi->addr.oldest_start_ts = chunk->oldest_start_ts;
- multi->addr.newest_start_ts = chunk->newest_start_ts;
+ multi->addr.newest_durable_ts = chunk->newest_durable_ts;
multi->addr.newest_stop_ts = chunk->newest_stop_ts;
switch (page->type) {
@@ -3765,7 +3764,7 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
WT_KV *val;
WT_PAGE *child, *page;
WT_REF *ref;
- wt_timestamp_t oldest_start_ts, newest_start_ts, newest_stop_ts;
+ wt_timestamp_t oldest_start_ts, newest_durable_ts, newest_stop_ts;
bool hazard;
btree = S2BT(session);
@@ -3854,13 +3853,13 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
val->cell_len = 0;
val->len = val->buf.size;
oldest_start_ts = vpack->oldest_start_ts;
- newest_start_ts = vpack->newest_start_ts;
+ newest_durable_ts = vpack->newest_durable_ts;
newest_stop_ts = vpack->newest_stop_ts;
} else {
__rec_cell_build_addr(
session, r, addr, false, ref->ref_recno);
oldest_start_ts = addr->oldest_start_ts;
- newest_start_ts = addr->newest_start_ts;
+ newest_durable_ts = addr->newest_durable_ts;
newest_stop_ts = addr->newest_stop_ts;
}
WT_CHILD_RELEASE_ERR(session, hazard, ref);
@@ -3871,8 +3870,8 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
/* Copy the value onto the page. */
__rec_image_copy(session, r, val);
- __rec_addr_ts_update(
- r, oldest_start_ts, newest_start_ts, newest_stop_ts);
+ __rec_addr_ts_update(r,
+ oldest_start_ts, newest_durable_ts, newest_stop_ts);
} WT_INTL_FOREACH_END;
/* Write the remnant page. */
@@ -3916,7 +3915,7 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
/* Copy the value onto the page. */
__rec_image_copy(session, r, val);
__rec_addr_ts_update(r, addr->oldest_start_ts,
- addr->newest_start_ts, addr->newest_stop_ts);
+ addr->newest_durable_ts, addr->newest_stop_ts);
}
return (0);
}
@@ -4128,7 +4127,7 @@ __rec_col_fix_slvg(WT_SESSION_IMPL *session,
static int
__rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r,
WT_SALVAGE_COOKIE *salvage, WT_ITEM *value,
- wt_timestamp_t start_ts, wt_timestamp_t stop_ts,
+ wt_timestamp_t start_ts, wt_timestamp_t durable_ts ,wt_timestamp_t stop_ts,
uint64_t rle, bool deleted, bool overflow_type)
{
WT_BTREE *btree;
@@ -4193,7 +4192,7 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r,
WT_RET(__rec_dict_replace(
session, r, start_ts, stop_ts, rle, val));
__rec_image_copy(session, r, val);
- __rec_addr_ts_update(r, start_ts, start_ts, stop_ts);
+ __rec_addr_ts_update(r, start_ts, durable_ts, stop_ts);
/* Update the starting record number in case we split. */
r->recno += rle;
@@ -4215,6 +4214,7 @@ __rec_col_var(WT_SESSION_IMPL *session,
wt_timestamp_t start_ts, stop_ts; /* Timestamps */
bool deleted; /* If deleted */
} last;
+ WT_ADDR *addr;
WT_BTREE *btree;
WT_CELL *cell;
WT_CELL_UNPACK *vpack, _vpack;
@@ -4226,7 +4226,7 @@ __rec_col_var(WT_SESSION_IMPL *session,
WT_PAGE *page;
WT_UPDATE *upd;
WT_UPDATE_SELECT upd_select;
- wt_timestamp_t start_ts, stop_ts;
+ wt_timestamp_t start_ts, durable_ts, newest_durable_ts, stop_ts;
uint64_t n, nrepeat, repeat_count, rle, skip, src_recno;
uint32_t i, size;
bool deleted, orig_deleted, update_no_copy;
@@ -4240,9 +4240,24 @@ __rec_col_var(WT_SESSION_IMPL *session,
size = 0;
data = NULL;
+ /*
+ * Acquire the newest-durable timestamp for this page so we can roll it
+ * forward. If it exists, it's in the WT_REF structure or the parent's
+ * disk image.
+ */
+ if ((addr = pageref->addr) == NULL)
+ newest_durable_ts = WT_TS_NONE;
+ else if (__wt_off_page(pageref->home, addr))
+ newest_durable_ts = addr->newest_durable_ts;
+ else {
+ __wt_cell_unpack(session, pageref->home, pageref->addr, vpack);
+ newest_durable_ts = vpack->newest_durable_ts;
+ }
+
/* Set the "last" values to cause failure if they're not set. */
last.value = r->last;
- last.start_ts = last.stop_ts = WT_TS_NONE;
+ last.start_ts = WT_TS_MAX;
+ last.stop_ts = WT_TS_NONE;
last.deleted = false;
/*
@@ -4250,7 +4265,8 @@ __rec_col_var(WT_SESSION_IMPL *session,
* [-Werror=maybe-uninitialized]
*/
/* NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) */
- start_ts = stop_ts = WT_TS_NONE;
+ start_ts = WT_TS_MAX;
+ durable_ts = stop_ts = WT_TS_NONE;
WT_RET(__rec_split_init(session,
r, page, pageref->ref_recno, btree->maxleafpage_precomp));
@@ -4283,7 +4299,7 @@ __rec_col_var(WT_SESSION_IMPL *session,
salvage->take += salvage->missing;
} else
WT_ERR(__rec_col_var_helper(session, r,
- NULL, NULL, WT_TS_NONE, WT_TS_MAX,
+ NULL, NULL, WT_TS_NONE, WT_TS_NONE, WT_TS_MAX,
salvage->missing, true, false));
}
@@ -4303,58 +4319,53 @@ __rec_col_var(WT_SESSION_IMPL *session,
/* For each entry in the in-memory page... */
WT_COL_FOREACH(page, cip, i) {
ovfl_state = OVFL_IGNORE;
- if ((cell = WT_COL_PTR(page, cip)) == NULL) {
- nrepeat = 1;
- ins = NULL;
- orig_deleted = true;
- } else {
- __wt_cell_unpack(session, page, cell, vpack);
- nrepeat = __wt_cell_rle(vpack);
- ins = WT_SKIP_FIRST(WT_COL_UPDATE(page, cip));
-
- /*
- * If the original value is "deleted", there's no value
- * to compare, we're done.
- */
- orig_deleted = vpack->type == WT_CELL_DEL;
- if (orig_deleted)
- goto record_loop;
+ cell = WT_COL_PTR(page, cip);
+ __wt_cell_unpack(session, page, cell, vpack);
+ nrepeat = __wt_cell_rle(vpack);
+ ins = WT_SKIP_FIRST(WT_COL_UPDATE(page, cip));
- /*
- * Overflow items are tricky: we don't know until we're
- * finished processing the set of values if we need the
- * overflow value or not. If we don't use the overflow
- * item at all, we have to discard it from the backing
- * file, otherwise we'll leak blocks on the checkpoint.
- * That's safe because if the backing overflow value is
- * still needed by any running transaction, we'll cache
- * a copy in the update list.
- *
- * Regardless, we avoid copying in overflow records: if
- * there's a WT_INSERT entry that modifies a reference
- * counted overflow record, we may have to write copies
- * of the overflow record, and in that case we'll do the
- * comparisons, but we don't read overflow items just to
- * see if they match records on either side.
- */
- if (vpack->ovfl) {
- ovfl_state = OVFL_UNUSED;
- goto record_loop;
- }
+ /*
+ * If the original value is "deleted", there's no value
+ * to compare, we're done.
+ */
+ orig_deleted = vpack->type == WT_CELL_DEL;
+ if (orig_deleted)
+ goto record_loop;
- /*
- * If data is Huffman encoded, we have to decode it in
- * order to compare it with the last item we saw, which
- * may have been an update string. This guarantees we
- * find every single pair of objects we can RLE encode,
- * including applications updating an existing record
- * where the new value happens (?) to match a Huffman-
- * encoded value in a previous or next record.
- */
- WT_ERR(__wt_dsk_cell_data_ref(
- session, WT_PAGE_COL_VAR, vpack, orig));
+ /*
+ * Overflow items are tricky: we don't know until we're
+ * finished processing the set of values if we need the
+ * overflow value or not. If we don't use the overflow
+ * item at all, we have to discard it from the backing
+ * file, otherwise we'll leak blocks on the checkpoint.
+ * That's safe because if the backing overflow value is
+ * still needed by any running transaction, we'll cache
+ * a copy in the update list.
+ *
+ * Regardless, we avoid copying in overflow records: if
+ * there's a WT_INSERT entry that modifies a reference
+ * counted overflow record, we may have to write copies
+ * of the overflow record, and in that case we'll do the
+ * comparisons, but we don't read overflow items just to
+ * see if they match records on either side.
+ */
+ if (vpack->ovfl) {
+ ovfl_state = OVFL_UNUSED;
+ goto record_loop;
}
+ /*
+ * If data is Huffman encoded, we have to decode it in
+ * order to compare it with the last item we saw, which
+ * may have been an update string. This guarantees we
+ * find every single pair of objects we can RLE encode,
+ * including applications updating an existing record
+ * where the new value happens (?) to match a Huffman-
+ * encoded value in a previous or next record.
+ */
+ WT_ERR(__wt_dsk_cell_data_ref(
+ session, WT_PAGE_COL_VAR, vpack, orig));
+
record_loop: /*
* Generate on-page entries: loop repeat records, looking for
* WT_INSERT entries matching the record number. The WT_INSERT
@@ -4363,6 +4374,7 @@ record_loop: /*
for (n = 0;
n < nrepeat; n += repeat_count, src_recno += repeat_count) {
start_ts = vpack->start_ts;
+ durable_ts = newest_durable_ts;
stop_ts = vpack->stop_ts;
upd = NULL;
if (ins != NULL && WT_INSERT_RECNO(ins) == src_recno) {
@@ -4378,9 +4390,11 @@ record_loop: /*
* the page.
*/
start_ts = WT_TS_NONE;
+ durable_ts = WT_TS_NONE;
stop_ts = WT_TS_MAX;
} else {
start_ts = upd_select.start_ts;
+ durable_ts = upd_select.durable_ts;
stop_ts = upd_select.stop_ts;
}
ins = WT_SKIP_NEXT(ins);
@@ -4471,8 +4485,8 @@ record_loop: /*
if (rle != 0) {
WT_ERR(__rec_col_var_helper(
session, r, salvage,
- last.value,
- last.start_ts, last.stop_ts,
+ last.value, last.start_ts,
+ durable_ts, last.stop_ts,
rle, last.deleted, false));
rle = 0;
}
@@ -4480,8 +4494,8 @@ record_loop: /*
last.value->data = vpack->data;
last.value->size = vpack->size;
WT_ERR(__rec_col_var_helper(session, r,
- salvage,
- last.value, start_ts, stop_ts,
+ salvage, last.value,
+ start_ts, durable_ts, stop_ts,
repeat_count, false, true));
/* Track if page has overflow items. */
@@ -4534,8 +4548,8 @@ compare: /*
continue;
}
WT_ERR(__rec_col_var_helper(session, r, salvage,
- last.value, last.start_ts, last.stop_ts,
- rle, last.deleted, false));
+ last.value, last.start_ts, durable_ts,
+ last.stop_ts, rle, last.deleted, false));
}
/*
@@ -4623,9 +4637,11 @@ compare: /*
* tombstone on the page.
*/
start_ts = WT_TS_NONE;
+ durable_ts = WT_TS_NONE;
stop_ts = WT_TS_MAX;
} else {
start_ts = upd_select.start_ts;
+ durable_ts = upd_select.durable_ts;
stop_ts = upd_select.stop_ts;
}
while (src_recno <= n) {
@@ -4665,11 +4681,13 @@ compare: /*
* the page.
*/
start_ts = WT_TS_NONE;
+ durable_ts = WT_TS_NONE;
stop_ts = WT_TS_MAX;
deleted = true;
} else {
start_ts = upd_select.start_ts;
+ durable_ts = upd_select.durable_ts;
stop_ts = upd_select.stop_ts;
switch (upd->type) {
@@ -4714,8 +4732,8 @@ compare: /*
goto next;
}
WT_ERR(__rec_col_var_helper(session, r, salvage,
- last.value, last.start_ts, last.stop_ts,
- rle, last.deleted, false));
+ last.value, last.start_ts, durable_ts,
+ last.stop_ts, rle, last.deleted, false));
}
/*
@@ -4762,8 +4780,9 @@ next: if (src_recno == UINT64_MAX)
/* If we were tracking a record, write it. */
if (rle != 0)
- WT_ERR(__rec_col_var_helper(session, r, salvage, last.value,
- last.start_ts, last.stop_ts, rle, last.deleted, false));
+ WT_ERR(__rec_col_var_helper(session, r, salvage,
+ last.value, last.start_ts, durable_ts, last.stop_ts,
+ rle, last.deleted, false));
/* Write the remnant page. */
ret = __rec_split_finish(session, r);
@@ -4789,7 +4808,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_KV *key, *val;
WT_PAGE *child;
WT_REF *ref;
- wt_timestamp_t oldest_start_ts, newest_start_ts, newest_stop_ts;
+ wt_timestamp_t oldest_start_ts, newest_durable_ts, newest_stop_ts;
size_t size;
bool hazard, key_onpage_ovfl, ovfl_key;
const void *p;
@@ -4943,7 +4962,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
__rec_cell_build_addr(session, r, addr,
state == WT_CHILD_PROXY, WT_RECNO_OOB);
oldest_start_ts = addr->oldest_start_ts;
- newest_start_ts = addr->newest_start_ts;
+ newest_durable_ts = addr->newest_durable_ts;
newest_stop_ts = addr->newest_stop_ts;
} else {
__wt_cell_unpack(session, page, ref->addr, vpack);
@@ -4959,7 +4978,7 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
val->cell_len = 0;
val->len = val->buf.size;
oldest_start_ts = vpack->oldest_start_ts;
- newest_start_ts = vpack->newest_start_ts;
+ newest_durable_ts = vpack->newest_durable_ts;
newest_stop_ts = vpack->newest_stop_ts;
}
WT_CHILD_RELEASE_ERR(session, hazard, ref);
@@ -5002,8 +5021,8 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
/* Copy the key and value onto the page. */
__rec_image_copy(session, r, key);
__rec_image_copy(session, r, val);
- __rec_addr_ts_update(
- r, oldest_start_ts, newest_start_ts, newest_stop_ts);
+ __rec_addr_ts_update(r,
+ oldest_start_ts, newest_durable_ts, newest_stop_ts);
/* Update compression state. */
__rec_key_state_update(r, ovfl_key);
@@ -5056,7 +5075,7 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
__rec_image_copy(session, r, key);
__rec_image_copy(session, r, val);
__rec_addr_ts_update(r, addr->oldest_start_ts,
- addr->newest_start_ts, addr->newest_stop_ts);
+ addr->newest_durable_ts, addr->newest_stop_ts);
/* Update compression state. */
__rec_key_state_update(r, ovfl_key);
@@ -5091,8 +5110,9 @@ __rec_row_zero_len(WT_SESSION_IMPL *session,
*/
static int
__rec_row_leaf(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_PAGE *page, WT_SALVAGE_COOKIE *salvage)
+ WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage)
{
+ WT_ADDR *addr;
WT_BTREE *btree;
WT_CELL *cell;
WT_CELL_UNPACK *kpack, _kpack, *vpack, _vpack;
@@ -5103,10 +5123,11 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
WT_IKEY *ikey;
WT_INSERT *ins;
WT_KV *key, *val;
+ WT_PAGE *page;
WT_ROW *rip;
WT_UPDATE *upd;
WT_UPDATE_SELECT upd_select;
- wt_timestamp_t start_ts, stop_ts;
+ wt_timestamp_t start_ts, durable_ts, newest_durable_ts, stop_ts;
size_t size;
uint64_t slvg_skip, txnid;
uint32_t i;
@@ -5116,12 +5137,27 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
btree = S2BT(session);
cbt = &r->update_modify_cbt;
+ page = pageref->page;
slvg_skip = salvage == NULL ? 0 : salvage->skip;
key = &r->k;
val = &r->v;
vpack = &_vpack;
+ /*
+ * Acquire the newest-durable timestamp for this page so we can roll it
+ * forward. If it exists, it's in the WT_REF structure or the parent's
+ * disk image.
+ */
+ if ((addr = pageref->addr) == NULL)
+ newest_durable_ts = WT_TS_NONE;
+ else if (__wt_off_page(pageref->home, addr))
+ newest_durable_ts = addr->newest_durable_ts;
+ else {
+ __wt_cell_unpack(session, pageref->home, pageref->addr, vpack);
+ newest_durable_ts = vpack->newest_durable_ts;
+ }
+
WT_RET(__rec_split_init(
session, r, page, 0, btree->maxleafpage_precomp));
@@ -5173,6 +5209,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
/* Unpack the on-page value cell, set the default timestamps. */
__wt_row_leaf_value_cell(session, page, rip, NULL, vpack);
start_ts = vpack->start_ts;
+ durable_ts = newest_durable_ts;
stop_ts = vpack->stop_ts;
txnid = WT_TXN_NONE;
@@ -5180,9 +5217,10 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
WT_ERR(__rec_upd_select(
session, r, NULL, rip, vpack, &upd_select));
if ((upd = upd_select.upd) != NULL) {
- txnid = upd_select.txnid;
start_ts = upd_select.start_ts;
+ durable_ts = upd_select.durable_ts;
stop_ts = upd_select.stop_ts;
+ txnid = upd_select.txnid;
}
/* Build value cell. */
@@ -5450,7 +5488,7 @@ build:
session, r, start_ts, stop_ts, 0, val));
__rec_image_copy(session, r, val);
}
- __rec_addr_ts_update(r, start_ts, start_ts, stop_ts);
+ __rec_addr_ts_update(r, start_ts, durable_ts, stop_ts);
/* Update compression state. */
__rec_key_state_update(r, ovfl_key);
@@ -5480,7 +5518,7 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
WT_KV *key, *val;
WT_UPDATE *upd;
WT_UPDATE_SELECT upd_select;
- wt_timestamp_t start_ts, stop_ts;
+ wt_timestamp_t start_ts, durable_ts, stop_ts;
uint64_t txnid;
bool ovfl_key, upd_saved;
@@ -5494,9 +5532,10 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
WT_RET(__rec_upd_select(
session, r, ins, NULL, NULL, &upd_select));
upd = upd_select.upd;
- txnid = upd_select.txnid;
start_ts = upd_select.start_ts;
+ durable_ts = upd_select.durable_ts;
stop_ts = upd_select.stop_ts;
+ txnid = upd_select.txnid;
upd_saved = upd_select.upd_saved;
if (upd == NULL) {
@@ -5580,7 +5619,7 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
session, r, start_ts, stop_ts, 0, val));
__rec_image_copy(session, r, val);
}
- __rec_addr_ts_update(r, start_ts, start_ts, stop_ts);
+ __rec_addr_ts_update(r, start_ts, durable_ts, stop_ts);
/* Update compression state. */
__rec_key_state_update(r, ovfl_key);
@@ -5837,7 +5876,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
r->wrapup_checkpoint_compressed));
__wt_checkpoint_tree_reconcile_update(session,
r->multi->addr.oldest_start_ts,
- r->multi->addr.newest_start_ts,
+ r->multi->addr.newest_durable_ts,
r->multi->addr.newest_stop_ts);
}
@@ -6179,10 +6218,9 @@ __rec_cell_build_addr(WT_SESSION_IMPL *session,
*/
val->buf.data = addr->addr;
val->buf.size = addr->size;
- val->cell_len = __wt_cell_pack_addr(session,
- &val->cell, cell_type, recno,
- addr->oldest_start_ts, addr->newest_start_ts, addr->newest_stop_ts,
- val->buf.size);
+ val->cell_len = __wt_cell_pack_addr(
+ session, &val->cell, cell_type, recno, addr->oldest_start_ts,
+ addr->newest_durable_ts, addr->newest_stop_ts, val->buf.size);
val->len = val->cell_len + val->buf.size;
}
diff --git a/src/third_party/wiredtiger/src/schema/schema_util.c b/src/third_party/wiredtiger/src/schema/schema_util.c
index 9626cf51d13..f3ad28708c9 100644
--- a/src/third_party/wiredtiger/src/schema/schema_util.c
+++ b/src/third_party/wiredtiger/src/schema/schema_util.c
@@ -9,24 +9,19 @@
#include "wt_internal.h"
/*
- * __wt_schema_backup_check --
- * Check if a backup cursor is open and give an error if the schema
- * operation will conflict. This is called after the schema operations
- * have taken the schema lock so no hot backup cursor can be created until
- * this is done.
+ * __schema_backup_check_int --
+ * Helper for __wt_schema_backup_check. Intended to be called while
+ * holding the hot backup read lock.
*/
-int
-__wt_schema_backup_check(WT_SESSION_IMPL *session, const char *name)
+static int
+__schema_backup_check_int(WT_SESSION_IMPL *session, const char *name)
{
WT_CONNECTION_IMPL *conn;
- WT_DECL_RET;
int i;
char **backup_list;
conn = S2C(session);
- if (!conn->hot_backup)
- return (0);
- __wt_readlock(session, &conn->hot_backup_lock);
+
/*
* There is a window at the end of a backup where the list has been
* cleared from the connection but the flag is still set. It is safe
@@ -34,16 +29,34 @@ __wt_schema_backup_check(WT_SESSION_IMPL *session, const char *name)
*/
if (!conn->hot_backup ||
(backup_list = conn->hot_backup_list) == NULL) {
- __wt_readunlock(session, &conn->hot_backup_lock);
return (0);
}
for (i = 0; backup_list[i] != NULL; ++i) {
- if (strcmp(backup_list[i], name) == 0) {
- ret = __wt_set_return(session, EBUSY);
- break;
- }
+ if (strcmp(backup_list[i], name) == 0)
+ return __wt_set_return(session, EBUSY);
}
- __wt_readunlock(session, &conn->hot_backup_lock);
+
+ return (0);
+}
+
+/*
+ * __wt_schema_backup_check --
+ *	Check whether a schema operation on the named object conflicts with
+ *	an open backup cursor, and return an error if it does. Callers have
+ *	already taken the schema lock, so no new hot backup cursor can be
+ *	created until this check has finished.
+ */
+int
+__wt_schema_backup_check(WT_SESSION_IMPL *session, const char *name)
+{
+	WT_DECL_RET;
+
+	/* Without a hot backup in progress there is nothing to conflict. */
+	if (!S2C(session)->hot_backup)
+		return (0);
+
+	WT_WITH_HOTBACKUP_READ_LOCK_UNCOND(session,
+	    ret = __schema_backup_check_int(session, name));
+	return (ret);
+}
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 4a7b50d72e1..3a21171d781 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -1137,7 +1137,9 @@ static const char * const __stats_connection_desc[] = {
"transaction: transaction range of IDs currently pinned by named snapshots",
"transaction: transaction range of timestamps currently pinned",
"transaction: transaction range of timestamps pinned by a checkpoint",
+ "transaction: transaction range of timestamps pinned by the oldest active read timestamp",
"transaction: transaction range of timestamps pinned by the oldest timestamp",
+ "transaction: transaction read timestamp of the oldest active reader",
"transaction: transaction sync calls",
"transaction: transactions committed",
"transaction: transactions rolled back",
@@ -1568,7 +1570,9 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing txn_pinned_snapshot_range */
/* not clearing txn_pinned_timestamp */
/* not clearing txn_pinned_timestamp_checkpoint */
+ /* not clearing txn_pinned_timestamp_reader */
/* not clearing txn_pinned_timestamp_oldest */
+ /* not clearing txn_timestamp_oldest_active_read */
stats->txn_sync = 0;
stats->txn_commit = 0;
stats->txn_rollback = 0;
@@ -2167,8 +2171,12 @@ __wt_stat_connection_aggregate(
to->txn_pinned_timestamp += WT_STAT_READ(from, txn_pinned_timestamp);
to->txn_pinned_timestamp_checkpoint +=
WT_STAT_READ(from, txn_pinned_timestamp_checkpoint);
+ to->txn_pinned_timestamp_reader +=
+ WT_STAT_READ(from, txn_pinned_timestamp_reader);
to->txn_pinned_timestamp_oldest +=
WT_STAT_READ(from, txn_pinned_timestamp_oldest);
+ to->txn_timestamp_oldest_active_read +=
+ WT_STAT_READ(from, txn_timestamp_oldest_active_read);
to->txn_sync += WT_STAT_READ(from, txn_sync);
to->txn_commit += WT_STAT_READ(from, txn_commit);
to->txn_rollback += WT_STAT_READ(from, txn_rollback);
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 9967dc3b2b3..c45afbf5730 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -1304,6 +1304,7 @@ __wt_txn_stats_update(WT_SESSION_IMPL *session)
WT_TXN_GLOBAL *txn_global;
wt_timestamp_t checkpoint_timestamp;
wt_timestamp_t commit_timestamp;
+ wt_timestamp_t oldest_active_read_timestamp;
wt_timestamp_t pinned_timestamp;
uint64_t checkpoint_pinned, snapshot_pinned;
@@ -1329,6 +1330,21 @@ __wt_txn_stats_update(WT_SESSION_IMPL *session)
WT_STAT_SET(session, stats, txn_pinned_timestamp_oldest,
commit_timestamp - txn_global->oldest_timestamp);
+ if (__wt_txn_get_pinned_timestamp(
+ session, &oldest_active_read_timestamp, 0) == 0) {
+ WT_STAT_SET(session, stats,
+ txn_timestamp_oldest_active_read,
+ oldest_active_read_timestamp);
+ WT_STAT_SET(session, stats,
+ txn_pinned_timestamp_reader,
+ commit_timestamp - oldest_active_read_timestamp);
+ } else {
+ WT_STAT_SET(session,
+ stats, txn_timestamp_oldest_active_read, 0);
+ WT_STAT_SET(session,
+ stats, txn_pinned_timestamp_reader, 0);
+ }
+
WT_STAT_SET(session, stats, txn_pinned_snapshot_range,
snapshot_pinned == WT_TXN_NONE ?
0 : txn_global->current - snapshot_pinned);
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index a1c700661ce..ced994cbb9b 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -1272,6 +1272,93 @@ __drop_to(WT_CKPT *ckptbase, const char *name, size_t len)
}
/*
+ * __checkpoint_lock_dirty_tree_int --
+ *	Helper for __checkpoint_lock_dirty_tree. Intended to be called while
+ *	holding the hot backup lock.
+ */
+static int
+__checkpoint_lock_dirty_tree_int(
+    WT_SESSION_IMPL *session, bool is_checkpoint,
+    bool force, WT_BTREE *btree, WT_CKPT *ckpt, WT_CKPT *ckptbase)
+{
+	WT_CONNECTION_IMPL *conn;
+	WT_DECL_RET;
+
+	WT_UNUSED(is_checkpoint); /* Only referenced by the assertion below. */
+	conn = S2C(session);
+
+	/*
+	 * We can't delete checkpoints if a backup cursor is open. WiredTiger
+	 * checkpoints are uniquely named and it's OK to have multiple of them
+	 * in the system: clear the delete flag for them, and otherwise fail.
+	 * Hold the lock until we're done (blocking hot backups from starting),
+	 * we don't want to race with a future hot backup.
+	 */
+	if (conn->hot_backup)
+		WT_CKPT_FOREACH(ckptbase, ckpt) {
+			if (!F_ISSET(ckpt, WT_CKPT_DELETE))
+				continue;
+			if (WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT)) {
+				F_CLR(ckpt, WT_CKPT_DELETE);
+				continue;
+			}
+			WT_RET_MSG(session, EBUSY,
+			    "checkpoint %s blocked by hot backup: it would "
+			    "delete an existing checkpoint, and checkpoints "
+			    "cannot be deleted during a hot backup",
+			    ckpt->name);
+		}
+	/*
+	 * Mark old checkpoints that are being deleted and figure out which
+	 * trees we can skip in this checkpoint.
+	 */
+	WT_RET(__checkpoint_mark_skip(session, ckptbase, force));
+	if (F_ISSET(btree, WT_BTREE_SKIP_CKPT))
+		return (0);
+	/*
+	 * Lock the checkpoints that will be deleted.
+	 *
+	 * Checkpoints are only locked when tracking is enabled, which covers
+	 * checkpoint and drop operations, but not close. The reasoning is
+	 * there should be no access to a checkpoint during close, because any
+	 * thread accessing a checkpoint will also have the current file handle
+	 * open.
+	 */
+	if (WT_META_TRACKING(session))
+		WT_CKPT_FOREACH(ckptbase, ckpt) {
+			if (!F_ISSET(ckpt, WT_CKPT_DELETE))
+				continue;
+			/*
+			 * We can't delete checkpoints referenced by a cursor.
+			 * WiredTiger checkpoints are uniquely named and it's
+			 * OK to have multiple in the system: clear the delete
+			 * flag for them, and otherwise fail.
+			 */
+			ret = __wt_session_lock_checkpoint(session, ckpt->name);
+			if (ret == 0)
+				continue;
+			if (ret == EBUSY &&
+			    WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT)) {
+				F_CLR(ckpt, WT_CKPT_DELETE);
+				continue;
+			}
+			WT_RET_MSG(session, ret,
+			    "checkpoints cannot be dropped when in-use");
+		}
+	/*
+	 * There are special trees: those being bulk-loaded, salvaged, upgraded
+	 * or verified during the checkpoint. They should never be part of a
+	 * checkpoint: we will fail to lock them because the operations have
+	 * exclusive access to the handles. Named checkpoints will fail in that
+	 * case, ordinary checkpoints skip files that cannot be opened normally.
+	 */
+	WT_ASSERT(session,
+	    !is_checkpoint || !F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS));
+
+	return (0);
+}
+
+/*
* __checkpoint_lock_dirty_tree --
* Decide whether the tree needs to be included in the checkpoint and if
* so, acquire the necessary locks.
@@ -1284,18 +1371,14 @@ __checkpoint_lock_dirty_tree(WT_SESSION_IMPL *session,
WT_CKPT *ckpt, *ckptbase;
WT_CONFIG dropconf;
WT_CONFIG_ITEM cval, k, v;
- WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
const char *name;
char *name_alloc;
- bool hot_backup_locked;
btree = S2BT(session);
- conn = S2C(session);
ckpt = ckptbase = NULL;
dhandle = session->dhandle;
- hot_backup_locked = false;
name_alloc = NULL;
/* Only referenced in diagnostic builds. */
@@ -1379,91 +1462,24 @@ __checkpoint_lock_dirty_tree(WT_SESSION_IMPL *session,
F_SET(ckpt, WT_CKPT_ADD);
/*
- * We can't delete checkpoints if a backup cursor is open. WiredTiger
- * checkpoints are uniquely named and it's OK to have multiple of them
- * in the system: clear the delete flag for them, and otherwise fail.
- * Hold the lock until we're done (blocking hot backups from starting),
- * we don't want to race with a future hot backup.
+ * There is some interaction between backups and checkpoints. Perform
+ * all backup related operations that the checkpoint needs now, while
+ * holding the hot backup read lock.
*/
- __wt_readlock(session, &conn->hot_backup_lock);
- hot_backup_locked = true;
- if (conn->hot_backup)
- WT_CKPT_FOREACH(ckptbase, ckpt) {
- if (!F_ISSET(ckpt, WT_CKPT_DELETE))
- continue;
- if (WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT)) {
- F_CLR(ckpt, WT_CKPT_DELETE);
- continue;
- }
- WT_ERR_MSG(session, EBUSY,
- "checkpoint %s blocked by hot backup: it would "
- "delete an existing checkpoint, and checkpoints "
- "cannot be deleted during a hot backup",
- ckpt->name);
- }
-
- /*
- * Mark old checkpoints that are being deleted and figure out which
- * trees we can skip in this checkpoint.
- */
- WT_ERR(__checkpoint_mark_skip(session, ckptbase, force));
+ WT_WITH_HOTBACKUP_READ_LOCK_UNCOND(session,
+ ret = __checkpoint_lock_dirty_tree_int(
+ session, is_checkpoint, force, btree, ckpt, ckptbase));
+ WT_ERR(ret);
if (F_ISSET(btree, WT_BTREE_SKIP_CKPT))
goto err;
- /*
- * Lock the checkpoints that will be deleted.
- *
- * Checkpoints are only locked when tracking is enabled, which covers
- * checkpoint and drop operations, but not close. The reasoning is
- * there should be no access to a checkpoint during close, because any
- * thread accessing a checkpoint will also have the current file handle
- * open.
- */
- if (WT_META_TRACKING(session))
- WT_CKPT_FOREACH(ckptbase, ckpt) {
- if (!F_ISSET(ckpt, WT_CKPT_DELETE))
- continue;
-
- /*
- * We can't delete checkpoints referenced by a cursor.
- * WiredTiger checkpoints are uniquely named and it's
- * OK to have multiple in the system: clear the delete
- * flag for them, and otherwise fail.
- */
- ret = __wt_session_lock_checkpoint(session, ckpt->name);
- if (ret == 0)
- continue;
- if (ret == EBUSY &&
- WT_PREFIX_MATCH(ckpt->name, WT_CHECKPOINT)) {
- F_CLR(ckpt, WT_CKPT_DELETE);
- continue;
- }
- WT_ERR_MSG(session, ret,
- "checkpoints cannot be dropped when in-use");
- }
-
- /*
- * There are special trees: those being bulk-loaded, salvaged, upgraded
- * or verified during the checkpoint. They should never be part of a
- * checkpoint: we will fail to lock them because the operations have
- * exclusive access to the handles. Named checkpoints will fail in that
- * case, ordinary checkpoints skip files that cannot be opened normally.
- */
- WT_ASSERT(session,
- !is_checkpoint || !F_ISSET(btree, WT_BTREE_SPECIAL_FLAGS));
-
- __wt_readunlock(session, &conn->hot_backup_lock);
-
WT_ASSERT(session, btree->ckpt == NULL &&
!F_ISSET(btree, WT_BTREE_SKIP_CKPT));
btree->ckpt = ckptbase;
return (0);
-err: if (hot_backup_locked)
- __wt_readunlock(session, &conn->hot_backup_lock);
-
- __wt_meta_ckptlist_free(session, &ckptbase);
+err: __wt_meta_ckptlist_free(session, &ckptbase);
__wt_free(session, name_alloc);
return (ret);
@@ -1543,16 +1559,13 @@ __checkpoint_mark_skip(
void
__wt_checkpoint_tree_reconcile_update(
WT_SESSION_IMPL *session, wt_timestamp_t oldest_start_ts,
- wt_timestamp_t newest_start_ts, wt_timestamp_t newest_stop_ts)
+ wt_timestamp_t newest_durable_ts, wt_timestamp_t newest_stop_ts)
{
WT_BTREE *btree;
WT_CKPT *ckpt, *ckptbase;
btree = S2BT(session);
- __wt_timestamp_addr_check(session,
- oldest_start_ts, newest_start_ts, newest_stop_ts);
-
/*
* Reconciliation just wrote a checkpoint, everything has been written.
* Update the checkpoint with reconciliation information. The reason
@@ -1564,7 +1577,7 @@ __wt_checkpoint_tree_reconcile_update(
if (F_ISSET(ckpt, WT_CKPT_ADD)) {
ckpt->write_gen = btree->write_gen;
ckpt->oldest_start_ts = oldest_start_ts;
- ckpt->newest_start_ts = newest_start_ts;
+ ckpt->newest_durable_ts = newest_durable_ts;
ckpt->newest_stop_ts = newest_stop_ts;
}
}
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index 7a502265602..50d24778ffb 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -8,12 +8,6 @@
#include "wt_internal.h"
-/* AUTOMATIC FLAG VALUE GENERATION START */
-#define WT_TXN_TS_ALREADY_LOCKED 0x1u
-#define WT_TXN_TS_INCLUDE_CKPT 0x2u
-#define WT_TXN_TS_INCLUDE_OLDEST 0x4u
-/* AUTOMATIC FLAG VALUE GENERATION STOP */
-
/*
* __wt_timestamp_to_string --
* Convert a timestamp to the MongoDB string representation.
@@ -164,11 +158,11 @@ __txn_get_read_timestamp(
}
/*
- * __txn_get_pinned_timestamp --
+ * __wt_txn_get_pinned_timestamp --
* Calculate the current pinned timestamp.
*/
-static int
-__txn_get_pinned_timestamp(
+int
+__wt_txn_get_pinned_timestamp(
WT_SESSION_IMPL *session, wt_timestamp_t *tsp, uint32_t flags)
{
WT_CONNECTION_IMPL *conn;
@@ -289,10 +283,10 @@ __txn_global_query_timestamp(
return (WT_NOTFOUND);
ts = txn_global->oldest_timestamp;
} else if (WT_STRING_MATCH("oldest_reader", cval.str, cval.len))
- WT_RET(__txn_get_pinned_timestamp(
+ WT_RET(__wt_txn_get_pinned_timestamp(
session, &ts, WT_TXN_TS_INCLUDE_CKPT));
else if (WT_STRING_MATCH("pinned", cval.str, cval.len))
- WT_RET(__txn_get_pinned_timestamp(session, &ts,
+ WT_RET(__wt_txn_get_pinned_timestamp(session, &ts,
WT_TXN_TS_INCLUDE_CKPT | WT_TXN_TS_INCLUDE_OLDEST));
else if (WT_STRING_MATCH("recovery", cval.str, cval.len))
/* Read-only value forever. No lock needed. */
@@ -381,7 +375,7 @@ __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force)
return (0);
/* Scan to find the global pinned timestamp. */
- if ((ret = __txn_get_pinned_timestamp(
+ if ((ret = __wt_txn_get_pinned_timestamp(
session, &pinned_timestamp, WT_TXN_TS_INCLUDE_OLDEST)) != 0)
return (ret == WT_NOTFOUND ? 0 : ret);
@@ -397,7 +391,7 @@ __wt_txn_update_pinned_timestamp(WT_SESSION_IMPL *session, bool force)
* Scan the global pinned timestamp again, it's possible that it got
* changed after the previous scan.
*/
- if ((ret = __txn_get_pinned_timestamp(session, &pinned_timestamp,
+ if ((ret = __wt_txn_get_pinned_timestamp(session, &pinned_timestamp,
WT_TXN_TS_ALREADY_LOCKED | WT_TXN_TS_INCLUDE_OLDEST)) != 0) {
__wt_writeunlock(session, &txn_global->rwlock);
return (ret == WT_NOTFOUND ? 0 : ret);
@@ -636,8 +630,7 @@ __wt_txn_set_commit_timestamp(
*/
if (has_oldest_ts && commit_ts < oldest_ts) {
__wt_timestamp_to_string(commit_ts, ts_string[0]);
- __wt_timestamp_to_string(
- oldest_ts, ts_string[1]);
+ __wt_timestamp_to_string(oldest_ts, ts_string[1]);
WT_RET_MSG(session, EINVAL,
"commit timestamp %s is less than the oldest "
"timestamp %s",
@@ -646,8 +639,7 @@ __wt_txn_set_commit_timestamp(
if (has_stable_ts && commit_ts < stable_ts) {
__wt_timestamp_to_string(commit_ts, ts_string[0]);
- __wt_timestamp_to_string(
- oldest_ts, ts_string[1]);
+ __wt_timestamp_to_string(stable_ts, ts_string[1]);
WT_RET_MSG(session, EINVAL,
"commit timestamp %s is less than the stable "
"timestamp %s",
@@ -746,7 +738,7 @@ __wt_txn_set_durable_timestamp(
if (has_stable_ts && durable_ts < stable_ts) {
__wt_timestamp_to_string(durable_ts, ts_string[0]);
- __wt_timestamp_to_string(oldest_ts, ts_string[1]);
+ __wt_timestamp_to_string(stable_ts, ts_string[1]);
WT_RET_MSG(session, EINVAL,
"durable timestamp %s is less than the stable timestamp %s",
ts_string[0], ts_string[1]);
@@ -878,7 +870,7 @@ __wt_txn_set_read_timestamp(
WT_TXN_GLOBAL *txn_global = &S2C(session)->txn_global;
wt_timestamp_t ts_oldest;
char ts_string[2][WT_TS_INT_STRING_SIZE];
- bool roundup_to_oldest;
+ bool did_roundup_to_oldest;
WT_RET(__wt_txn_context_prepare_check(session));
@@ -896,45 +888,37 @@ __wt_txn_set_read_timestamp(
" may only be set once per transaction");
/*
- * The read timestamp could be rounded to the oldest timestamp.
- */
- roundup_to_oldest = F_ISSET(txn, WT_TXN_TS_ROUND_READ);
-
- /*
* This code is not using the timestamp validate function to
* avoid a race between checking and setting transaction
* timestamp.
*/
__wt_readlock(session, &txn_global->rwlock);
ts_oldest = txn_global->oldest_timestamp;
+ did_roundup_to_oldest = false;
if (read_ts < ts_oldest) {
/*
* If given read timestamp is earlier than oldest
* timestamp then round the read timestamp to
* oldest timestamp.
*/
- if (roundup_to_oldest)
+ if (F_ISSET(txn, WT_TXN_TS_ROUND_READ)) {
txn->read_timestamp = ts_oldest;
- else {
- __wt_readunlock(session, &txn_global->rwlock);
- __wt_timestamp_to_string(read_ts, ts_string[0]);
- __wt_timestamp_to_string(ts_oldest, ts_string[1]);
- WT_RET_MSG(session, EINVAL, "read timestamp "
- "%s less than the oldest timestamp %s",
- ts_string[0], ts_string[1]);
+ did_roundup_to_oldest = true;
+ } else {
+ __wt_readunlock(session, &txn_global->rwlock);
+ __wt_timestamp_to_string(read_ts, ts_string[0]);
+ __wt_timestamp_to_string(ts_oldest, ts_string[1]);
+ WT_RET_MSG(session, EINVAL, "read timestamp "
+ "%s less than the oldest timestamp %s",
+ ts_string[0], ts_string[1]);
}
- } else {
+ } else
txn->read_timestamp = read_ts;
- /*
- * Reset to avoid a verbose message as read
- * timestamp is not rounded to oldest timestamp.
- */
- roundup_to_oldest = false;
- }
__wt_txn_publish_read_timestamp(session);
__wt_readunlock(session, &txn_global->rwlock);
- if (roundup_to_oldest && WT_VERBOSE_ISSET(session, WT_VERB_TIMESTAMP)) {
+ if (did_roundup_to_oldest &&
+ WT_VERBOSE_ISSET(session, WT_VERB_TIMESTAMP)) {
/*
* This message is generated here to reduce the span of
* critical section.