summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Luke Chen <luke.chen@mongodb.com> 2020-07-01 16:53:26 +1000
committer: Luke Chen <luke.chen@mongodb.com> 2020-07-01 17:37:58 +1000
commit: 8100fd85b68031907520613ac686e52588835bd3 (patch)
tree: f6aed217e281e8c737c40a34f384aa3b48eadc2c
parent: 6c7c37470bdf4b9dc5ed0215ed161945f9553f0f (diff)
download: mongo-8100fd85b68031907520613ac686e52588835bd3.tar.gz
Import wiredtiger: 5d5d26e79db5244a5fc748346e0e578aed306be1 from branch mongodb-4.4
ref: 46eb0217d4..5d5d26e79d
for: 4.4.0-rc12

WT-6349 Don't truncate history store updates for globally visible timestamped deletes
WT-6462 Use read uncommitted isolation level for history store operations
WT-6483 Add debugging for log file lists
WT-6484 Don't retry eviction if last_eviction_timestamp is not set
-rw-r--r-- src/third_party/wiredtiger/import.data 2
-rw-r--r-- src/third_party/wiredtiger/src/btree/bt_read.c 3
-rw-r--r-- src/third_party/wiredtiger/src/conn/conn_api.c 6
-rw-r--r-- src/third_party/wiredtiger/src/history/hs.c 117
-rw-r--r-- src/third_party/wiredtiger/src/include/btree.i 15
-rw-r--r-- src/third_party/wiredtiger/src/include/extern.h 6
-rw-r--r-- src/third_party/wiredtiger/src/include/txn.i 15
-rw-r--r-- src/third_party/wiredtiger/src/reconcile/rec_row.c 11
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn.c 6
-rw-r--r-- src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c 14
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_bug019.py 10
-rw-r--r-- src/third_party/wiredtiger/test/suite/test_hs11.py 67
12 files changed, 206 insertions, 66 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 3c6640654e9..2fd697956fd 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -2,5 +2,5 @@
"vendor": "wiredtiger",
"github": "wiredtiger/wiredtiger.git",
"branch": "mongodb-4.4",
- "commit": "46eb0217d46b98e7631fc463791f9e16c08ae198"
+ "commit": "5d5d26e79db5244a5fc748346e0e578aed306be1"
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index 7f733691119..a54a4c3beda 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -241,7 +241,8 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
case WT_REF_DELETED:
if (LF_ISSET(WT_READ_DELETED_SKIP | WT_READ_NO_WAIT))
return (WT_NOTFOUND);
- if (LF_ISSET(WT_READ_DELETED_CHECK) && __wt_delete_page_skip(session, ref, false))
+ if (LF_ISSET(WT_READ_DELETED_CHECK) &&
+ __wt_delete_page_skip(session, ref, !F_ISSET(session->txn, WT_TXN_HAS_SNAPSHOT)))
return (WT_NOTFOUND);
goto read;
case WT_REF_DISK:
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index 9666842e9c3..6b86300e4e6 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -1051,6 +1051,12 @@ err:
WT_TRET(__wt_txn_activity_drain(session));
/*
+ * There should be no active transactions running now. Therefore, it's safe for operations to
+ * proceed without doing snapshot visibility checks.
+ */
+ session->txn->isolation = WT_ISO_READ_UNCOMMITTED;
+
+ /*
* Clear any pending async operations and shut down the async worker threads and system before
* closing LSM.
*/
diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c
index 8c9c39e7f12..e7cd883aaf2 100644
--- a/src/third_party/wiredtiger/src/history/hs.c
+++ b/src/third_party/wiredtiger/src/history/hs.c
@@ -571,7 +571,7 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT
* updates, we should remove them and reinsert them at the current timestamp.
*/
if (upd->start_ts != WT_TS_NONE) {
- WT_ERR_NOTFOUND_OK(cursor->next(cursor), true);
+ WT_ERR_NOTFOUND_OK(__wt_hs_cursor_next(session, cursor), true);
if (ret == 0)
WT_ERR(__hs_fixup_out_of_order_from_pos(
session, cursor, btree, key, upd->start_ts, &counter, srch_key));
@@ -603,7 +603,7 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT
* timestamped tables that are occasionally getting a non-timestamped update, that means that
* all timestamped updates should get removed.
*/
- WT_ERR_NOTFOUND_OK(cursor->next(cursor), true);
+ WT_ERR_NOTFOUND_OK(__wt_hs_cursor_next(session, cursor), true);
/* No records to delete. */
if (ret == WT_NOTFOUND) {
@@ -1050,15 +1050,12 @@ err:
}
/*
- * __wt_hs_cursor_position --
- * Position a history store cursor at the end of a set of updates for a given btree id, record
- * key and timestamp. There may be no history store entries for the given btree id and record
- * key if they have been removed by WT_CONNECTION::rollback_to_stable. There is an optional
- * argument to store the key that we used to position the cursor which can be used to assess
- * where the cursor is relative to it.
+ * __hs_cursor_position_int --
+ * Internal function to position a history store cursor at the end of a set of updates for a
+ * given btree id, record key and timestamp.
*/
-int
-__wt_hs_cursor_position(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id,
+static int
+__hs_cursor_position_int(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id,
const WT_ITEM *key, wt_timestamp_t timestamp, WT_ITEM *user_srch_key)
{
WT_DECL_ITEM(srch_key);
@@ -1115,6 +1112,66 @@ err:
}
/*
+ * __wt_hs_cursor_position --
+ * Position a history store cursor at the end of a set of updates for a given btree id, record
+ * key and timestamp. There may be no history store entries for the given btree id and record
+ * key if they have been removed by WT_CONNECTION::rollback_to_stable. There is an optional
+ * argument to store the key that we used to position the cursor which can be used to assess
+ * where the cursor is relative to it. The function executes with isolation level set as
+ * WT_ISO_READ_UNCOMMITTED.
+ */
+int
+__wt_hs_cursor_position(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id,
+ const WT_ITEM *key, wt_timestamp_t timestamp, WT_ITEM *user_srch_key)
+{
+ WT_DECL_RET;
+ WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED,
+ ret = __hs_cursor_position_int(session, cursor, btree_id, key, timestamp, user_srch_key));
+ return (ret);
+}
+
+/*
+ * __wt_hs_cursor_prev --
+ * Execute a prev operation on a history store cursor with the appropriate isolation level.
+ */
+int
+__wt_hs_cursor_prev(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
+{
+ WT_DECL_RET;
+
+ WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED, ret = cursor->prev(cursor));
+ return (ret);
+}
+
+/*
+ * __wt_hs_cursor_next --
+ * Execute a next operation on a history store cursor with the appropriate isolation level.
+ */
+int
+__wt_hs_cursor_next(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
+{
+ WT_DECL_RET;
+
+ WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED, ret = cursor->next(cursor));
+ return (ret);
+}
+
+/*
+ * __wt_hs_cursor_search_near --
+ * Execute a search near operation on a history store cursor with the appropriate isolation
+ * level.
+ */
+int
+__wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *cursor, int *exactp)
+{
+ WT_DECL_RET;
+
+ WT_WITH_TXN_ISOLATION(
+ session, WT_ISO_READ_UNCOMMITTED, ret = cursor->search_near(cursor, exactp));
+ return (ret);
+}
+
+/*
* __wt_hs_find_upd --
* Scan the history store for a record the btree cursor wants to position on. Create an update
* for the record and return to the caller. The caller may choose to optionally allow prepared
@@ -1190,7 +1247,7 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma
ret = 0;
goto done;
}
- for (;; ret = hs_cursor->prev(hs_cursor)) {
+ for (;; ret = __wt_hs_cursor_prev(session, hs_cursor)) {
WT_ERR_NOTFOUND_OK(ret, true);
/* If we hit the end of the table, let's get out of here. */
if (ret == WT_NOTFOUND) {
@@ -1215,8 +1272,7 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma
* If the stop time pair on the tombstone in the history store is already globally visible
* we can skip it.
*/
- if (__wt_txn_visible_all(
- session, hs_cbt->upd_value->tw.stop_txn, hs_cbt->upd_value->tw.durable_stop_ts)) {
+ if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) {
WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone);
continue;
}
@@ -1270,14 +1326,14 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma
* update here we fall back to the datastore version. If its timestamp doesn't match our
* timestamp then we return not found.
*/
- if ((ret = hs_cursor->next(hs_cursor)) == WT_NOTFOUND) {
+ WT_ERR_NOTFOUND_OK(__wt_hs_cursor_next(session, hs_cursor), true);
+ if (ret == WT_NOTFOUND) {
/* Fallback to the onpage value as the base value. */
orig_hs_value_buf = hs_value;
hs_value = on_disk_buf;
upd_type = WT_UPDATE_STANDARD;
break;
}
- WT_ERR(ret);
hs_start_ts_tmp = WT_TS_NONE;
/*
* Make sure we use the temporary variants of these variables. We need to retain the
@@ -1387,7 +1443,7 @@ __hs_delete_key_from_ts_int(
hs_cursor->set_key(hs_cursor, btree_id, key, ts, 0);
WT_ERR(__wt_buf_set(session, srch_key, hs_cursor->key.data, hs_cursor->key.size));
- WT_ERR_NOTFOUND_OK(hs_cursor->search_near(hs_cursor, &exact), true);
+ WT_ERR_NOTFOUND_OK(__wt_hs_cursor_search_near(session, hs_cursor, &exact), true);
/* Empty history store is fine. */
if (ret == WT_NOTFOUND)
goto done;
@@ -1400,7 +1456,7 @@ __hs_delete_key_from_ts_int(
* beginning.
*/
if (exact < 0) {
- while ((ret = hs_cursor->next(hs_cursor)) == 0) {
+ while ((ret = __wt_hs_cursor_next(session, hs_cursor)) == 0) {
WT_ERR(__wt_compare(session, NULL, &hs_cursor->key, srch_key, &cmp));
if (cmp >= 0)
break;
@@ -1497,7 +1553,7 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
* to keep doing "next" until we've got a key greater than the one we attempted to position
* ourselves with.
*/
- for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
+ for (; ret == 0; ret = __wt_hs_cursor_next(session, hs_cursor)) {
/*
* Prior to getting here, we've done a "search near" on our key for the timestamp we're
* inserting and then a "next". In the regular case, our cursor will be positioned on the
@@ -1532,7 +1588,7 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
* 2 foo 3 2 ccc
* 2 foo 3 3 ddd
*/
- for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
+ for (; ret == 0; ret = __wt_hs_cursor_next(session, hs_cursor)) {
/*
* Prior to getting here, we've done a "search near" on our key for the timestamp we're
* inserting and then a "next". In the regular case, our cursor will be positioned on the
@@ -1549,8 +1605,7 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor,
* If the stop time pair on the tombstone in the history store is already globally visible
* we can skip it.
*/
- if (__wt_txn_visible_all(
- session, hs_cbt->upd_value->tw.stop_txn, hs_cbt->upd_value->tw.durable_stop_ts)) {
+ if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) {
WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone);
continue;
}
@@ -1653,15 +1708,15 @@ __hs_delete_key_from_pos(
upd = NULL;
/* If there is nothing else in history store, we're done here. */
- for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
- WT_RET(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter));
+ for (; ret == 0; ret = __wt_hs_cursor_next(session, hs_cursor)) {
+ WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter));
/*
* If the btree id or key isn't ours, that means that we've hit the end of the key range and
* that there is no more history store content for this key.
*/
if (hs_btree_id != btree_id)
break;
- WT_RET(__wt_compare(session, NULL, &hs_key, key, &cmp));
+ WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp));
if (cmp != 0)
break;
@@ -1669,8 +1724,7 @@ __hs_delete_key_from_pos(
* If the stop time pair on the tombstone in the history store is already globally visible
* we can skip it.
*/
- if (__wt_txn_visible_all(
- session, hs_cbt->upd_value->tw.stop_txn, hs_cbt->upd_value->tw.durable_stop_ts)) {
+ if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) {
WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone);
continue;
}
@@ -1683,7 +1737,7 @@ __hs_delete_key_from_pos(
* Append a globally visible tombstone to the update list. This will effectively make the
* value invisible and the key itself will eventually get removed during reconciliation.
*/
- WT_RET(__wt_upd_alloc_tombstone(session, &upd, NULL));
+ WT_ERR(__wt_upd_alloc_tombstone(session, &upd, NULL));
upd->txnid = WT_TXN_NONE;
upd->start_ts = upd->durable_ts = WT_TS_NONE;
WT_ERR(__wt_hs_modify(hs_cbt, upd));
@@ -1735,7 +1789,7 @@ __verify_history_store_id(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint32
* verify. When we return after moving to a new key the caller is responsible for keeping the
* cursor there or deciding they're done.
*/
- for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
+ for (; ret == 0; ret = __wt_hs_cursor_next(session, hs_cursor)) {
WT_ERR(hs_cursor->get_key(hs_cursor, &btree_id, &hs_key, &hs_start_ts, &hs_counter));
/*
@@ -1807,9 +1861,9 @@ __wt_history_store_verify_one(WT_SESSION_IMPL *session)
*/
memset(&hs_key, 0, sizeof(hs_key));
cursor->set_key(cursor, btree_id, &hs_key, 0, 0);
- ret = cursor->search_near(cursor, &exact);
+ ret = __wt_hs_cursor_search_near(session, cursor, &exact);
if (ret == 0 && exact < 0)
- ret = cursor->next(cursor);
+ ret = __wt_hs_cursor_next(session, cursor);
/* If we positioned the cursor there is something to verify. */
if (ret == 0) {
@@ -1852,8 +1906,7 @@ __wt_history_store_verify(WT_SESSION_IMPL *session)
WT_ERR(__wt_scr_alloc(session, 0, &buf));
WT_ERR(__wt_hs_cursor(session, &session_flags, &is_owner));
cursor = session->hs_cursor;
- ret = cursor->next(cursor);
- WT_ERR_NOTFOUND_OK(ret, true);
+ WT_ERR_NOTFOUND_OK(__wt_hs_cursor_next(session, cursor), true);
stop = ret == WT_NOTFOUND ? true : false;
ret = 0;
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 8c5d63aa911..84b4f7d2aa0 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -1424,12 +1424,15 @@ __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
mod->last_eviction_id != __wt_txn_oldest_id(session))
return (true);
- if (mod->last_eviction_timestamp == WT_TS_NONE)
- return (true);
-
- __wt_txn_pinned_timestamp(session, &pinned_ts);
- if (pinned_ts > mod->last_eviction_timestamp)
- return (true);
+ /*
+ * It is possible that we have not started using the timestamps just yet. So, check for the last
+ * time we evicted only if there is a timestamp set.
+ */
+ if (mod->last_eviction_timestamp != WT_TS_NONE) {
+ __wt_txn_pinned_timestamp(session, &pinned_ts);
+ if (pinned_ts > mod->last_eviction_timestamp)
+ return (true);
+ }
return (false);
}
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index bfb6647c88f..143bd3e9e45 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -757,11 +757,17 @@ extern int __wt_hs_cursor(WT_SESSION_IMPL *session, uint32_t *session_flags, boo
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_cursor_close(WT_SESSION_IMPL *session, uint32_t session_flags, bool is_owner)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_hs_cursor_next(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_cursor_open(WT_SESSION_IMPL *session)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_cursor_position(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id,
const WT_ITEM *key, wt_timestamp_t timestamp, WT_ITEM *user_srch_key)
WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_hs_cursor_prev(WT_SESSION_IMPL *session, WT_CURSOR *cursor)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *cursor, int *exactp)
+ WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, uint32_t btree_id,
const WT_ITEM *key, wt_timestamp_t ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format,
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 70eb6f13dd1..688875072cb 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -664,19 +664,16 @@ __txn_visible_id(WT_SESSION_IMPL *session, uint64_t id)
if (id == WT_TXN_ABORTED)
return (false);
+ /* Transactions see their own changes. */
+ if (id == txn->id)
+ return (true);
+
/* Read-uncommitted transactions see all other changes. */
if (txn->isolation == WT_ISO_READ_UNCOMMITTED)
return (true);
- /*
- * If we don't have a transactional snapshot, only make stable updates visible.
- */
- if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT))
- return (__txn_visible_all_id(session, id));
-
- /* Transactions see their own changes. */
- if (id == txn->id)
- return (true);
+ /* Otherwise, we should be called with a snapshot. */
+ WT_ASSERT(session, F_ISSET(txn, WT_TXN_HAS_SNAPSHOT) || session->dhandle->checkpoint != NULL);
/*
* WT_ISO_SNAPSHOT, WT_ISO_READ_COMMITTED: the ID is visible if it is not the result of a
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c
index 0bf79f2802a..59ea0e1b4fd 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_row.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c
@@ -860,12 +860,13 @@ __wt_rec_row_leaf(
}
/*
- * If we're removing a key, also remove the history store contents associated with
- * that key. Even if we fail reconciliation after this point, we're safe to do this.
- * The history store content must be obsolete in order for us to consider removing
- * the key.
+ * If we're removing a key due to a tombstone with a durable timestamp of "none",
+ * also remove the history store contents associated with that key. Even if we fail
+ * reconciliation after this point, we're safe to do this. The history store content
+ * must be obsolete in order for us to consider removing the key.
*/
- if (F_ISSET(S2C(session), WT_CONN_HS_OPEN) && !WT_IS_HS(btree)) {
+ if (tw.durable_stop_ts == WT_TS_NONE && F_ISSET(S2C(session), WT_CONN_HS_OPEN) &&
+ !WT_IS_HS(btree)) {
WT_ERR(__wt_row_leaf_key(session, page, rip, tmpkey, true));
/* Start from WT_TS_NONE to delete all the history store content of the key. */
WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, tmpkey, WT_TS_NONE));
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index ac0fc58fab1..ebef4510f89 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -654,7 +654,7 @@ __txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM *
WT_ERR(__wt_scr_alloc(session, 0, &hs_key));
WT_ERR(__wt_scr_alloc(session, 0, &hs_value));
- for (; ret == 0; ret = hs_cursor->prev(hs_cursor)) {
+ for (; ret == 0; ret = __wt_hs_cursor_prev(session, hs_cursor)) {
WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter));
/* Stop before crossing over to the next btree */
@@ -677,8 +677,7 @@ __txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM *
* If the stop time pair on the tombstone in the history store is already globally visible
* we can skip it.
*/
- if (!__wt_txn_visible_all(
- session, hs_cbt->upd_value->tw.stop_txn, hs_cbt->upd_value->tw.durable_stop_ts))
+ if (!__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw))
break;
else
WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone);
@@ -687,6 +686,7 @@ __txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM *
/* We walked off the top of the history store. */
if (ret == WT_NOTFOUND)
goto done;
+ WT_ERR(ret);
/*
* As part of the history store search, we never get an exact match based on our search criteria
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 852aaad1169..de53843e3c9 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -212,7 +212,7 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
* the given timestamp, the key is removed from data store.
*/
ret = __wt_hs_cursor_position(session, hs_cursor, hs_btree_id, key, WT_TS_MAX, NULL);
- for (; ret == 0; ret = hs_cursor->prev(hs_cursor)) {
+ for (; ret == 0; ret = __wt_hs_cursor_prev(session, hs_cursor)) {
WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter));
/* Stop before crossing over to the next btree */
@@ -231,8 +231,7 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW
* If the stop time pair on the tombstone in the history store is already globally visible
* we can skip it.
*/
- if (__wt_txn_visible_all(
- session, cbt->upd_value->tw.stop_txn, cbt->upd_value->tw.durable_stop_ts)) {
+ if (__wt_txn_tw_stop_visible_all(session, &cbt->upd_value->tw)) {
WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone_rts);
continue;
}
@@ -1010,7 +1009,7 @@ __rollback_to_stable_btree_hs_truncate(WT_SESSION_IMPL *session, uint32_t btree_
/* Walk the history store for the given btree. */
hs_cursor->set_key(hs_cursor, btree_id, &key, WT_TS_NONE, 0);
- ret = hs_cursor->search_near(hs_cursor, &exact);
+ ret = __wt_hs_cursor_search_near(session, hs_cursor, &exact);
/*
* The search should always end up pointing to the start of the required btree or end of the
@@ -1018,9 +1017,9 @@ __rollback_to_stable_btree_hs_truncate(WT_SESSION_IMPL *session, uint32_t btree_
*/
WT_ASSERT(session, (ret != 0 || exact != 0));
if (ret == 0 && exact < 0)
- ret = hs_cursor->next(hs_cursor);
+ ret = __wt_hs_cursor_next(session, hs_cursor);
- for (; ret == 0; ret = hs_cursor->next(hs_cursor)) {
+ for (; ret == 0; ret = __wt_hs_cursor_next(session, hs_cursor)) {
WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter));
/* Stop crossing into the next btree boundary. */
@@ -1031,8 +1030,7 @@ __rollback_to_stable_btree_hs_truncate(WT_SESSION_IMPL *session, uint32_t btree_
* If the stop time pair on the tombstone in the history store is already globally visible
* we can skip it.
*/
- if (__wt_txn_visible_all(
- session, cbt->upd_value->tw.stop_txn, cbt->upd_value->tw.durable_stop_ts)) {
+ if (__wt_txn_tw_stop_visible_all(session, &cbt->upd_value->tw)) {
WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone_rts);
continue;
}
diff --git a/src/third_party/wiredtiger/test/suite/test_bug019.py b/src/third_party/wiredtiger/test/suite/test_bug019.py
index 0cfcc2b13cb..abf68827959 100644
--- a/src/third_party/wiredtiger/test/suite/test_bug019.py
+++ b/src/third_party/wiredtiger/test/suite/test_bug019.py
@@ -86,6 +86,7 @@ class test_bug019(wttest.WiredTigerTestCase):
self.assertTrue(self.max_prealloc > start_prealloc)
# Loop, making sure pre-allocation is working and the range is moving.
+ self.pr("Check pre-allocation range is moving")
older = self.prepfiles()
for i in range(1, 10):
self.populate(self.entries)
@@ -94,6 +95,12 @@ class test_bug019(wttest.WiredTigerTestCase):
# Files can be returned in any order when reading a directory, older
# pre-allocated files can persist longer than newer files when newer
# files are returned first. Confirm files are being consumed.
+ if set(older) < set(newer):
+ self.pr("FAILURE on Iteration " + str(i))
+ self.pr("FAILURE: Older")
+ self.pr(str(older))
+ self.pr("FAILURE: Newer")
+ self.pr(str(newer))
self.assertFalse(set(older) < set(newer))
older = newer
@@ -107,6 +114,9 @@ class test_bug019(wttest.WiredTigerTestCase):
if new_prealloc < self.max_prealloc:
break
time.sleep(1.0)
+ if sleepcount >= max_wait_time:
+ self.pr("FAILURE: sleepcount " + str(sleepcount))
+ self.pr("FAILURE: max_wait_time " + str(max_wait_time))
self.assertTrue(sleepcount < max_wait_time)
if __name__ == '__main__':
diff --git a/src/third_party/wiredtiger/test/suite/test_hs11.py b/src/third_party/wiredtiger/test/suite/test_hs11.py
index eaa991557ee..a1b9d037cb6 100644
--- a/src/third_party/wiredtiger/test/suite/test_hs11.py
+++ b/src/third_party/wiredtiger/test/suite/test_hs11.py
@@ -28,6 +28,7 @@
import wiredtiger, wttest
from wtscenario import make_scenarios
+from wiredtiger import stat
def timestamp_str(t):
return '%x' % t
@@ -35,13 +36,19 @@ def timestamp_str(t):
# test_hs11.py
# Ensure that updates without timestamps clear the history store records.
class test_hs11(wttest.WiredTigerTestCase):
- conn_config = 'cache_size=50MB'
+ conn_config = 'cache_size=50MB,statistics=(all)'
session_config = 'isolation=snapshot'
scenarios = make_scenarios([
('deletion', dict(update_type='deletion')),
('update', dict(update_type='update')),
])
+ def get_stat(self, stat):
+ stat_cursor = self.session.open_cursor('statistics:')
+ val = stat_cursor[stat][2]
+ stat_cursor.close()
+ return val
+
def test_non_ts_updates_clears_hs(self):
uri = 'table:test_hs11'
create_params = 'key_format=S,value_format=S'
@@ -71,6 +78,9 @@ class test_hs11(wttest.WiredTigerTestCase):
else:
cursor[str(i)] = value2
+ # Reconcile and remove the obsolete entries.
+ self.session.checkpoint()
+
# Now apply an update at timestamp 10.
for i in range(1, 10000):
self.session.begin_transaction()
@@ -90,3 +100,58 @@ class test_hs11(wttest.WiredTigerTestCase):
else:
self.assertEqual(cursor[str(i)], value1)
self.session.rollback_transaction()
+
+ if self.update_type == 'deletion':
+ hs_truncate = self.get_stat(stat.conn.cache_hs_key_truncate_onpage_removal)
+ self.assertGreater(hs_truncate, 0)
+
+ def test_ts_updates_donot_clears_hs(self):
+ uri = 'table:test_hs11'
+ create_params = 'key_format=S,value_format=S'
+ self.session.create(uri, create_params)
+
+ value1 = 'a' * 500
+ value2 = 'b' * 500
+
+ # Apply a series of updates from timestamps 1-4.
+ self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(1))
+ cursor = self.session.open_cursor(uri)
+ for ts in range(1, 5):
+ for i in range(1, 10000):
+ self.session.begin_transaction()
+ cursor[str(i)] = value1
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(ts))
+
+ # Reconcile and flush versions 1-3 to the history store.
+ self.session.checkpoint()
+
+ # Remove the key with timestamp 10.
+ for i in range(1, 10000):
+ if i % 2 == 0:
+ self.session.begin_transaction()
+ cursor.set_key(str(i))
+ cursor.remove()
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(10))
+
+ # Reconcile and remove the obsolete entries.
+ self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(10))
+ self.session.checkpoint()
+
+ # Now apply an update at timestamp 20.
+ for i in range(1, 10000):
+ self.session.begin_transaction()
+ cursor[str(i)] = value2
+ self.session.commit_transaction('commit_timestamp=' + timestamp_str(20))
+
+ # Ensure that we didn't select old history store content even if it is not blew away.
+ self.session.begin_transaction('read_timestamp=' + timestamp_str(10))
+ for i in range(1, 10000):
+ if i % 2 == 0:
+ cursor.set_key(str(i))
+ self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND)
+ else:
+ self.assertEqual(cursor[str(i)], value1)
+ self.session.rollback_transaction()
+
+ hs_truncate = self.get_stat(stat.conn.cache_hs_key_truncate_onpage_removal)
+ self.assertEqual(hs_truncate, 0)