diff options
-rw-r--r-- | src/third_party/wiredtiger/import.data | 2 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/btree/bt_read.c | 3 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/conn/conn_api.c | 6 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/history/hs.c | 117 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/btree.i | 15 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/extern.h | 6 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/include/txn.i | 15 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/reconcile/rec_row.c | 11 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/txn/txn.c | 6 | ||||
-rw-r--r-- | src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c | 14 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/suite/test_bug019.py | 10 | ||||
-rw-r--r-- | src/third_party/wiredtiger/test/suite/test_hs11.py | 67 |
12 files changed, 206 insertions, 66 deletions
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data index 3c6640654e9..2fd697956fd 100644 --- a/src/third_party/wiredtiger/import.data +++ b/src/third_party/wiredtiger/import.data @@ -2,5 +2,5 @@ "vendor": "wiredtiger", "github": "wiredtiger/wiredtiger.git", "branch": "mongodb-4.4", - "commit": "46eb0217d46b98e7631fc463791f9e16c08ae198" + "commit": "5d5d26e79db5244a5fc748346e0e578aed306be1" } diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c index 7f733691119..a54a4c3beda 100644 --- a/src/third_party/wiredtiger/src/btree/bt_read.c +++ b/src/third_party/wiredtiger/src/btree/bt_read.c @@ -241,7 +241,8 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags case WT_REF_DELETED: if (LF_ISSET(WT_READ_DELETED_SKIP | WT_READ_NO_WAIT)) return (WT_NOTFOUND); - if (LF_ISSET(WT_READ_DELETED_CHECK) && __wt_delete_page_skip(session, ref, false)) + if (LF_ISSET(WT_READ_DELETED_CHECK) && + __wt_delete_page_skip(session, ref, !F_ISSET(session->txn, WT_TXN_HAS_SNAPSHOT))) return (WT_NOTFOUND); goto read; case WT_REF_DISK: diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c index 9666842e9c3..6b86300e4e6 100644 --- a/src/third_party/wiredtiger/src/conn/conn_api.c +++ b/src/third_party/wiredtiger/src/conn/conn_api.c @@ -1051,6 +1051,12 @@ err: WT_TRET(__wt_txn_activity_drain(session)); /* + * There should be no active transactions running now. Therefore, it's safe for operations to + * proceed without doing snapshot visibility checks. + */ + session->txn->isolation = WT_ISO_READ_UNCOMMITTED; + + /* * Clear any pending async operations and shut down the async worker threads and system before * closing LSM. 
*/ diff --git a/src/third_party/wiredtiger/src/history/hs.c b/src/third_party/wiredtiger/src/history/hs.c index 8c9c39e7f12..e7cd883aaf2 100644 --- a/src/third_party/wiredtiger/src/history/hs.c +++ b/src/third_party/wiredtiger/src/history/hs.c @@ -571,7 +571,7 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT * updates, we should remove them and reinsert them at the current timestamp. */ if (upd->start_ts != WT_TS_NONE) { - WT_ERR_NOTFOUND_OK(cursor->next(cursor), true); + WT_ERR_NOTFOUND_OK(__wt_hs_cursor_next(session, cursor), true); if (ret == 0) WT_ERR(__hs_fixup_out_of_order_from_pos( session, cursor, btree, key, upd->start_ts, &counter, srch_key)); @@ -603,7 +603,7 @@ __hs_insert_record_with_btree(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_BT * timestamped tables that are occasionally getting a non-timestamped update, that means that * all timestamped updates should get removed. */ - WT_ERR_NOTFOUND_OK(cursor->next(cursor), true); + WT_ERR_NOTFOUND_OK(__wt_hs_cursor_next(session, cursor), true); /* No records to delete. */ if (ret == WT_NOTFOUND) { @@ -1050,15 +1050,12 @@ err: } /* - * __wt_hs_cursor_position -- - * Position a history store cursor at the end of a set of updates for a given btree id, record - * key and timestamp. There may be no history store entries for the given btree id and record - * key if they have been removed by WT_CONNECTION::rollback_to_stable. There is an optional - * argument to store the key that we used to position the cursor which can be used to assess - * where the cursor is relative to it. + * __hs_cursor_position_int -- + * Internal function to position a history store cursor at the end of a set of updates for a + * given btree id, record key and timestamp. 
*/ -int -__wt_hs_cursor_position(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, +static int +__hs_cursor_position_int(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t timestamp, WT_ITEM *user_srch_key) { WT_DECL_ITEM(srch_key); @@ -1115,6 +1112,66 @@ err: } /* + * __wt_hs_cursor_position -- + * Position a history store cursor at the end of a set of updates for a given btree id, record + * key and timestamp. There may be no history store entries for the given btree id and record + * key if they have been removed by WT_CONNECTION::rollback_to_stable. There is an optional + * argument to store the key that we used to position the cursor which can be used to assess + * where the cursor is relative to it. The function executes with isolation level set as + * WT_ISO_READ_UNCOMMITTED. + */ +int +__wt_hs_cursor_position(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, + const WT_ITEM *key, wt_timestamp_t timestamp, WT_ITEM *user_srch_key) +{ + WT_DECL_RET; + WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED, + ret = __hs_cursor_position_int(session, cursor, btree_id, key, timestamp, user_srch_key)); + return (ret); +} + +/* + * __wt_hs_cursor_prev -- + * Execute a prev operation on a history store cursor with the appropriate isolation level. + */ +int +__wt_hs_cursor_prev(WT_SESSION_IMPL *session, WT_CURSOR *cursor) +{ + WT_DECL_RET; + + WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED, ret = cursor->prev(cursor)); + return (ret); +} + +/* + * __wt_hs_cursor_next -- + * Execute a next operation on a history store cursor with the appropriate isolation level. 
+ */ +int +__wt_hs_cursor_next(WT_SESSION_IMPL *session, WT_CURSOR *cursor) +{ + WT_DECL_RET; + + WT_WITH_TXN_ISOLATION(session, WT_ISO_READ_UNCOMMITTED, ret = cursor->next(cursor)); + return (ret); +} + +/* + * __wt_hs_cursor_search_near -- + * Execute a search near operation on a history store cursor with the appropriate isolation + * level. + */ +int +__wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *cursor, int *exactp) +{ + WT_DECL_RET; + + WT_WITH_TXN_ISOLATION( + session, WT_ISO_READ_UNCOMMITTED, ret = cursor->search_near(cursor, exactp)); + return (ret); +} + +/* * __wt_hs_find_upd -- * Scan the history store for a record the btree cursor wants to position on. Create an update * for the record and return to the caller. The caller may choose to optionally allow prepared @@ -1190,7 +1247,7 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma ret = 0; goto done; } - for (;; ret = hs_cursor->prev(hs_cursor)) { + for (;; ret = __wt_hs_cursor_prev(session, hs_cursor)) { WT_ERR_NOTFOUND_OK(ret, true); /* If we hit the end of the table, let's get out of here. */ if (ret == WT_NOTFOUND) { @@ -1215,8 +1272,7 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma * If the stop time pair on the tombstone in the history store is already globally visible * we can skip it. */ - if (__wt_txn_visible_all( - session, hs_cbt->upd_value->tw.stop_txn, hs_cbt->upd_value->tw.durable_stop_ts)) { + if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) { WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone); continue; } @@ -1270,14 +1326,14 @@ __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_forma * update here we fall back to the datastore version. If its timestamp doesn't match our * timestamp then we return not found. 
*/ - if ((ret = hs_cursor->next(hs_cursor)) == WT_NOTFOUND) { + WT_ERR_NOTFOUND_OK(__wt_hs_cursor_next(session, hs_cursor), true); + if (ret == WT_NOTFOUND) { /* Fallback to the onpage value as the base value. */ orig_hs_value_buf = hs_value; hs_value = on_disk_buf; upd_type = WT_UPDATE_STANDARD; break; } - WT_ERR(ret); hs_start_ts_tmp = WT_TS_NONE; /* * Make sure we use the temporary variants of these variables. We need to retain the @@ -1387,7 +1443,7 @@ __hs_delete_key_from_ts_int( hs_cursor->set_key(hs_cursor, btree_id, key, ts, 0); WT_ERR(__wt_buf_set(session, srch_key, hs_cursor->key.data, hs_cursor->key.size)); - WT_ERR_NOTFOUND_OK(hs_cursor->search_near(hs_cursor, &exact), true); + WT_ERR_NOTFOUND_OK(__wt_hs_cursor_search_near(session, hs_cursor, &exact), true); /* Empty history store is fine. */ if (ret == WT_NOTFOUND) goto done; @@ -1400,7 +1456,7 @@ __hs_delete_key_from_ts_int( * beginning. */ if (exact < 0) { - while ((ret = hs_cursor->next(hs_cursor)) == 0) { + while ((ret = __wt_hs_cursor_next(session, hs_cursor)) == 0) { WT_ERR(__wt_compare(session, NULL, &hs_cursor->key, srch_key, &cmp)); if (cmp >= 0) break; @@ -1497,7 +1553,7 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, * to keep doing "next" until we've got a key greater than the one we attempted to position * ourselves with. */ - for (; ret == 0; ret = hs_cursor->next(hs_cursor)) { + for (; ret == 0; ret = __wt_hs_cursor_next(session, hs_cursor)) { /* * Prior to getting here, we've done a "search near" on our key for the timestamp we're * inserting and then a "next". 
In the regular case, our cursor will be positioned on the @@ -1532,7 +1588,7 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, * 2 foo 3 2 ccc * 2 foo 3 3 ddd */ - for (; ret == 0; ret = hs_cursor->next(hs_cursor)) { + for (; ret == 0; ret = __wt_hs_cursor_next(session, hs_cursor)) { /* * Prior to getting here, we've done a "search near" on our key for the timestamp we're * inserting and then a "next". In the regular case, our cursor will be positioned on the @@ -1549,8 +1605,7 @@ __hs_fixup_out_of_order_from_pos(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, * If the stop time pair on the tombstone in the history store is already globally visible * we can skip it. */ - if (__wt_txn_visible_all( - session, hs_cbt->upd_value->tw.stop_txn, hs_cbt->upd_value->tw.durable_stop_ts)) { + if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) { WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone); continue; } @@ -1653,15 +1708,15 @@ __hs_delete_key_from_pos( upd = NULL; /* If there is nothing else in history store, we're done here. */ - for (; ret == 0; ret = hs_cursor->next(hs_cursor)) { - WT_RET(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter)); + for (; ret == 0; ret = __wt_hs_cursor_next(session, hs_cursor)) { + WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, &hs_key, &hs_start_ts, &hs_counter)); /* * If the btree id or key isn't ours, that means that we've hit the end of the key range and * that there is no more history store content for this key. */ if (hs_btree_id != btree_id) break; - WT_RET(__wt_compare(session, NULL, &hs_key, key, &cmp)); + WT_ERR(__wt_compare(session, NULL, &hs_key, key, &cmp)); if (cmp != 0) break; @@ -1669,8 +1724,7 @@ __hs_delete_key_from_pos( * If the stop time pair on the tombstone in the history store is already globally visible * we can skip it. 
*/ - if (__wt_txn_visible_all( - session, hs_cbt->upd_value->tw.stop_txn, hs_cbt->upd_value->tw.durable_stop_ts)) { + if (__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) { WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone); continue; } @@ -1683,7 +1737,7 @@ __hs_delete_key_from_pos( * Append a globally visible tombstone to the update list. This will effectively make the * value invisible and the key itself will eventually get removed during reconciliation. */ - WT_RET(__wt_upd_alloc_tombstone(session, &upd, NULL)); + WT_ERR(__wt_upd_alloc_tombstone(session, &upd, NULL)); upd->txnid = WT_TXN_NONE; upd->start_ts = upd->durable_ts = WT_TS_NONE; WT_ERR(__wt_hs_modify(hs_cbt, upd)); @@ -1735,7 +1789,7 @@ __verify_history_store_id(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint32 * verify. When we return after moving to a new key the caller is responsible for keeping the * cursor there or deciding they're done. */ - for (; ret == 0; ret = hs_cursor->next(hs_cursor)) { + for (; ret == 0; ret = __wt_hs_cursor_next(session, hs_cursor)) { WT_ERR(hs_cursor->get_key(hs_cursor, &btree_id, &hs_key, &hs_start_ts, &hs_counter)); /* @@ -1807,9 +1861,9 @@ __wt_history_store_verify_one(WT_SESSION_IMPL *session) */ memset(&hs_key, 0, sizeof(hs_key)); cursor->set_key(cursor, btree_id, &hs_key, 0, 0); - ret = cursor->search_near(cursor, &exact); + ret = __wt_hs_cursor_search_near(session, cursor, &exact); if (ret == 0 && exact < 0) - ret = cursor->next(cursor); + ret = __wt_hs_cursor_next(session, cursor); /* If we positioned the cursor there is something to verify. */ if (ret == 0) { @@ -1852,8 +1906,7 @@ __wt_history_store_verify(WT_SESSION_IMPL *session) WT_ERR(__wt_scr_alloc(session, 0, &buf)); WT_ERR(__wt_hs_cursor(session, &session_flags, &is_owner)); cursor = session->hs_cursor; - ret = cursor->next(cursor); - WT_ERR_NOTFOUND_OK(ret, true); + WT_ERR_NOTFOUND_OK(__wt_hs_cursor_next(session, cursor), true); stop = ret == WT_NOTFOUND ? 
true : false; ret = 0; diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i index 8c5d63aa911..84b4f7d2aa0 100644 --- a/src/third_party/wiredtiger/src/include/btree.i +++ b/src/third_party/wiredtiger/src/include/btree.i @@ -1424,12 +1424,15 @@ __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page) mod->last_eviction_id != __wt_txn_oldest_id(session)) return (true); - if (mod->last_eviction_timestamp == WT_TS_NONE) - return (true); - - __wt_txn_pinned_timestamp(session, &pinned_ts); - if (pinned_ts > mod->last_eviction_timestamp) - return (true); + /* + * It is possible that we have not started using the timestamps just yet. So, check for the last + * time we evicted only if there is a timestamp set. + */ + if (mod->last_eviction_timestamp != WT_TS_NONE) { + __wt_txn_pinned_timestamp(session, &pinned_ts); + if (pinned_ts > mod->last_eviction_timestamp) + return (true); + } return (false); } diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h index bfb6647c88f..143bd3e9e45 100644 --- a/src/third_party/wiredtiger/src/include/extern.h +++ b/src/third_party/wiredtiger/src/include/extern.h @@ -757,11 +757,17 @@ extern int __wt_hs_cursor(WT_SESSION_IMPL *session, uint32_t *session_flags, boo WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_cursor_close(WT_SESSION_IMPL *session, uint32_t session_flags, bool is_owner) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_hs_cursor_next(WT_SESSION_IMPL *session, WT_CURSOR *cursor) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_cursor_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_cursor_position(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t timestamp, WT_ITEM *user_srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int 
__wt_hs_cursor_prev(WT_SESSION_IMPL *session, WT_CURSOR *cursor) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); +extern int __wt_hs_cursor_search_near(WT_SESSION_IMPL *session, WT_CURSOR *cursor, int *exactp) + WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_delete_key_from_ts(WT_SESSION_IMPL *session, uint32_t btree_id, const WT_ITEM *key, wt_timestamp_t ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result)); extern int __wt_hs_find_upd(WT_SESSION_IMPL *session, WT_ITEM *key, const char *value_format, diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i index 70eb6f13dd1..688875072cb 100644 --- a/src/third_party/wiredtiger/src/include/txn.i +++ b/src/third_party/wiredtiger/src/include/txn.i @@ -664,19 +664,16 @@ __txn_visible_id(WT_SESSION_IMPL *session, uint64_t id) if (id == WT_TXN_ABORTED) return (false); + /* Transactions see their own changes. */ + if (id == txn->id) + return (true); + /* Read-uncommitted transactions see all other changes. */ if (txn->isolation == WT_ISO_READ_UNCOMMITTED) return (true); - /* - * If we don't have a transactional snapshot, only make stable updates visible. - */ - if (!F_ISSET(txn, WT_TXN_HAS_SNAPSHOT)) - return (__txn_visible_all_id(session, id)); - - /* Transactions see their own changes. */ - if (id == txn->id) - return (true); + /* Otherwise, we should be called with a snapshot. 
*/ + WT_ASSERT(session, F_ISSET(txn, WT_TXN_HAS_SNAPSHOT) || session->dhandle->checkpoint != NULL); /* * WT_ISO_SNAPSHOT, WT_ISO_READ_COMMITTED: the ID is visible if it is not the result of a diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c index 0bf79f2802a..59ea0e1b4fd 100644 --- a/src/third_party/wiredtiger/src/reconcile/rec_row.c +++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c @@ -860,12 +860,13 @@ __wt_rec_row_leaf( } /* - * If we're removing a key, also remove the history store contents associated with - * that key. Even if we fail reconciliation after this point, we're safe to do this. - * The history store content must be obsolete in order for us to consider removing - * the key. + * If we're removing a key due to a tombstone with a durable timestamp of "none", + * also remove the history store contents associated with that key. Even if we fail + * reconciliation after this point, we're safe to do this. The history store content + * must be obsolete in order for us to consider removing the key. */ - if (F_ISSET(S2C(session), WT_CONN_HS_OPEN) && !WT_IS_HS(btree)) { + if (tw.durable_stop_ts == WT_TS_NONE && F_ISSET(S2C(session), WT_CONN_HS_OPEN) && + !WT_IS_HS(btree)) { WT_ERR(__wt_row_leaf_key(session, page, rip, tmpkey, true)); /* Start from WT_TS_NONE to delete all the history store content of the key. 
*/ WT_ERR(__wt_hs_delete_key_from_ts(session, btree->id, tmpkey, WT_TS_NONE)); diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c index ac0fc58fab1..ebef4510f89 100644 --- a/src/third_party/wiredtiger/src/txn/txn.c +++ b/src/third_party/wiredtiger/src/txn/txn.c @@ -654,7 +654,7 @@ __txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM * WT_ERR(__wt_scr_alloc(session, 0, &hs_key)); WT_ERR(__wt_scr_alloc(session, 0, &hs_value)); - for (; ret == 0; ret = hs_cursor->prev(hs_cursor)) { + for (; ret == 0; ret = __wt_hs_cursor_prev(session, hs_cursor)) { WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter)); /* Stop before crossing over to the next btree */ @@ -677,8 +677,7 @@ __txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM * * If the stop time pair on the tombstone in the history store is already globally visible * we can skip it. */ - if (!__wt_txn_visible_all( - session, hs_cbt->upd_value->tw.stop_txn, hs_cbt->upd_value->tw.durable_stop_ts)) + if (!__wt_txn_tw_stop_visible_all(session, &hs_cbt->upd_value->tw)) break; else WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone); @@ -687,6 +686,7 @@ __txn_append_hs_record(WT_SESSION_IMPL *session, WT_CURSOR *hs_cursor, WT_ITEM * /* We walked off the top of the history store. */ if (ret == WT_NOTFOUND) goto done; + WT_ERR(ret); /* * As part of the history store search, we never get an exact match based on our search criteria diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c index 852aaad1169..de53843e3c9 100644 --- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c +++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c @@ -212,7 +212,7 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW * the given timestamp, the key is removed from data store. 
*/ ret = __wt_hs_cursor_position(session, hs_cursor, hs_btree_id, key, WT_TS_MAX, NULL); - for (; ret == 0; ret = hs_cursor->prev(hs_cursor)) { + for (; ret == 0; ret = __wt_hs_cursor_prev(session, hs_cursor)) { WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter)); /* Stop before crossing over to the next btree */ @@ -231,8 +231,7 @@ __rollback_row_ondisk_fixup_key(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW * If the stop time pair on the tombstone in the history store is already globally visible * we can skip it. */ - if (__wt_txn_visible_all( - session, cbt->upd_value->tw.stop_txn, cbt->upd_value->tw.durable_stop_ts)) { + if (__wt_txn_tw_stop_visible_all(session, &cbt->upd_value->tw)) { WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone_rts); continue; } @@ -1010,7 +1009,7 @@ __rollback_to_stable_btree_hs_truncate(WT_SESSION_IMPL *session, uint32_t btree_ /* Walk the history store for the given btree. */ hs_cursor->set_key(hs_cursor, btree_id, &key, WT_TS_NONE, 0); - ret = hs_cursor->search_near(hs_cursor, &exact); + ret = __wt_hs_cursor_search_near(session, hs_cursor, &exact); /* * The search should always end up pointing to the start of the required btree or end of the @@ -1018,9 +1017,9 @@ __rollback_to_stable_btree_hs_truncate(WT_SESSION_IMPL *session, uint32_t btree_ */ WT_ASSERT(session, (ret != 0 || exact != 0)); if (ret == 0 && exact < 0) - ret = hs_cursor->next(hs_cursor); + ret = __wt_hs_cursor_next(session, hs_cursor); - for (; ret == 0; ret = hs_cursor->next(hs_cursor)) { + for (; ret == 0; ret = __wt_hs_cursor_next(session, hs_cursor)) { WT_ERR(hs_cursor->get_key(hs_cursor, &hs_btree_id, hs_key, &hs_start_ts, &hs_counter)); /* Stop crossing into the next btree boundary. */ @@ -1031,8 +1030,7 @@ __rollback_to_stable_btree_hs_truncate(WT_SESSION_IMPL *session, uint32_t btree_ * If the stop time pair on the tombstone in the history store is already globally visible * we can skip it. 
*/ - if (__wt_txn_visible_all( - session, cbt->upd_value->tw.stop_txn, cbt->upd_value->tw.durable_stop_ts)) { + if (__wt_txn_tw_stop_visible_all(session, &cbt->upd_value->tw)) { WT_STAT_CONN_INCR(session, cursor_prev_hs_tombstone_rts); continue; } diff --git a/src/third_party/wiredtiger/test/suite/test_bug019.py b/src/third_party/wiredtiger/test/suite/test_bug019.py index 0cfcc2b13cb..abf68827959 100644 --- a/src/third_party/wiredtiger/test/suite/test_bug019.py +++ b/src/third_party/wiredtiger/test/suite/test_bug019.py @@ -86,6 +86,7 @@ class test_bug019(wttest.WiredTigerTestCase): self.assertTrue(self.max_prealloc > start_prealloc) # Loop, making sure pre-allocation is working and the range is moving. + self.pr("Check pre-allocation range is moving") older = self.prepfiles() for i in range(1, 10): self.populate(self.entries) @@ -94,6 +95,12 @@ class test_bug019(wttest.WiredTigerTestCase): # Files can be returned in any order when reading a directory, older # pre-allocated files can persist longer than newer files when newer # files are returned first. Confirm files are being consumed. 
+ if set(older) < set(newer): + self.pr("FAILURE on Iteration " + str(i)) + self.pr("FAILURE: Older") + self.pr(str(older)) + self.pr("FAILURE: Newer") + self.pr(str(newer)) self.assertFalse(set(older) < set(newer)) older = newer @@ -107,6 +114,9 @@ class test_bug019(wttest.WiredTigerTestCase): if new_prealloc < self.max_prealloc: break time.sleep(1.0) + if sleepcount >= max_wait_time: + self.pr("FAILURE: sleepcount " + str(sleepcount)) + self.pr("FAILURE: max_wait_time " + str(max_wait_time)) self.assertTrue(sleepcount < max_wait_time) if __name__ == '__main__': diff --git a/src/third_party/wiredtiger/test/suite/test_hs11.py b/src/third_party/wiredtiger/test/suite/test_hs11.py index eaa991557ee..a1b9d037cb6 100644 --- a/src/third_party/wiredtiger/test/suite/test_hs11.py +++ b/src/third_party/wiredtiger/test/suite/test_hs11.py @@ -28,6 +28,7 @@ import wiredtiger, wttest from wtscenario import make_scenarios +from wiredtiger import stat def timestamp_str(t): return '%x' % t @@ -35,13 +36,19 @@ def timestamp_str(t): # test_hs11.py # Ensure that updates without timestamps clear the history store records. class test_hs11(wttest.WiredTigerTestCase): - conn_config = 'cache_size=50MB' + conn_config = 'cache_size=50MB,statistics=(all)' session_config = 'isolation=snapshot' scenarios = make_scenarios([ ('deletion', dict(update_type='deletion')), ('update', dict(update_type='update')), ]) + def get_stat(self, stat): + stat_cursor = self.session.open_cursor('statistics:') + val = stat_cursor[stat][2] + stat_cursor.close() + return val + def test_non_ts_updates_clears_hs(self): uri = 'table:test_hs11' create_params = 'key_format=S,value_format=S' @@ -71,6 +78,9 @@ class test_hs11(wttest.WiredTigerTestCase): else: cursor[str(i)] = value2 + # Reconcile and remove the obsolete entries. + self.session.checkpoint() + # Now apply an update at timestamp 10. 
for i in range(1, 10000): self.session.begin_transaction() @@ -90,3 +100,58 @@ class test_hs11(wttest.WiredTigerTestCase): else: self.assertEqual(cursor[str(i)], value1) self.session.rollback_transaction() + + if self.update_type == 'deletion': + hs_truncate = self.get_stat(stat.conn.cache_hs_key_truncate_onpage_removal) + self.assertGreater(hs_truncate, 0) + + def test_ts_updates_donot_clears_hs(self): + uri = 'table:test_hs11' + create_params = 'key_format=S,value_format=S' + self.session.create(uri, create_params) + + value1 = 'a' * 500 + value2 = 'b' * 500 + + # Apply a series of updates from timestamps 1-4. + self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(1)) + cursor = self.session.open_cursor(uri) + for ts in range(1, 5): + for i in range(1, 10000): + self.session.begin_transaction() + cursor[str(i)] = value1 + self.session.commit_transaction('commit_timestamp=' + timestamp_str(ts)) + + # Reconcile and flush versions 1-3 to the history store. + self.session.checkpoint() + + # Remove the key with timestamp 10. + for i in range(1, 10000): + if i % 2 == 0: + self.session.begin_transaction() + cursor.set_key(str(i)) + cursor.remove() + self.session.commit_transaction('commit_timestamp=' + timestamp_str(10)) + + # Reconcile and remove the obsolete entries. + self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(10)) + self.session.checkpoint() + + # Now apply an update at timestamp 20. + for i in range(1, 10000): + self.session.begin_transaction() + cursor[str(i)] = value2 + self.session.commit_transaction('commit_timestamp=' + timestamp_str(20)) + + # Ensure that we didn't select old history store content even if it has not been blown away. 
+ self.session.begin_transaction('read_timestamp=' + timestamp_str(10)) + for i in range(1, 10000): + if i % 2 == 0: + cursor.set_key(str(i)) + self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND) + else: + self.assertEqual(cursor[str(i)], value1) + self.session.rollback_transaction() + + hs_truncate = self.get_stat(stat.conn.cache_hs_key_truncate_onpage_removal) + self.assertEqual(hs_truncate, 0) |