summary refs log tree commit diff
path: root/src/third_party/wiredtiger/src/include/btree.i
diff options
context:
space:
mode:
Diffstat (limited to 'src/third_party/wiredtiger/src/include/btree.i')
-rw-r--r-- src/third_party/wiredtiger/src/include/btree.i 118
1 files changed, 24 insertions, 94 deletions
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 81c166eb0e4..7813f1299fd 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -1158,8 +1158,7 @@ __wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref)
* Return if a truncate operation is active.
*/
static inline bool
-__wt_page_del_active(
- WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
+__wt_page_del_active(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
{
WT_PAGE_DELETED *page_del;
uint8_t prepare_state;
@@ -1190,10 +1189,10 @@ __wt_page_las_active(WT_SESSION_IMPL *session, WT_REF *ref)
if ((page_las = ref->page_las) == NULL)
return (false);
- if (page_las->invalid || !ref->page_las->las_skew_newest)
+ if (page_las->invalid || !ref->page_las->skew_newest)
return (true);
- if (__wt_txn_visible_all(session, page_las->las_max_txn,
- WT_TIMESTAMP_NULL(&page_las->onpage_timestamp)))
+ if (__wt_txn_visible_all(session, page_las->max_txn,
+ WT_TIMESTAMP_NULL(&page_las->max_timestamp)))
return (false);
return (true);
@@ -1329,6 +1328,7 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
static inline bool
__wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
{
+ WT_DECL_TIMESTAMP(pinned_ts)
WT_PAGE_MODIFY *mod;
WT_TXN_GLOBAL *txn_global;
@@ -1338,7 +1338,8 @@ __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
* If the page hasn't been through one round of update/restore, give it
* a try.
*/
- if ((mod = page->modify) == NULL || !mod->update_restored)
+ if ((mod = page->modify) == NULL ||
+ !FLD_ISSET(mod->restore_state, WT_PAGE_RS_RESTORED))
return (true);
/*
@@ -1356,17 +1357,12 @@ __wt_page_evict_retry(WT_SESSION_IMPL *session, WT_PAGE *page)
return (true);
#ifdef HAVE_TIMESTAMPS
- {
- bool same_timestamp;
-
- same_timestamp = false;
- if (!__wt_timestamp_iszero(&mod->last_eviction_timestamp))
- WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
- same_timestamp = __wt_timestamp_cmp(
+ if (!__wt_timestamp_iszero(&mod->last_eviction_timestamp)) {
+ __wt_txn_pinned_timestamp(session, &pinned_ts);
+ if (__wt_timestamp_cmp(
&mod->last_eviction_timestamp,
- &txn_global->pinned_timestamp) == 0);
- if (!same_timestamp)
- return (true);
+ &txn_global->pinned_timestamp) != 0)
+ return (true);
}
#endif
@@ -1605,6 +1601,8 @@ __wt_split_descent_race(
* update. A thread can read the parent page's original page index and
* then read the split page's replacement index.
*
+ * For example, imagine a search descending the tree.
+ *
* Because internal page splits work by truncating the original page to
* the initial part of the original page, the result of this race is we
* will have a search key that points past the end of the current page.
@@ -1649,73 +1647,17 @@ __wt_split_descent_race(
* work by truncating the split page, so the split page search is for
* content the split page retains after the split, and we ignore this
* race.
- */
- WT_INTL_INDEX_GET(session, ref->home, pindex);
- return (pindex != saved_pindex);
-}
-
-/*
- * __wt_split_prev_race --
- * Return if we raced with an internal page split when moving backwards
- * through the tree.
- */
-static inline bool
-__wt_split_prev_race(WT_SESSION_IMPL *session, WT_REF *ref)
-{
- WT_PAGE_INDEX *pindex;
-
- /*
- * There's a split race when a cursor moving backwards through the tree
- * descends the tree. If we're splitting an internal page into its
- * parent, we move the WT_REF structures and update the parent's page
- * index before updating the split page's page index, and it's not an
- * atomic update. A thread can read the parent and split page's original
- * indexes during a split, or read the parent page's replacement page
- * index and then read the split page's original index, either of which
- * can lead to skipping pages.
*
- * For example, imagine an internal page with 3 child pages, with the
- * namespaces a-f, g-h and i-j; the first child page splits. The parent
- * starts out with the following page-index:
+ * This code is a general purpose check for a descent race and we call
+ * it in other cases, for example, a cursor traversing backwards through
+ * the tree.
*
- * | ... | a | g | i | ... |
- *
- * The split page starts out with the following page-index:
- *
- * | a | b | c | d | e | f |
- *
- * The first step is to move the c-f ranges into a new subtree, so, for
- * example we might have two new internal pages 'c' and 'e', where the
- * new 'c' page references the c-d namespace and the new 'e' page
- * references the e-f namespace. The top of the subtree references the
- * parent page, but until the parent's page index is updated, threads in
- * the subtree won't be able to ascend out of the subtree. However, once
- * the parent page's page index is updated to this:
- *
- * | ... | a | c | e | g | i | ... |
- *
- * threads in the subtree can ascend into the parent. Imagine a cursor
- * in the c-d part of the namespace that ascends to the parent's 'c'
- * slot. It would then decrement to the slot before the 'c' slot, the
- * 'a' slot.
- *
- * The previous-cursor movement selects the last slot in the 'a' page;
- * if the split page's page-index hasn't been updated yet, it selects
- * the 'f' slot, which is incorrect. Once the split page's page index is
- * updated to this:
- *
- * | a | b |
- *
- * the previous-cursor movement will select the 'b' slot, which is
- * correct.
- *
- * This function takes an argument which is the internal page into which
- * we're coupling. If the last slot on the page no longer points to
- * the current page as its "home", the page is being split and part of
- * its namespace moved, we have to restart.
+ * Presumably we acquired a page index on the child page before calling
+ * this code, don't re-order that acquisition with this check.
*/
- WT_INTL_INDEX_GET(session, ref->page, pindex);
- return (pindex->index[pindex->entries - 1]->home != ref->page);
+ WT_BARRIER();
+ WT_INTL_INDEX_GET(session, ref->home, pindex);
+ return (pindex != saved_pindex);
}
/*
@@ -1724,8 +1666,8 @@ __wt_split_prev_race(WT_SESSION_IMPL *session, WT_REF *ref)
* coupling up/down the tree.
*/
static inline int
-__wt_page_swap_func(WT_SESSION_IMPL *session,
- WT_REF *held, WT_REF *want, bool prev_race, uint32_t flags
+__wt_page_swap_func(
+ WT_SESSION_IMPL *session, WT_REF *held, WT_REF *want, uint32_t flags
#ifdef HAVE_DIAGNOSTIC
, const char *file, int line
#endif
@@ -1755,18 +1697,6 @@ __wt_page_swap_func(WT_SESSION_IMPL *session,
);
/*
- * We can race when descending into an internal page as part of moving
- * backwards through the tree, and we have to detect that race before
- * releasing the page from which we are coupling, else we can't restart
- * the movement.
- */
- if (ret == 0 && prev_race && WT_PAGE_IS_INTERNAL(want->page) &&
- __wt_split_prev_race(session, want)) {
- ret = WT_RESTART;
- WT_TRET(__wt_page_release(session, want, flags));
- }
-
- /*
* Expected failures: page not found or restart. Our callers list the
* errors they're expecting to handle.
*/