summaryrefslogtreecommitdiff
path: root/src/btree/bt_split.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/btree/bt_split.c')
-rw-r--r--src/btree/bt_split.c164
1 files changed, 71 insertions, 93 deletions
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index 8122d242666..fcb14be7c76 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -197,7 +197,7 @@ __split_safe_free(WT_SESSION_IMPL *session,
#ifdef HAVE_DIAGNOSTIC
/*
* __split_verify_intl_key_order --
- * Verify the key order on an internal page after a split, diagnostic only.
+ * Verify the key order on an internal page after a split.
*/
static void
__split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page)
@@ -249,6 +249,42 @@ __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page)
break;
}
}
+
+/*
+ * __split_verify_intl --
+ * Verify a set of internal pages involved in a split.
+ */
+static int
+__split_verify_intl(WT_SESSION_IMPL *session,
+ WT_PAGE *page1, WT_PAGE *page2, WT_PAGE *pindex_page, bool skip_first)
+{
+ WT_DECL_RET;
+ WT_REF *ref;
+
+ /* The split is complete and live, verify all of the pages involved. */
+ if (page1 != NULL)
+ __split_verify_intl_key_order(session, page1);
+ if (page2 != NULL)
+ __split_verify_intl_key_order(session, page2);
+
+ /* Skip the first slot on non-root internal pages, it's not set. */
+ WT_INTL_FOREACH_BEGIN(session, pindex_page, ref) {
+ if (skip_first) {
+ skip_first = false;
+ continue;
+ }
+ WT_ERR(__wt_page_in(session, ref, WT_READ_NO_EVICT));
+
+ __split_verify_intl_key_order(session, ref->page);
+
+ WT_ERR(__wt_page_release(session, ref, WT_READ_NO_EVICT));
+ } WT_INTL_FOREACH_END;
+
+ return (0);
+
+err: /* Something really bad just happened. */
+ WT_PANIC_RET(session, ret, "fatal error during page split");
+}
#endif
/*
@@ -400,11 +436,11 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
}
/*
- * __split_ref_step1 --
+ * __split_ref_prepare --
* Prepare a set of WT_REFs for a move.
*/
static void
-__split_ref_step1(WT_SESSION_IMPL *session,
+__split_ref_prepare(WT_SESSION_IMPL *session,
WT_PAGE_INDEX *pindex, uint64_t split_gen, bool skip_first)
{
WT_PAGE *child;
@@ -470,58 +506,6 @@ __split_ref_step1(WT_SESSION_IMPL *session,
}
/*
- * __split_ref_step2 --
- * Allow the newly created children to be evicted or split.
- */
-static int
-__split_ref_step2(
- WT_SESSION_IMPL *session, WT_PAGE_INDEX *pindex, bool skip_first)
-{
- WT_DECL_RET;
- WT_REF *ref;
- uint32_t i;
-
- /*
- * The split has gone live, enable eviction and splits on the newly
- * created internal pages.
- */
- WT_WRITE_BARRIER();
-
- for (i = skip_first ? 1 : 0; i < pindex->entries; ++i) {
- ref = pindex->index[i];
-
- /*
- * We don't hold hazard pointers on created pages, they cannot
- * be evicted because the page-modify transaction value set as
- * they were created prevents eviction. (See above, we reset
- * that value as part of fixing up the page.) But, an eviction
- * thread might be attempting to evict the page (the WT_REF may
- * be WT_REF_LOCKED), or it may be a disk based page (the WT_REF
- * may be WT_REF_READING), or it may be in some other state.
- * Acquire a hazard pointer for any in-memory pages so we know
- * the state of the page. Ignore pages not in-memory (deleted,
- * on-disk, being read), there's no in-memory structure to fix.
- */
- if ((ret = __wt_page_in(session,
- ref, WT_READ_CACHE | WT_READ_NO_EVICT)) == WT_NOTFOUND)
- continue;
- WT_ERR(ret);
-
-#ifdef HAVE_DIAGNOSTIC
- WT_WITH_PAGE_INDEX(session,
- __split_verify_intl_key_order(session, ref->page));
-#endif
-
- WT_ERR(__wt_hazard_clear(session, ref));
- }
-
- return (0);
-
-err: /* Something really bad just happened. */
- WT_PANIC_RET(session, ret, "fatal error resolving a split");
-}
-
-/*
* __split_root --
* Split the root page in-memory, deepening the tree.
*/
@@ -657,7 +641,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root)
root->pg_intl_split_gen = split_gen;
/* Prepare the WT_REFs for the move. */
- __split_ref_step1(session, alloc_index, split_gen, false);
+ __split_ref_prepare(session, alloc_index, split_gen, false);
/*
* Confirm the root page's index hasn't moved, then update it, which
@@ -665,19 +649,16 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root)
*/
WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(root) == pindex);
WT_INTL_INDEX_SET(root, alloc_index);
-
-#ifdef HAVE_DIAGNOSTIC
- WT_WITH_PAGE_INDEX(session,
- __split_verify_intl_key_order(session, root));
-#endif
- /* Finalize the WT_REFs we moved. */
- WT_ERR(__split_ref_step2(session, alloc_index, false));
+ alloc_index = NULL;
/* The split is complete and correct, ignore benign errors. */
complete = WT_ERR_IGNORE;
- /* We've installed the allocated page-index, ensure error handling. */
- alloc_index = NULL;
+#ifdef HAVE_DIAGNOSTIC
+ WT_WITH_PAGE_INDEX(session,
+ ret = __split_verify_intl(session, root, NULL, root, false));
+ WT_ERR(ret);
+#endif
/*
* We can't free the previous root's index, there may be threads using
@@ -852,11 +833,6 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
WT_INTL_INDEX_SET(parent, alloc_index);
alloc_index = NULL;
-#ifdef HAVE_DIAGNOSTIC
- WT_WITH_PAGE_INDEX(session,
- __split_verify_intl_key_order(session, parent));
-#endif
-
/*
* If discarding the page's original WT_REF field, reset it to split.
* Threads cursoring through the tree were blocked because that WT_REF
@@ -875,18 +851,27 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
__wt_free(session, ref->page_del);
}
+ /*
+ * Set the discarded WT_REF state to split, ensuring we don't
+ * race with any discard of the WT_REF deleted fields.
+ */
WT_PUBLISH(ref->state, WT_REF_SPLIT);
- }
- /*
- * Push out the changes: not required for correctness, but don't let
- * threads spin on incorrect page references longer than necessary.
- */
- WT_FULL_BARRIER();
+ /*
+ * Push out the change: not required for correctness, but stops
+ * threads spinning on incorrect page references.
+ */
+ WT_FULL_BARRIER();
+ }
/* The split is complete and correct, ignore benign errors. */
complete = WT_ERR_IGNORE;
+#ifdef HAVE_DIAGNOSTIC
+ WT_WITH_PAGE_INDEX(session,
+ __split_verify_intl_key_order(session, parent));
+#endif
+
/*
* !!!
* Swapping in the new page index released the page for eviction, we can
@@ -1170,34 +1155,27 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page)
page->pg_intl_split_gen = split_gen;
/* Prepare the WT_REFs for the move. */
- __split_ref_step1(session, alloc_index, split_gen, true);
+ __split_ref_prepare(session, alloc_index, split_gen, true);
/* Split into the parent. */
WT_ERR(__split_parent(session, page_ref, alloc_index->index,
alloc_index->entries, parent_incr, false, false));
- /* Confirm the page's index hasn't moved, then update it. */
+ /*
+ * Confirm the page's index hasn't moved, then update it, which makes
+ * the split visible to threads descending the tree.
+ */
WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(page) == pindex);
WT_INTL_INDEX_SET(page, replace_index);
-#ifdef HAVE_DIAGNOSTIC
- WT_WITH_PAGE_INDEX(session,
- __split_verify_intl_key_order(session, parent));
- WT_WITH_PAGE_INDEX(session,
- __split_verify_intl_key_order(session, page));
-#endif
-
- /* Finalize the WT_REFs we moved. */
- WT_ERR(__split_ref_step2(session, alloc_index, true));
-
/* The split is complete and correct, ignore benign errors. */
complete = WT_ERR_IGNORE;
- /*
- * Push out the changes: not required for correctness, but no reason
- * to wait.
- */
- WT_FULL_BARRIER();
+#ifdef HAVE_DIAGNOSTIC
+ WT_WITH_PAGE_INDEX(session,
+ ret = __split_verify_intl(session, parent, page, page, true));
+ WT_ERR(ret);
+#endif
/*
* We don't care about the page-index we allocated, all we needed was