diff options
Diffstat (limited to 'src/btree/bt_split.c')
-rw-r--r-- | src/btree/bt_split.c | 164 |
1 files changed, 71 insertions, 93 deletions
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 8122d242666..fcb14be7c76 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -197,7 +197,7 @@ __split_safe_free(WT_SESSION_IMPL *session, #ifdef HAVE_DIAGNOSTIC /* * __split_verify_intl_key_order -- - * Verify the key order on an internal page after a split, diagnostic only. + * Verify the key order on an internal page after a split. */ static void __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page) @@ -249,6 +249,42 @@ __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page) break; } } + +/* + * __split_verify_intl -- + * Verify a set of internal pages involved in a split. + */ +static int +__split_verify_intl(WT_SESSION_IMPL *session, + WT_PAGE *page1, WT_PAGE *page2, WT_PAGE *pindex_page, bool skip_first) +{ + WT_DECL_RET; + WT_REF *ref; + + /* The split is complete and live, verify all of the pages involved. */ + if (page1 != NULL) + __split_verify_intl_key_order(session, page1); + if (page2 != NULL) + __split_verify_intl_key_order(session, page2); + + /* Skip the first slot on non-root internal pages, it's not set. */ + WT_INTL_FOREACH_BEGIN(session, pindex_page, ref) { + if (skip_first) { + skip_first = false; + continue; + } + WT_ERR(__wt_page_in(session, ref, WT_READ_NO_EVICT)); + + __split_verify_intl_key_order(session, ref->page); + + WT_ERR(__wt_page_release(session, ref, WT_READ_NO_EVICT)); + } WT_INTL_FOREACH_END; + + return (0); + +err: /* Something really bad just happened. */ + WT_PANIC_RET(session, ret, "fatal error during page split"); +} #endif /* @@ -400,11 +436,11 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, } /* - * __split_ref_step1 -- + * __split_ref_prepare -- * Prepare a set of WT_REFs for a move. */ static void -__split_ref_step1(WT_SESSION_IMPL *session, +__split_ref_prepare(WT_SESSION_IMPL *session, WT_PAGE_INDEX *pindex, uint64_t split_gen, bool skip_first) { WT_PAGE *child; @@ -470,58 +506,6 @@ __split_ref_step1(WT_SESSION_IMPL *session, } /* - * __split_ref_step2 -- - * Allow the newly created children to be evicted or split. - */ -static int -__split_ref_step2( - WT_SESSION_IMPL *session, WT_PAGE_INDEX *pindex, bool skip_first) -{ - WT_DECL_RET; - WT_REF *ref; - uint32_t i; - - /* - * The split has gone live, enable eviction and splits on the newly - * created internal pages. - */ - WT_WRITE_BARRIER(); - - for (i = skip_first ? 1 : 0; i < pindex->entries; ++i) { - ref = pindex->index[i]; - - /* - * We don't hold hazard pointers on created pages, they cannot - * be evicted because the page-modify transaction value set as - * they were created prevents eviction. (See above, we reset - * that value as part of fixing up the page.) But, an eviction - * thread might be attempting to evict the page (the WT_REF may - * be WT_REF_LOCKED), or it may be a disk based page (the WT_REF - * may be WT_REF_READING), or it may be in some other state. - * Acquire a hazard pointer for any in-memory pages so we know - * the state of the page. Ignore pages not in-memory (deleted, - * on-disk, being read), there's no in-memory structure to fix. - */ - if ((ret = __wt_page_in(session, - ref, WT_READ_CACHE | WT_READ_NO_EVICT)) == WT_NOTFOUND) - continue; - WT_ERR(ret); - -#ifdef HAVE_DIAGNOSTIC - WT_WITH_PAGE_INDEX(session, - __split_verify_intl_key_order(session, ref->page)); -#endif - - WT_ERR(__wt_hazard_clear(session, ref)); - } - - return (0); - -err: /* Something really bad just happened. */ - WT_PANIC_RET(session, ret, "fatal error resolving a split"); -} - -/* * __split_root -- * Split the root page in-memory, deepening the tree. */ @@ -657,7 +641,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) root->pg_intl_split_gen = split_gen; /* Prepare the WT_REFs for the move. */ - __split_ref_step1(session, alloc_index, split_gen, false); + __split_ref_prepare(session, alloc_index, split_gen, false); /* * Confirm the root page's index hasn't moved, then update it, which @@ -665,19 +649,16 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) */ WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(root) == pindex); WT_INTL_INDEX_SET(root, alloc_index); - -#ifdef HAVE_DIAGNOSTIC - WT_WITH_PAGE_INDEX(session, - __split_verify_intl_key_order(session, root)); -#endif - /* Finalize the WT_REFs we moved. */ - WT_ERR(__split_ref_step2(session, alloc_index, false)); + alloc_index = NULL; /* The split is complete and correct, ignore benign errors. */ complete = WT_ERR_IGNORE; - /* We've installed the allocated page-index, ensure error handling. */ - alloc_index = NULL; +#ifdef HAVE_DIAGNOSTIC + WT_WITH_PAGE_INDEX(session, + ret = __split_verify_intl(session, root, NULL, root, false)); + WT_ERR(ret); +#endif /* * We can't free the previous root's index, there may be threads using @@ -852,11 +833,6 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, WT_INTL_INDEX_SET(parent, alloc_index); alloc_index = NULL; -#ifdef HAVE_DIAGNOSTIC - WT_WITH_PAGE_INDEX(session, - __split_verify_intl_key_order(session, parent)); -#endif - /* * If discarding the page's original WT_REF field, reset it to split. * Threads cursoring through the tree were blocked because that WT_REF @@ -875,18 +851,27 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, __wt_free(session, ref->page_del); } + /* + * Set the discarded WT_REF state to split, ensuring we don't + * race with any discard of the WT_REF deleted fields. + */ WT_PUBLISH(ref->state, WT_REF_SPLIT); - } - /* - * Push out the changes: not required for correctness, but don't let - * threads spin on incorrect page references longer than necessary. - */ - WT_FULL_BARRIER(); + /* + * Push out the change: not required for correctness, but stops + * threads spinning on incorrect page references. + */ + WT_FULL_BARRIER(); + } /* The split is complete and correct, ignore benign errors. */ complete = WT_ERR_IGNORE; +#ifdef HAVE_DIAGNOSTIC + WT_WITH_PAGE_INDEX(session, + __split_verify_intl_key_order(session, parent)); +#endif + /* * !!! * Swapping in the new page index released the page for eviction, we can @@ -1170,34 +1155,27 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) page->pg_intl_split_gen = split_gen; /* Prepare the WT_REFs for the move. */ - __split_ref_step1(session, alloc_index, split_gen, true); + __split_ref_prepare(session, alloc_index, split_gen, true); /* Split into the parent. */ WT_ERR(__split_parent(session, page_ref, alloc_index->index, alloc_index->entries, parent_incr, false, false)); - /* Confirm the page's index hasn't moved, then update it. */ + /* + * Confirm the page's index hasn't moved, then update it, which makes + * the split visible to threads descending the tree. + */ WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(page) == pindex); WT_INTL_INDEX_SET(page, replace_index); -#ifdef HAVE_DIAGNOSTIC - WT_WITH_PAGE_INDEX(session, - __split_verify_intl_key_order(session, parent)); - WT_WITH_PAGE_INDEX(session, - __split_verify_intl_key_order(session, page)); -#endif - - /* Finalize the WT_REFs we moved. */ - WT_ERR(__split_ref_step2(session, alloc_index, true)); - /* The split is complete and correct, ignore benign errors. */ complete = WT_ERR_IGNORE; - /* - * Push out the changes: not required for correctness, but no reason - * to wait. - */ - WT_FULL_BARRIER(); +#ifdef HAVE_DIAGNOSTIC + WT_WITH_PAGE_INDEX(session, + ret = __split_verify_intl(session, parent, page, page, true)); + WT_ERR(ret); +#endif /* * We don't care about the page-index we allocated, all we needed was |