From 1953776ada137f3deae50169bf889d2063b353d3 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Thu, 18 Dec 2014 16:45:06 +1100 Subject: Don't try to set and clear session->split_gen in WT_INTL_FOREACH_BEGIN: there are too many of those loops to ensure that none of them skip clearing it. Instead, make sure all calls are wrapped in WT_WITH_APAGE_INDEX. refs SERVER-16546 --- src/btree/bt_debug.c | 12 ++++++-- src/btree/bt_handle.c | 4 ++- src/btree/bt_split.c | 3 +- src/btree/bt_vrfy.c | 3 +- src/evict/evict_page.c | 72 +++++++++++++++++++++++++++++++---------------- src/include/btmem.h | 8 ++---- src/reconcile/rec_write.c | 6 ++-- 7 files changed, 70 insertions(+), 38 deletions(-) diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index ebbb335d3a8..05407537a53 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -453,7 +453,8 @@ __wt_debug_tree_shape( if (page == NULL) page = S2BT(session)->root.page; - __debug_tree_shape_worker(ds, page, 0); + WT_WITH_PAGE_INDEX(session, + __debug_tree_shape_worker(ds, page, 0)); __dmsg_wrapup(ds); return (0); @@ -535,6 +536,7 @@ __debug_tree( static int __debug_page(WT_DBG *ds, WT_PAGE *page, uint32_t flags) { + WT_DECL_RET; WT_SESSION_IMPL *session; session = ds->session; @@ -549,14 +551,18 @@ __debug_page(WT_DBG *ds, WT_PAGE *page, uint32_t flags) __debug_page_col_fix(ds, page); break; case WT_PAGE_COL_INT: - WT_RET(__debug_page_col_int(ds, page, flags)); + WT_WITH_PAGE_INDEX(session, + ret = __debug_page_col_int(ds, page, flags)); + WT_RET(ret); break; case WT_PAGE_COL_VAR: if (LF_ISSET(WT_DEBUG_TREE_LEAF)) WT_RET(__debug_page_col_var(ds, page)); break; case WT_PAGE_ROW_INT: - WT_RET(__debug_page_row_int(ds, page, flags)); + WT_WITH_PAGE_INDEX(session, + ret = __debug_page_row_int(ds, page, flags)); + WT_RET(ret); break; case WT_PAGE_ROW_LEAF: if (LF_ISSET(WT_DEBUG_TREE_LEAF)) diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 6d69bd8fc74..69c9ab27656 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -106,7 +106,9 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) session, root_addr, root_addr_size)); /* Warm the cache, if possible. */ - WT_ERR(__btree_preload(session)); + WT_WITH_PAGE_INDEX(session, + ret = __btree_preload(session)); + WT_ERR(ret); /* Get the last record number in a column-store file. */ if (btree->type != BTREE_ROW) diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index c6b97733b69..2af6c806be5 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -900,7 +900,8 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, alloc_index = NULL; #ifdef HAVE_DIAGNOSTIC - __split_verify_intl_key_order(session, parent); + WT_WITH_PAGE_INDEX(session, + __split_verify_intl_key_order(session, parent)); #endif /* diff --git a/src/btree/bt_vrfy.c b/src/btree/bt_vrfy.c index e7caf02fd2f..869fa2bb696 100644 --- a/src/btree/bt_vrfy.c +++ b/src/btree/bt_vrfy.c @@ -122,7 +122,8 @@ __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]) __wt_page_type_string( btree->root.page->type))); #endif - ret = __verify_tree(session, &btree->root, vs); + WT_WITH_PAGE_INDEX(session, + ret = __verify_tree(session, &btree->root, vs)); WT_TRET(__wt_cache_op(session, NULL, WT_SYNC_DISCARD)); } diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index bc791de6d0f..f7cbb55dbe4 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -75,7 +75,8 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive) /* Discard any subtree rooted in this page. */ if (istree) - __evict_discard_tree(session, ref, exclusive, 1); + WT_WITH_PAGE_INDEX(session, + __evict_discard_tree(session, ref, exclusive, 1)); /* Update the reference and discard the page. */ if (mod == NULL || !F_ISSET(mod, WT_PM_REC_MASK)) { @@ -252,6 +253,45 @@ __evict_discard_tree( } } +/* + * __evict_review_subtree -- + * Review a subtree for conditions that would block its eviction. + */ +static int +__evict_review_subtree(WT_SESSION_IMPL *session, + WT_REF *ref, int exclusive, int *inmem_splitp, int *istreep) +{ + WT_PAGE *page; + WT_REF *child; + + page = ref->page; + + WT_INTL_FOREACH_BEGIN(session, page, child) { + switch (child->state) { + case WT_REF_DISK: /* On-disk */ + case WT_REF_DELETED: /* On-disk, deleted */ + break; + case WT_REF_MEM: /* In-memory */ + /* + * Tell our caller if there's a subtree so we + * know to do a full walk when discarding the + * page. + */ + *istreep = 1; + WT_RET(__evict_review(session, child, exclusive, + 0, inmem_splitp, istreep)); + break; + case WT_REF_LOCKED: /* Being evicted */ + case WT_REF_READING: /* Being read */ + case WT_REF_SPLIT: /* Being split */ + return (EBUSY); + WT_ILLEGAL_VALUE(session); + } + } WT_INTL_FOREACH_END; + + return (0); +} + /* * __evict_review -- * Get exclusive access to the page and review the page and its subtree @@ -262,9 +302,9 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive, int top, int *inmem_splitp, int *istreep) { WT_BTREE *btree; + WT_DECL_RET; WT_PAGE *page; WT_PAGE_MODIFY *mod; - WT_REF *child; uint32_t flags; btree = S2BT(session); @@ -292,29 +332,11 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref, * have to write pages in depth-first order, otherwise we'll dirty * pages after we've written them. */ - if (WT_PAGE_IS_INTERNAL(page)) - WT_INTL_FOREACH_BEGIN(session, page, child) { - switch (child->state) { - case WT_REF_DISK: /* On-disk */ - case WT_REF_DELETED: /* On-disk, deleted */ - break; - case WT_REF_MEM: /* In-memory */ - /* - * Tell our caller if there's a subtree so we - * know to do a full walk when discarding the - * page. - */ - *istreep = 1; - WT_RET(__evict_review(session, child, exclusive, - 0, inmem_splitp, istreep)); - break; - case WT_REF_LOCKED: /* Being evicted */ - case WT_REF_READING: /* Being read */ - case WT_REF_SPLIT: /* Being split */ - return (EBUSY); - WT_ILLEGAL_VALUE(session); - } - } WT_INTL_FOREACH_END; + if (WT_PAGE_IS_INTERNAL(page)) { + WT_WITH_PAGE_INDEX(session, ret = __evict_review_subtree( + session, ref, exclusive, inmem_splitp, istreep)); + WT_RET(ret); + } mod = page->modify; diff --git a/src/include/btmem.h b/src/include/btmem.h index 8a3ec9ade89..72feb3dce8f 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -424,17 +424,15 @@ struct __wt_page { #define WT_INTL_FOREACH_BEGIN(session, page, ref) do { \ WT_PAGE_INDEX *__pindex; \ WT_REF **__refp; \ - WT_SESSION_IMPL *__session = (session); \ uint32_t __entries; \ - WT_ENTER_PAGE_INDEX(session); \ + WT_ASSERT(session, session->split_gen != 0); \ for (__pindex = WT_INTL_INDEX_COPY(page), \ __refp = __pindex->index, \ __entries = __pindex->entries; __entries > 0; --__entries) {\ (ref) = *__refp++; #define WT_INTL_FOREACH_END \ - } \ - WT_LEAVE_PAGE_INDEX(__session); \ - } while (0) + } \ +} while (0) /* Row-store leaf page. */ struct { diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 839ab028afd..6ce8247c2d4 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -397,13 +397,15 @@ __wt_reconcile(WT_SESSION_IMPL *session, ret = __rec_col_fix(session, r, page); break; case WT_PAGE_COL_INT: - ret = __rec_col_int(session, r, page); + WT_WITH_PAGE_INDEX(session, + ret = __rec_col_int(session, r, page)); break; case WT_PAGE_COL_VAR: ret = __rec_col_var(session, r, page, salvage); break; case WT_PAGE_ROW_INT: - ret = __rec_row_int(session, r, page); + WT_WITH_PAGE_INDEX(session, + ret = __rec_row_int(session, r, page)); break; case WT_PAGE_ROW_LEAF: ret = __rec_row_leaf(session, r, page, salvage); -- cgit v1.2.1 From 541714072c1e15bd94f30d18e30e4678f7a43170 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Thu, 18 Dec 2014 17:33:06 +1100 Subject: Set split-gen when deepening the tree, add paranoia that the eviction server doesn't keep split-gen pinned. --- src/btree/bt_split.c | 3 ++- src/evict/evict_lru.c | 5 ++++- src/txn/txn.c | 1 + 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 2af6c806be5..f1b96ad37f5 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -978,7 +978,8 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, * are holding it locked. */ if (ret == 0 && !exclusive && __split_should_deepen(session, parent)) - ret = __split_deepen(session, parent); + WT_WITH_PAGE_INDEX(session, + ret = __split_deepen(session, parent)); err: if (locked) F_CLR_ATOMIC(parent, WT_PAGE_SPLITTING); diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index 9037a4a1fcc..dbf3a71f222 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -941,9 +941,11 @@ retry: while (slot < max_entries && ret == 0) { * Re-check the "no eviction" flag -- it is used to enforce * exclusive access when a handle is being closed. */ - if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) + if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) { WT_WITH_BTREE(session, btree, ret = __evict_walk_file(session, &slot, flags)); + WT_ASSERT(session, session->split_gen == 0); + } __wt_spin_unlock(session, &cache->evict_walk_lock); @@ -1281,6 +1283,7 @@ __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_app) page->read_gen = __wt_cache_read_gen_set(session); WT_WITH_BTREE(session, btree, ret = __wt_evict_page(session, ref)); + WT_ASSERT(session, is_app || session->split_gen == 0); (void)WT_ATOMIC_SUB4(btree->evict_busy, 1); diff --git a/src/txn/txn.c b/src/txn/txn.c index f958361a905..d6aa483c18a 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -322,6 +322,7 @@ __wt_txn_release(WT_SESSION_IMPL *session) __wt_logrec_free(session, &txn->logrec); /* Discard any memory from the session's split stash that we can. */ + WT_ASSERT(session, session->split_gen == 0); if (session->split_stash_cnt > 0) __wt_split_stash_discard(session); -- cgit v1.2.1