diff options
-rw-r--r-- | src/btree/bt_split.c | 48 | ||||
-rw-r--r-- | src/btree/col_srch.c | 1 | ||||
-rw-r--r-- | src/btree/row_srch.c | 3 | ||||
-rw-r--r-- | src/include/btmem.h | 5 | ||||
-rw-r--r-- | src/include/btree.i | 3 | ||||
-rw-r--r-- | src/include/connection.h | 2 | ||||
-rw-r--r-- | src/reconcile/rec_write.c | 8 |
7 files changed, 44 insertions, 26 deletions
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index e4fe51ea28f..a11c75a00ba 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -49,7 +49,8 @@ __split_oldest_gen(WT_SESSION_IMPL *session) * Add a new entry into the session's split stash list. */ static int -__split_stash_add(WT_SESSION_IMPL *session, void *p, size_t len) +__split_stash_add( + WT_SESSION_IMPL *session, uint64_t split_gen, void *p, size_t len) { WT_SPLIT_STASH *stash; @@ -60,7 +61,7 @@ __split_stash_add(WT_SESSION_IMPL *session, void *p, size_t len) session->split_stash_cnt + 1, &session->split_stash)); stash = session->split_stash + session->split_stash_cnt++; - stash->split_gen = WT_ATOMIC_ADD8(S2C(session)->split_gen, 1); + stash->split_gen = split_gen; stash->p = p; stash->len = len; @@ -150,14 +151,14 @@ __wt_split_stash_discard_all( * it to be freed otherwise. */ static int -__split_safe_free(WT_SESSION_IMPL *session, int exclusive, void *p, size_t s) +__split_safe_free(WT_SESSION_IMPL *session, + uint64_t split_gen, int exclusive, void *p, size_t s) { /* * We have swapped something in a page: if we don't have exclusive * access, check whether there are other threads in the same tree. */ - if (!exclusive && - __split_oldest_gen(session) == S2C(session)->split_gen + 1) + if (!exclusive && __split_oldest_gen(session) > split_gen) exclusive = 1; if (exclusive) { @@ -165,7 +166,7 @@ __split_safe_free(WT_SESSION_IMPL *session, int exclusive, void *p, size_t s) return (0); } - return (__split_stash_add(session, p, s)); + return (__split_stash_add(session, split_gen, p, s)); } /* @@ -393,6 +394,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children) WT_REF **alloc_refp; WT_REF *child_ref, **child_refp, *parent_ref, **parent_refp, *ref; size_t child_incr, parent_decr, parent_incr, size; + uint64_t split_gen; uint32_t chunk, i, j, remain, slots; int panic; void *p; @@ -527,6 +529,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children) * needs to be paid. */ WT_INTL_INDEX_SET(parent, alloc_index); + split_gen = WT_ATOMIC_ADD8(S2C(session)->split_gen, 1); panic = 1; #ifdef HAVE_DIAGNOSTIC @@ -596,7 +599,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children) * be using the new index. */ size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *); - WT_ERR(__split_safe_free(session, 0, pindex, size)); + WT_ERR(__split_safe_free(session, split_gen, 0, pindex, size)); WT_MEMSIZE_ADD(parent_decr, size); #if 0 @@ -815,7 +818,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session, static int __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, uint32_t new_entries, size_t parent_decr, size_t parent_incr, - int exclusive, int ref_discard) + int exclusive, int ref_discard, uint64_t *split_genp) { WT_DECL_RET; WT_IKEY *ikey; @@ -823,6 +826,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, WT_PAGE_INDEX *alloc_index, *pindex; WT_REF **alloc_refp, *next_ref, *parent_ref; size_t size; + uint64_t split_gen; uint32_t children, i, j; uint32_t deleted_entries, parent_entries, result_entries; int complete, hazard, locked; @@ -929,6 +933,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, * to threads descending the tree. */ WT_INTL_INDEX_SET(parent, alloc_index); + split_gen = *split_genp = WT_ATOMIC_ADD8(S2C(session)->split_gen, 1); alloc_index = NULL; #ifdef HAVE_DIAGNOSTIC @@ -975,7 +980,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, if (ikey != NULL) { size = sizeof(WT_IKEY) + ikey->size; WT_TRET(__split_safe_free( - session, 0, ikey, size)); + session, split_gen, 0, ikey, size)); WT_MEMSIZE_ADD(parent_decr, size); } /* @@ -993,7 +998,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, } WT_TRET(__split_safe_free( - session, 0, next_ref, sizeof(WT_REF))); + session, split_gen, 0, next_ref, sizeof(WT_REF))); WT_MEMSIZE_ADD(parent_decr, sizeof(WT_REF)); } } @@ -1003,7 +1008,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, * Add it to the session discard list, to be freed when it's safe. */ size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *); - WT_TRET(__split_safe_free(session, exclusive, pindex, size)); + WT_TRET(__split_safe_free(session, split_gen, exclusive, pindex, size)); WT_MEMSIZE_ADD(parent_decr, size); /* @@ -1110,6 +1115,7 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp) WT_PAGE *page, *right; WT_REF *child, *split_ref[2] = { NULL, NULL }; size_t page_decr, parent_decr, parent_incr, right_incr; + uint64_t split_gen; int i; *splitp = 0; @@ -1358,8 +1364,8 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp) * longer locked, so we cannot safely look at it. */ page = NULL; - if ((ret = __split_parent( - session, ref, split_ref, 2, parent_decr, parent_incr, 0, 0)) != 0) { + if ((ret = __split_parent(session, ref, split_ref, 2, + parent_decr, parent_incr, 0, 0, &split_gen)) != 0) { /* * Move the insert list element back to the original page list. * For simplicity, the previous skip list pointers originally @@ -1396,8 +1402,8 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp) */ if (ikey != NULL) WT_TRET(__split_safe_free( - session, 0, ikey, sizeof(WT_IKEY) + ikey->size)); - WT_TRET(__split_safe_free(session, 0, ref, sizeof(WT_REF))); + session, split_gen, 0, ikey, sizeof(WT_IKEY) + ikey->size)); + WT_TRET(__split_safe_free(session, split_gen, 0, ref, sizeof(WT_REF))); /* * A note on error handling: if we completed the split, return success, @@ -1480,6 +1486,7 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive) WT_PAGE_MODIFY *mod; WT_REF **ref_new; size_t parent_decr, parent_incr; + uint64_t split_gen; uint32_t i, new_entries; page = ref->page; @@ -1510,8 +1517,8 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive) parent_decr, sizeof(WT_IKEY) + ikey->size); /* Split into the parent. */ - WT_ERR(__split_parent(session, - ref, ref_new, new_entries, parent_decr, parent_incr, exclusive, 1)); + WT_ERR(__split_parent(session, ref, ref_new, new_entries, + parent_decr, parent_incr, exclusive, 1, &split_gen)); __wt_free(session, ref_new); @@ -1534,9 +1541,10 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive) * safe. */ if (ikey != NULL) - WT_TRET(__split_safe_free( - session, exclusive, ikey, sizeof(WT_IKEY) + ikey->size)); - WT_TRET(__split_safe_free(session, exclusive, ref, sizeof(WT_REF))); + WT_TRET(__split_safe_free(session, split_gen, exclusive, + ikey, sizeof(WT_IKEY) + ikey->size)); + WT_TRET(__split_safe_free(session, split_gen, exclusive, + ref, sizeof(WT_REF))); /* * A note on error handling: if we completed the split, return success, diff --git a/src/btree/col_srch.c b/src/btree/col_srch.c index 4c418f91de0..db1b565b439 100644 --- a/src/btree/col_srch.c +++ b/src/btree/col_srch.c @@ -49,6 +49,7 @@ restart: page = current->page; WT_ASSERT(session, current->key.recno == page->pg_intl_recno); + WT_ASSERT(session, session->split_gen != 0); pindex = WT_INTL_INDEX_COPY(page); base = pindex->entries; descent = pindex->index[base - 1]; diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c index 036e11bec6d..9967c5ecb0c 100644 --- a/src/btree/row_srch.c +++ b/src/btree/row_srch.c @@ -195,6 +195,7 @@ restart: page = current->page; if (page->type != WT_PAGE_ROW_INT) break; + WT_ASSERT(session, session->split_gen != 0); pindex = WT_INTL_INDEX_COPY(page); /* @@ -487,6 +488,7 @@ restart: if (page->type != WT_PAGE_ROW_INT) break; + WT_ASSERT(session, session->split_gen != 0); pindex = WT_INTL_INDEX_COPY(page); descent = pindex->index[ __wt_random(session->rnd) % pindex->entries]; @@ -521,6 +523,7 @@ restart: */ cbt->ref = current; cbt->compare = 0; + WT_ASSERT(session, session->split_gen != 0); pindex = WT_INTL_INDEX_COPY(btree->root.page); cbt->slot = pindex->entries < 2 ? __wt_random(session->rnd) % page->pg_row_entries : 0; diff --git a/src/include/btmem.h b/src/include/btmem.h index aea9ee98742..d020d29351c 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -1008,7 +1008,10 @@ struct __wt_insert_head { #define WT_ENTER_PAGE_INDEX(session) do { \ uint64_t __prev_split_gen = (session)->split_gen; \ if (__prev_split_gen == 0) \ - WT_PUBLISH((session)->split_gen, S2C(session)->split_gen) + do { \ + WT_PUBLISH((session)->split_gen, \ + S2C(session)->split_gen); \ + } while ((session)->split_gen != S2C(session)->split_gen) #define WT_LEAVE_PAGE_INDEX(session) \ if (__prev_split_gen == 0) \ diff --git a/src/include/btree.i b/src/include/btree.i index 059b7257d02..0af7f57025a 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -221,8 +221,7 @@ __wt_page_refp(WT_SESSION_IMPL *session, WT_PAGE_INDEX *pindex; uint32_t i; - WT_ASSERT(session, - WT_SESSION_TXN_STATE(session)->snap_min != WT_TXN_NONE); + WT_ASSERT(session, session->split_gen != 0); /* * Copy the parent page's index value: the page can split at any time, diff --git a/src/include/connection.h b/src/include/connection.h index ff34b014ecf..7b94a7ea94b 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -146,7 +146,7 @@ struct __wt_connection_impl { WT_FH *lock_fh; /* Lock file handle */ - uint64_t split_gen; /* Generation number for splits */ + volatile uint64_t split_gen; /* Generation number for splits */ /* * The connection keeps a cache of data handles. The set of handles diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 0300596f90b..4b60fe6a6cd 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -440,8 +440,11 @@ __wt_reconcile(WT_SESSION_IMPL *session, * Root pages are special, splits have to be done, we can't put it off * as the parent's problem any more. */ - if (__wt_ref_is_root(ref)) - return (__rec_root_write(session, page, flags)); + if (__wt_ref_is_root(ref)) { + WT_WITH_PAGE_INDEX(session, + ret = __rec_root_write(session, page, flags)); + return (ret); + } /* * Otherwise, mark the page's parent dirty. @@ -504,6 +507,7 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags) WT_ILLEGAL_VALUE(session); } + WT_ASSERT(session, session->split_gen != 0); pindex = WT_INTL_INDEX_COPY(next); for (i = 0; i < mod->mod_multi_entries; ++i) { WT_ERR(__wt_multi_to_ref(session, |