summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Cahill <michael.cahill@wiredtiger.com> 2015-02-02 17:11:27 +1100
committer: Michael Cahill <michael.cahill@wiredtiger.com> 2015-02-02 17:11:27 +1100
commit: 5f00de07b5bad20a6ffb5ec7d412c4ca0b10c64f (patch)
tree: 759ef608bc56c2ff84f0b42c4b53ca341e974548
parent: 7f72921fdfc5ad3b4f4d089a192469972c69bf08 (diff)
download: mongo-5f00de07b5bad20a6ffb5ec7d412c4ca0b10c64f.tar.gz
split_gen paranoia: always increment split_gen once per split, use the allocated value to check for existing readers. Make sure that publishing a split_gen doesn't miss an update.
-rw-r--r-- src/btree/bt_split.c 48
-rw-r--r-- src/btree/col_srch.c 1
-rw-r--r-- src/btree/row_srch.c 3
-rw-r--r-- src/include/btmem.h 5
-rw-r--r-- src/include/btree.i 3
-rw-r--r-- src/include/connection.h 2
-rw-r--r-- src/reconcile/rec_write.c 8
7 files changed, 44 insertions, 26 deletions
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index e4fe51ea28f..a11c75a00ba 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -49,7 +49,8 @@ __split_oldest_gen(WT_SESSION_IMPL *session)
* Add a new entry into the session's split stash list.
*/
static int
-__split_stash_add(WT_SESSION_IMPL *session, void *p, size_t len)
+__split_stash_add(
+ WT_SESSION_IMPL *session, uint64_t split_gen, void *p, size_t len)
{
WT_SPLIT_STASH *stash;
@@ -60,7 +61,7 @@ __split_stash_add(WT_SESSION_IMPL *session, void *p, size_t len)
session->split_stash_cnt + 1, &session->split_stash));
stash = session->split_stash + session->split_stash_cnt++;
- stash->split_gen = WT_ATOMIC_ADD8(S2C(session)->split_gen, 1);
+ stash->split_gen = split_gen;
stash->p = p;
stash->len = len;
@@ -150,14 +151,14 @@ __wt_split_stash_discard_all(
* it to be freed otherwise.
*/
static int
-__split_safe_free(WT_SESSION_IMPL *session, int exclusive, void *p, size_t s)
+__split_safe_free(WT_SESSION_IMPL *session,
+ uint64_t split_gen, int exclusive, void *p, size_t s)
{
/*
* We have swapped something in a page: if we don't have exclusive
* access, check whether there are other threads in the same tree.
*/
- if (!exclusive &&
- __split_oldest_gen(session) == S2C(session)->split_gen + 1)
+ if (!exclusive && __split_oldest_gen(session) > split_gen)
exclusive = 1;
if (exclusive) {
@@ -165,7 +166,7 @@ __split_safe_free(WT_SESSION_IMPL *session, int exclusive, void *p, size_t s)
return (0);
}
- return (__split_stash_add(session, p, s));
+ return (__split_stash_add(session, split_gen, p, s));
}
/*
@@ -393,6 +394,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
WT_REF **alloc_refp;
WT_REF *child_ref, **child_refp, *parent_ref, **parent_refp, *ref;
size_t child_incr, parent_decr, parent_incr, size;
+ uint64_t split_gen;
uint32_t chunk, i, j, remain, slots;
int panic;
void *p;
@@ -527,6 +529,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
* needs to be paid.
*/
WT_INTL_INDEX_SET(parent, alloc_index);
+ split_gen = WT_ATOMIC_ADD8(S2C(session)->split_gen, 1);
panic = 1;
#ifdef HAVE_DIAGNOSTIC
@@ -596,7 +599,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
* be using the new index.
*/
size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *);
- WT_ERR(__split_safe_free(session, 0, pindex, size));
+ WT_ERR(__split_safe_free(session, split_gen, 0, pindex, size));
WT_MEMSIZE_ADD(parent_decr, size);
#if 0
@@ -815,7 +818,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
static int
__split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
uint32_t new_entries, size_t parent_decr, size_t parent_incr,
- int exclusive, int ref_discard)
+ int exclusive, int ref_discard, uint64_t *split_genp)
{
WT_DECL_RET;
WT_IKEY *ikey;
@@ -823,6 +826,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
WT_PAGE_INDEX *alloc_index, *pindex;
WT_REF **alloc_refp, *next_ref, *parent_ref;
size_t size;
+ uint64_t split_gen;
uint32_t children, i, j;
uint32_t deleted_entries, parent_entries, result_entries;
int complete, hazard, locked;
@@ -929,6 +933,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
* to threads descending the tree.
*/
WT_INTL_INDEX_SET(parent, alloc_index);
+ split_gen = *split_genp = WT_ATOMIC_ADD8(S2C(session)->split_gen, 1);
alloc_index = NULL;
#ifdef HAVE_DIAGNOSTIC
@@ -975,7 +980,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
if (ikey != NULL) {
size = sizeof(WT_IKEY) + ikey->size;
WT_TRET(__split_safe_free(
- session, 0, ikey, size));
+ session, split_gen, 0, ikey, size));
WT_MEMSIZE_ADD(parent_decr, size);
}
/*
@@ -993,7 +998,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
}
WT_TRET(__split_safe_free(
- session, 0, next_ref, sizeof(WT_REF)));
+ session, split_gen, 0, next_ref, sizeof(WT_REF)));
WT_MEMSIZE_ADD(parent_decr, sizeof(WT_REF));
}
}
@@ -1003,7 +1008,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
* Add it to the session discard list, to be freed when it's safe.
*/
size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *);
- WT_TRET(__split_safe_free(session, exclusive, pindex, size));
+ WT_TRET(__split_safe_free(session, split_gen, exclusive, pindex, size));
WT_MEMSIZE_ADD(parent_decr, size);
/*
@@ -1110,6 +1115,7 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
WT_PAGE *page, *right;
WT_REF *child, *split_ref[2] = { NULL, NULL };
size_t page_decr, parent_decr, parent_incr, right_incr;
+ uint64_t split_gen;
int i;
*splitp = 0;
@@ -1358,8 +1364,8 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
* longer locked, so we cannot safely look at it.
*/
page = NULL;
- if ((ret = __split_parent(
- session, ref, split_ref, 2, parent_decr, parent_incr, 0, 0)) != 0) {
+ if ((ret = __split_parent(session, ref, split_ref, 2,
+ parent_decr, parent_incr, 0, 0, &split_gen)) != 0) {
/*
* Move the insert list element back to the original page list.
* For simplicity, the previous skip list pointers originally
@@ -1396,8 +1402,8 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
*/
if (ikey != NULL)
WT_TRET(__split_safe_free(
- session, 0, ikey, sizeof(WT_IKEY) + ikey->size));
- WT_TRET(__split_safe_free(session, 0, ref, sizeof(WT_REF)));
+ session, split_gen, 0, ikey, sizeof(WT_IKEY) + ikey->size));
+ WT_TRET(__split_safe_free(session, split_gen, 0, ref, sizeof(WT_REF)));
/*
* A note on error handling: if we completed the split, return success,
@@ -1480,6 +1486,7 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
WT_PAGE_MODIFY *mod;
WT_REF **ref_new;
size_t parent_decr, parent_incr;
+ uint64_t split_gen;
uint32_t i, new_entries;
page = ref->page;
@@ -1510,8 +1517,8 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
parent_decr, sizeof(WT_IKEY) + ikey->size);
/* Split into the parent. */
- WT_ERR(__split_parent(session,
- ref, ref_new, new_entries, parent_decr, parent_incr, exclusive, 1));
+ WT_ERR(__split_parent(session, ref, ref_new, new_entries,
+ parent_decr, parent_incr, exclusive, 1, &split_gen));
__wt_free(session, ref_new);
@@ -1534,9 +1541,10 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
* safe.
*/
if (ikey != NULL)
- WT_TRET(__split_safe_free(
- session, exclusive, ikey, sizeof(WT_IKEY) + ikey->size));
- WT_TRET(__split_safe_free(session, exclusive, ref, sizeof(WT_REF)));
+ WT_TRET(__split_safe_free(session, split_gen, exclusive,
+ ikey, sizeof(WT_IKEY) + ikey->size));
+ WT_TRET(__split_safe_free(session, split_gen, exclusive,
+ ref, sizeof(WT_REF)));
/*
* A note on error handling: if we completed the split, return success,
diff --git a/src/btree/col_srch.c b/src/btree/col_srch.c
index 4c418f91de0..db1b565b439 100644
--- a/src/btree/col_srch.c
+++ b/src/btree/col_srch.c
@@ -49,6 +49,7 @@ restart: page = current->page;
WT_ASSERT(session, current->key.recno == page->pg_intl_recno);
+ WT_ASSERT(session, session->split_gen != 0);
pindex = WT_INTL_INDEX_COPY(page);
base = pindex->entries;
descent = pindex->index[base - 1];
diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c
index 036e11bec6d..9967c5ecb0c 100644
--- a/src/btree/row_srch.c
+++ b/src/btree/row_srch.c
@@ -195,6 +195,7 @@ restart: page = current->page;
if (page->type != WT_PAGE_ROW_INT)
break;
+ WT_ASSERT(session, session->split_gen != 0);
pindex = WT_INTL_INDEX_COPY(page);
/*
@@ -487,6 +488,7 @@ restart:
if (page->type != WT_PAGE_ROW_INT)
break;
+ WT_ASSERT(session, session->split_gen != 0);
pindex = WT_INTL_INDEX_COPY(page);
descent = pindex->index[
__wt_random(session->rnd) % pindex->entries];
@@ -521,6 +523,7 @@ restart:
*/
cbt->ref = current;
cbt->compare = 0;
+ WT_ASSERT(session, session->split_gen != 0);
pindex = WT_INTL_INDEX_COPY(btree->root.page);
cbt->slot = pindex->entries < 2 ?
__wt_random(session->rnd) % page->pg_row_entries : 0;
diff --git a/src/include/btmem.h b/src/include/btmem.h
index aea9ee98742..d020d29351c 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -1008,7 +1008,10 @@ struct __wt_insert_head {
#define WT_ENTER_PAGE_INDEX(session) do { \
uint64_t __prev_split_gen = (session)->split_gen; \
if (__prev_split_gen == 0) \
- WT_PUBLISH((session)->split_gen, S2C(session)->split_gen)
+ do { \
+ WT_PUBLISH((session)->split_gen, \
+ S2C(session)->split_gen); \
+ } while ((session)->split_gen != S2C(session)->split_gen)
#define WT_LEAVE_PAGE_INDEX(session) \
if (__prev_split_gen == 0) \
diff --git a/src/include/btree.i b/src/include/btree.i
index 059b7257d02..0af7f57025a 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -221,8 +221,7 @@ __wt_page_refp(WT_SESSION_IMPL *session,
WT_PAGE_INDEX *pindex;
uint32_t i;
- WT_ASSERT(session,
- WT_SESSION_TXN_STATE(session)->snap_min != WT_TXN_NONE);
+ WT_ASSERT(session, session->split_gen != 0);
/*
* Copy the parent page's index value: the page can split at any time,
diff --git a/src/include/connection.h b/src/include/connection.h
index ff34b014ecf..7b94a7ea94b 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -146,7 +146,7 @@ struct __wt_connection_impl {
WT_FH *lock_fh; /* Lock file handle */
- uint64_t split_gen; /* Generation number for splits */
+ volatile uint64_t split_gen; /* Generation number for splits */
/*
* The connection keeps a cache of data handles. The set of handles
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index 0300596f90b..4b60fe6a6cd 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -440,8 +440,11 @@ __wt_reconcile(WT_SESSION_IMPL *session,
* Root pages are special, splits have to be done, we can't put it off
* as the parent's problem any more.
*/
- if (__wt_ref_is_root(ref))
- return (__rec_root_write(session, page, flags));
+ if (__wt_ref_is_root(ref)) {
+ WT_WITH_PAGE_INDEX(session,
+ ret = __rec_root_write(session, page, flags));
+ return (ret);
+ }
/*
* Otherwise, mark the page's parent dirty.
@@ -504,6 +507,7 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
WT_ILLEGAL_VALUE(session);
}
+ WT_ASSERT(session, session->split_gen != 0);
pindex = WT_INTL_INDEX_COPY(next);
for (i = 0; i < mod->mod_multi_entries; ++i) {
WT_ERR(__wt_multi_to_ref(session,