summary refs log tree commit diff
path: root/src/btree/bt_split.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/btree/bt_split.c')
-rw-r--r-- src/btree/bt_split.c 214
1 files changed, 56 insertions, 158 deletions
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index 49043c8bab4..c1b7b6c4001 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2014-2016 MongoDB, Inc.
+ * Copyright (c) 2014-2017 MongoDB, Inc.
* Copyright (c) 2008-2014 WiredTiger, Inc.
* All rights reserved.
*
@@ -31,143 +31,6 @@ typedef enum {
} WT_SPLIT_ERROR_PHASE;
/*
- * __split_oldest_gen --
- * Calculate the oldest active split generation.
- */
-static uint64_t
-__split_oldest_gen(WT_SESSION_IMPL *session)
-{
- WT_CONNECTION_IMPL *conn;
- WT_SESSION_IMPL *s;
- uint64_t gen, oldest;
- u_int i, session_cnt;
-
- conn = S2C(session);
- WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (i = 0, s = conn->sessions, oldest = conn->split_gen + 1;
- i < session_cnt;
- i++, s++)
- if (((gen = s->split_gen) != 0) && gen < oldest)
- oldest = gen;
-
- return (oldest);
-}
-
-/*
- * __wt_split_obsolete --
- * Check if it is safe to free / evict based on split generation.
- */
-bool
-__wt_split_obsolete(WT_SESSION_IMPL *session, uint64_t split_gen)
-{
- return (split_gen < __split_oldest_gen(session));
-}
-
-/*
- * __split_stash_add --
- * Add a new entry into the session's split stash list.
- */
-static int
-__split_stash_add(
- WT_SESSION_IMPL *session, uint64_t split_gen, void *p, size_t len)
-{
- WT_CONNECTION_IMPL *conn;
- WT_SPLIT_STASH *stash;
-
- WT_ASSERT(session, p != NULL);
-
- conn = S2C(session);
-
- /* Grow the list as necessary. */
- WT_RET(__wt_realloc_def(session, &session->split_stash_alloc,
- session->split_stash_cnt + 1, &session->split_stash));
-
- stash = session->split_stash + session->split_stash_cnt++;
- stash->split_gen = split_gen;
- stash->p = p;
- stash->len = len;
-
- (void)__wt_atomic_add64(&conn->split_stashed_bytes, len);
- (void)__wt_atomic_add64(&conn->split_stashed_objects, 1);
-
- /* See if we can free any previous entries. */
- if (session->split_stash_cnt > 1)
- __wt_split_stash_discard(session);
-
- return (0);
-}
-
-/*
- * __wt_split_stash_discard --
- * Discard any memory from a session's split stash that we can.
- */
-void
-__wt_split_stash_discard(WT_SESSION_IMPL *session)
-{
- WT_CONNECTION_IMPL *conn;
- WT_SPLIT_STASH *stash;
- uint64_t oldest;
- size_t i;
-
- conn = S2C(session);
-
- /* Get the oldest split generation. */
- oldest = __split_oldest_gen(session);
-
- for (i = 0, stash = session->split_stash;
- i < session->split_stash_cnt;
- ++i, ++stash) {
- if (stash->p == NULL)
- continue;
- if (stash->split_gen >= oldest)
- break;
- /*
- * It's a bad thing if another thread is in this memory after
- * we free it, make sure nothing good happens to that thread.
- */
- (void)__wt_atomic_sub64(&conn->split_stashed_bytes, stash->len);
- (void)__wt_atomic_sub64(&conn->split_stashed_objects, 1);
- __wt_overwrite_and_free_len(session, stash->p, stash->len);
- }
-
- /*
- * If there are enough free slots at the beginning of the list, shuffle
- * everything down.
- */
- if (i > 100 || i == session->split_stash_cnt)
- if ((session->split_stash_cnt -= i) > 0)
- memmove(session->split_stash, stash,
- session->split_stash_cnt * sizeof(*stash));
-}
-
-/*
- * __wt_split_stash_discard_all --
- * Discard all memory from a session's split stash.
- */
-void
-__wt_split_stash_discard_all(
- WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session)
-{
- WT_SPLIT_STASH *stash;
- size_t i;
-
- /*
- * This function is called during WT_CONNECTION.close to discard any
- * memory that remains. For that reason, we take two WT_SESSION_IMPL
- * arguments: session_safe is still linked to the WT_CONNECTION and
- * can be safely used for calls to other WiredTiger functions, while
- * session is the WT_SESSION_IMPL we're cleaning up.
- */
- for (i = 0, stash = session->split_stash;
- i < session->split_stash_cnt;
- ++i, ++stash)
- __wt_free(session_safe, stash->p);
-
- __wt_free(session_safe, session->split_stash);
- session->split_stash_cnt = session->split_stash_alloc = 0;
-}
-
-/*
* __split_safe_free --
* Free a buffer if we can be sure no thread is accessing it, or schedule
* it to be freed otherwise.
@@ -177,13 +40,14 @@ __split_safe_free(WT_SESSION_IMPL *session,
uint64_t split_gen, bool exclusive, void *p, size_t s)
{
/* We should only call safe free if we aren't pinning the memory. */
- WT_ASSERT(session, session->split_gen != split_gen);
+ WT_ASSERT(session,
+ __wt_session_gen(session, WT_GEN_SPLIT) != split_gen);
/*
* We have swapped something in a page: if we don't have exclusive
* access, check whether there are other threads in the same tree.
*/
- if (!exclusive && __split_oldest_gen(session) > split_gen)
+ if (!exclusive && __wt_gen_oldest(session, WT_GEN_SPLIT) > split_gen)
exclusive = true;
if (exclusive) {
@@ -191,7 +55,7 @@ __split_safe_free(WT_SESSION_IMPL *session,
return (0);
}
- return (__split_stash_add(session, split_gen, p, s));
+ return (__wt_stash_add(session, WT_GEN_SPLIT, split_gen, p, s));
}
#ifdef HAVE_DIAGNOSTIC
@@ -645,7 +509,8 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root)
* generation to block splits in newly created pages, so get one.
*/
WT_ENTER_PAGE_INDEX(session);
- __split_ref_prepare(session, alloc_index, session->split_gen, false);
+ __split_ref_prepare(session, alloc_index,
+ __wt_session_gen(session, WT_GEN_SPLIT), false);
/*
* Confirm the root page's index hasn't moved, then update it, which
@@ -662,7 +527,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root)
* after the new index is swapped into place in order to know that no
* readers are looking at the old index.
*/
- split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1);
+ split_gen = __wt_gen_next(session, WT_GEN_SPLIT);
root->pg_intl_split_gen = split_gen;
#ifdef HAVE_DIAGNOSTIC
@@ -848,7 +713,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
* the new index is swapped into place in order to know that no readers
* are looking at the old index.
*/
- split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1);
+ split_gen = __wt_gen_next(session, WT_GEN_SPLIT);
parent->pg_intl_split_gen = split_gen;
/*
@@ -1173,7 +1038,8 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page)
* generation to block splits in newly created pages, so get one.
*/
WT_ENTER_PAGE_INDEX(session);
- __split_ref_prepare(session, alloc_index, session->split_gen, true);
+ __split_ref_prepare(session, alloc_index,
+ __wt_session_gen(session, WT_GEN_SPLIT), true);
/* Split into the parent. */
if ((ret = __split_parent(session, page_ref, alloc_index->index,
@@ -1194,7 +1060,7 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page)
* after the new index is swapped into place in order to know that no
* readers are looking at the old index.
*/
- split_gen = __wt_atomic_addv64(&S2C(session)->split_gen, 1);
+ split_gen = __wt_gen_next(session, WT_GEN_SPLIT);
page->pg_intl_split_gen = split_gen;
#ifdef HAVE_DIAGNOSTIC
@@ -1256,12 +1122,12 @@ err: switch (complete) {
}
/*
- * __split_internal_lock --
+ * __split_internal_lock_worker --
* Lock an internal page.
*/
static int
-__split_internal_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool trylock,
- WT_PAGE **parentp, bool *hazardp)
+__split_internal_lock_worker(WT_SESSION_IMPL *session,
+ WT_REF *ref, bool trylock, WT_PAGE **parentp, bool *hazardp)
{
WT_DECL_RET;
WT_PAGE *parent;
@@ -1300,13 +1166,19 @@ __split_internal_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool trylock,
for (;;) {
parent = ref->home;
+ /*
+ * The page will be marked dirty, and we can only lock a page
+ * with a modify structure.
+ */
+ WT_RET(__wt_page_modify_init(session, parent));
+
if (trylock)
- WT_RET(__wt_try_writelock(session, &parent->page_lock));
+ WT_RET(WT_PAGE_TRYLOCK(session, parent));
else
- __wt_writelock(session, &parent->page_lock);
+ WT_PAGE_LOCK(session, parent);
if (parent == ref->home)
break;
- __wt_writeunlock(session, &parent->page_lock);
+ WT_PAGE_UNLOCK(session, parent);
}
/*
@@ -1329,7 +1201,33 @@ __split_internal_lock(WT_SESSION_IMPL *session, WT_REF *ref, bool trylock,
*parentp = parent;
return (0);
-err: __wt_writeunlock(session, &parent->page_lock);
+err: WT_PAGE_UNLOCK(session, parent);
+ return (ret);
+}
+
+/*
+ * __split_internal_lock --
+ * Lock an internal page.
+ */
+static int
+__split_internal_lock(WT_SESSION_IMPL *session,
+ WT_REF *ref, bool trylock, WT_PAGE **parentp, bool *hazardp)
+{
+ WT_DECL_RET;
+
+ /*
+ * There's no lock on our parent page and we're about to acquire one,
+ * which implies using the WT_REF.home field to reference our parent
+ * page. As a child of the parent page, we prevent its eviction, but
+ * that's a weak guarantee. If the parent page splits, and our WT_REF
+ * were to move with the split, the WT_REF.home field might change
+ * underneath us and we could race, and end up attempting to access
+ * an evicted page. Set the session page-index generation so if the
+ * parent splits, it still can't be evicted.
+ */
+ WT_WITH_PAGE_INDEX(session,
+ ret = __split_internal_lock_worker(
+ session, ref, trylock, parentp, hazardp));
return (ret);
}
@@ -1345,7 +1243,7 @@ __split_internal_unlock(WT_SESSION_IMPL *session, WT_PAGE *parent, bool hazard)
if (hazard)
ret = __wt_hazard_clear(session, parent->pg_intl_parent_ref);
- __wt_writeunlock(session, &parent->page_lock);
+ WT_PAGE_UNLOCK(session, parent);
return (ret);
}
@@ -1558,8 +1456,8 @@ __split_multi_inmem(
WT_ERR(__wt_col_search(session, recno, ref, &cbt));
/* Apply the modification. */
- WT_ERR(__wt_col_modify(
- session, &cbt, recno, NULL, upd, false));
+ WT_ERR(__wt_col_modify(session,
+ &cbt, recno, NULL, upd, WT_UPDATE_STANDARD, true));
break;
case WT_PAGE_ROW_LEAF:
/* Build a key. */
@@ -1580,8 +1478,8 @@ __split_multi_inmem(
WT_ERR(__wt_row_search(session, key, ref, &cbt, true));
/* Apply the modification. */
- WT_ERR(__wt_row_modify(
- session, &cbt, key, NULL, upd, false));
+ WT_ERR(__wt_row_modify(session, &cbt,
+ key, NULL, upd, WT_UPDATE_STANDARD, true));
break;
WT_ILLEGAL_VALUE_ERR(session);
}