summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Michael Cahill <michael.cahill@wiredtiger.com> 2015-01-14 08:38:59 +1100
committer: Michael Cahill <michael.cahill@wiredtiger.com> 2015-01-14 08:38:59 +1100
commit: 712fadc6e2759c00ca601132b52ce0e27d086e35 (patch)
tree: 1f8ae53f897829e7d30cf7e3e21baabab23aede7
parent: 4afb64d35aba7af9bc4e89e93758c025e2fe8815 (diff)
download: mongo-712fadc6e2759c00ca601132b52ce0e27d086e35.tar.gz
Fixes for discarding deleted references:
1. deal with overflow keys;
2. (safely) free memory associated with the WT_REF;
3. atomically swap the ref state so that concurrent readers can't race.
refs SERVER-16775, #1548
-rw-r--r-- src/btree/bt_delete.c | 3
-rw-r--r-- src/btree/bt_split.c | 41
2 files changed, 38 insertions, 6 deletions
diff --git a/src/btree/bt_delete.c b/src/btree/bt_delete.c
index 570b7f80742..622dfb1b294 100644
--- a/src/btree/bt_delete.c
+++ b/src/btree/bt_delete.c
@@ -234,7 +234,8 @@ __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref)
if (!WT_ATOMIC_CAS4(ref->state, WT_REF_DELETED, WT_REF_LOCKED))
return (0);
- skip = __wt_txn_visible(session, ref->page_del->txnid) ? 1 : 0;
+ skip = (ref->page_del == NULL ||
+ __wt_txn_visible(session, ref->page_del->txnid));
WT_PUBLISH(ref->state, WT_REF_DELETED);
return (skip);
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index 6d6a97c981b..c1a8d190de1 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -810,6 +810,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
int exclusive, int ref_discard)
{
WT_DECL_RET;
+ WT_IKEY *ikey;
WT_PAGE *parent;
WT_PAGE_INDEX *alloc_index, *pindex;
WT_REF **alloc_refp, *next_ref, *parent_ref;
@@ -866,11 +867,18 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
/*
* Remove any refs to deleted pages while we are splitting, we have
* the internal page locked down, and are copying the refs into a new
- * array anyway.
+ * array anyway. Switch them to the special split state, so that any
+ * reading thread will restart.
*/
- for (i = 0, deleted_entries = 0; i < parent_entries; ++i)
- if (pindex->index[i]->state == WT_REF_DELETED)
+ for (i = 0, deleted_entries = 0; i < parent_entries; ++i) {
+ next_ref = pindex->index[i];
+ WT_ASSERT(session, next_ref->state != WT_REF_SPLIT);
+ if (next_ref->state == WT_REF_DELETED &&
+ next_ref->page_del == NULL &&
+ WT_ATOMIC_CAS4(next_ref->state,
+ WT_REF_DELETED, WT_REF_SPLIT))
deleted_entries++;
+ }
/*
* The final entry count consists of: The original count, plus any
@@ -903,7 +911,24 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
*/
ref_new[j] = NULL;
}
- else if (next_ref->state != WT_REF_DELETED)
+ else if (next_ref->state == WT_REF_SPLIT) {
+ /*
+ * We're discarding a deleted reference.
+ * Free any resources it holds.
+ */
+ if (parent->type == WT_PAGE_ROW_INT) {
+ WT_TRET(__split_ovfl_key_cleanup(
+ session, parent, next_ref));
+ ikey = __wt_ref_key_instantiated(next_ref);
+ if (ikey != NULL)
+ WT_TRET(__split_safe_free(session, 0,
+ ikey,
+ sizeof(WT_IKEY) + ikey->size));
+ }
+
+ WT_TRET(__split_safe_free(
+ session, 0, next_ref, sizeof(WT_REF)));
+ } else
*alloc_refp++ = next_ref;
}
@@ -997,7 +1022,13 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
WT_WITH_PAGE_INDEX(session,
ret = __split_deepen(session, parent, children));
-err: if (locked)
+err: if (!complete)
+ for (i = 0; i < parent_entries; ++i) {
+ next_ref = pindex->index[i];
+ if (next_ref->state == WT_REF_SPLIT)
+ next_ref->state = WT_REF_DELETED;
+ }
+ if (locked)
F_CLR_ATOMIC(parent, WT_PAGE_SPLITTING);
if (hazard)