summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Keith Bostic <keith.bostic@mongodb.com> 2016-06-26 22:45:12 -0400
committer: Alex Gorrod <alexander.gorrod@mongodb.com> 2016-06-28 14:32:57 +1000
commit: 552a33b5bdb4f4d6e561c603a33ccb58a7ee9eca (patch)
tree: 3281eaed6b6c066613d144efb1bd5a132e10d8d3
parent: 9cfe4e14acc569a313fd7d7781ac036b82269020 (diff)
download: mongo-552a33b5bdb4f4d6e561c603a33ccb58a7ee9eca.tar.gz
WT-2708 split child-update race with reconciliation/eviction (#2835)
(cherry picked from commit 521270d54c41294da86a95690a54068cc23d4f1d)
When splitting the root page and updating the child's WT_REF.addr, reconciliation/eviction can race with us, updating WT_REF.addr after our read and before our update. The update is only necessary while the child's address points into the page being split: if the address has changed, it can no longer point into the page being split, so the update is no longer necessary.
-rw-r--r-- src/btree/bt_split.c 23
1 files changed, 16 insertions, 7 deletions
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index 7f3620bb361..39c10133542 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -251,7 +251,7 @@ static int
__split_ref_deepen_move(WT_SESSION_IMPL *session,
WT_PAGE *parent, WT_REF *ref, size_t *parent_decrp, size_t *child_incrp)
{
- WT_ADDR *addr;
+ WT_ADDR *addr, *ref_addr;
WT_CELL_UNPACK unpack;
WT_DECL_RET;
WT_IKEY *ikey;
@@ -287,13 +287,18 @@ __split_ref_deepen_move(WT_SESSION_IMPL *session,
}
/*
- * If there's no address (the page has never been written), or the
- * address has been instantiated, there's no work to do. Otherwise,
- * get the address from the on-page cell.
+ * If there's no address at all (the page has never been written), or
+ * the address has already been instantiated, there's no work to do.
+ * Otherwise, the address still references a split page on-page cell,
+ * instantiate it. We can race with reconciliation and/or eviction of
+ * the child pages, be cautious: read the address and verify it, and
+ * only update it if the value is unchanged from the original. In the
+ * case of a race, the address must no longer reference the split page,
+ * we're done.
*/
- addr = ref->addr;
- if (addr != NULL && !__wt_off_page(parent, addr)) {
- __wt_cell_unpack((WT_CELL *)ref->addr, &unpack);
+ WT_ORDERED_READ(ref_addr, ref->addr);
+ if (ref_addr != NULL && !__wt_off_page(parent, ref_addr)) {
+ __wt_cell_unpack((WT_CELL *)ref_addr, &unpack);
WT_RET(__wt_calloc_one(session, &addr));
if ((ret = __wt_strndup(
session, unpack.data, unpack.size, &addr->addr)) != 0) {
@@ -304,6 +309,10 @@ __split_ref_deepen_move(WT_SESSION_IMPL *session,
addr->type =
unpack.raw == WT_CELL_ADDR_INT ? WT_ADDR_INT : WT_ADDR_LEAF;
ref->addr = addr;
+ if (!__wt_atomic_cas_ptr(&ref->addr, ref_addr, addr)) {
+ __wt_free(session, addr->addr);
+ __wt_free(session, addr);
+ }
}
/* And finally, the WT_REF itself. */