summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michael Cahill <michael.cahill@mongodb.com> 2015-12-02 22:00:52 +1100
committer Michael Cahill <michael.cahill@mongodb.com> 2015-12-02 22:00:52 +1100
commit 27d0cbdf8046565dba6902f4e6ee93b2642f0d19 (patch)
tree 11db010e007a2025cafd08590fea43eff6efa199
parent 0bc4f8f2dc7b51a86d6e8c120f48264af1e500af (diff)
parent 38369aebcf7da942929a3c27f72495e8dbe3e2d3 (diff)
download mongo-27d0cbdf8046565dba6902f4e6ee93b2642f0d19.tar.gz
Merge pull request #2354 from wiredtiger/server_21553_30backport mongodb-3.0.8
SERVER-21553 3.0 backport
-rw-r--r-- src/btree/bt_delete.c 12
-rw-r--r-- src/btree/bt_discard.c 5
-rw-r--r-- src/btree/bt_slvg.c 8
-rw-r--r-- src/btree/bt_split.c 60
-rw-r--r-- src/btree/bt_walk.c 4
-rw-r--r-- src/evict/evict_page.c 14
-rw-r--r-- src/include/btree.i 55
-rw-r--r-- src/reconcile/rec_write.c 44
8 files changed, 121 insertions, 81 deletions
diff --git a/src/btree/bt_delete.c b/src/btree/bt_delete.c
index 7313e31267f..e19085830bd 100644
--- a/src/btree/bt_delete.c
+++ b/src/btree/bt_delete.c
@@ -250,6 +250,18 @@ __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all)
__wt_txn_visible_all(session, ref->page_del->txnid) :
__wt_txn_visible(session, ref->page_del->txnid));
+ /*
+ * The page_del structure can be freed as soon as the delete is stable:
+ * it is only read when the ref state is WT_REF_DELETED. It is worth
+ * checking every time we come through because once this is freed, we
+ * no longer need synchronization to check the ref.
+ */
+ if (skip && ref->page_del != NULL && (visible_all ||
+ __wt_txn_visible_all(session, ref->page_del->txnid))) {
+ __wt_free(session, ref->page_del->update_list);
+ __wt_free(session, ref->page_del);
+ }
+
WT_PUBLISH(ref->state, WT_REF_DELETED);
return (skip);
}
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c
index c876da6309c..30e19147e12 100644
--- a/src/btree/bt_discard.c
+++ b/src/btree/bt_discard.c
@@ -252,10 +252,7 @@ __wt_free_ref(
}
/* Free any address allocation. */
- if (ref->addr != NULL && __wt_off_page(page, ref->addr)) {
- __wt_free(session, ((WT_ADDR *)ref->addr)->addr);
- __wt_free(session, ref->addr);
- }
+ __wt_ref_addr_free(session, ref);
/* Free any page-deleted information. */
if (ref->page_del != NULL) {
diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c
index 89355baeb5c..e4e611f947a 100644
--- a/src/btree/bt_slvg.c
+++ b/src/btree/bt_slvg.c
@@ -1299,9 +1299,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
* would have been lost.) Clear the reference addr so eviction doesn't
* free the underlying blocks.
*/
- __wt_free(session, ((WT_ADDR *)ref->addr)->addr);
- __wt_free(session, ref->addr);
- ref->addr = NULL;
+ __wt_ref_addr_free(session, ref);
/* Write the new version of the leaf page to disk. */
WT_ERR(__slvg_modify_init(session, page));
@@ -2008,9 +2006,7 @@ __slvg_row_build_leaf(
* would have been lost.) Clear the reference addr so eviction doesn't
* free the underlying blocks.
*/
- __wt_free(session, ((WT_ADDR *)ref->addr)->addr);
- __wt_free(session, ref->addr);
- ref->addr = NULL;
+ __wt_ref_addr_free(session, ref);
/* Write the new version of the leaf page to disk. */
WT_ERR(__slvg_modify_init(session, page));
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index 9d8e463feb0..6e0436bb01f 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -1030,6 +1030,16 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref,
#endif
/*
+ * Page-delete information is only read when the WT_REF state is
+ * WT_REF_DELETED. The page-delete memory wasn't added to the
+ * parent's footprint, ignore it here.
+ */
+ if (ref->page_del != NULL) {
+ __wt_free(session, ref->page_del->update_list);
+ __wt_free(session, ref->page_del);
+ }
+
+ /*
* Reset the page's original WT_REF field to split. Threads cursoring
* through the tree were blocked because that WT_REF state was set to
* locked. This update changes the locked state to split, unblocking
@@ -1090,19 +1100,15 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref,
session, split_gen, 0, ikey, size));
parent_decr += size;
}
- /*
- * The page_del structure can be freed immediately: it
- * is only read when the ref state is WT_REF_DELETED.
- * The size of the structure wasn't added to the parent,
- * don't decrement.
- */
- if (next_ref->page_del != NULL) {
- __wt_free(session,
- next_ref->page_del->update_list);
- __wt_free(session, next_ref->page_del);
- }
}
+ /*
+ * If this page was fast-truncated, any attached structure
+ * should have been freed before now.
+ */
+ WT_ASSERT(session, next_ref->page_del == NULL);
+
+ WT_TRET(__wt_ref_block_free(session, next_ref));
WT_TRET(__split_safe_free(
session, split_gen, 0, next_ref, sizeof(WT_REF)));
parent_decr += sizeof(WT_REF);
@@ -1213,21 +1219,30 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
* The first page in the split is the current page, but we still have
* to create a replacement WT_REF, the original WT_REF will be set to
* split status and eventually freed.
+ *
+ * The new WT_REF is not quite identical: we have to instantiate a key,
+ * and the new reference is visible to readers once the split completes.
+ *
+ * The key-instantiation code checks for races, leave the key fields
+ * zeroed so we don't trigger them.
+ *
+ * Don't copy any deleted page state: we may be splitting a page that
+ * was instantiated after a truncate and that history should not be
+ * carried onto these new child pages.
*/
WT_ERR(__wt_calloc_one(session, &split_ref[0]));
child = split_ref[0];
- *child = *ref;
+ child->page = ref->page;
+ child->home = ref->home;
+ child->pindex_hint = ref->pindex_hint;
+ child->state = WT_REF_MEM;
+ child->addr = ref->addr;
/*
- * The new WT_REF is not quite identical: we have to instantiate a key,
- * and the new reference is visible to readers once the split completes.
- *
- * The key-instantiation code checks for races, clear the key fields so
- * we don't trigger them.
+ * The address has moved to the replacement WT_REF. Make sure it isn't
+ * freed when the original ref is discarded.
*/
- child->key.recno = 0;
- child->key.ikey = NULL;
- child->state = WT_REF_MEM;
+ ref->addr = NULL;
/*
* Copy the first key from the original page into first ref in the new
@@ -1429,6 +1444,11 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
return (0);
err: if (split_ref[0] != NULL) {
+ /*
+ * The address was moved to the replacement WT_REF, restore it.
+ */
+ ref->addr = split_ref[0]->addr;
+
__wt_free(session, split_ref[0]->key.ikey);
__wt_free(session, split_ref[0]);
}
diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c
index 8e0f4036b79..c7d83d8dfff 100644
--- a/src/btree/bt_walk.c
+++ b/src/btree/bt_walk.c
@@ -244,7 +244,8 @@ ascend: /*
* If we see any child states other than deleted, the
* page isn't empty.
*/
- if (ref->state != WT_REF_DELETED)
+ if (ref->state != WT_REF_DELETED &&
+ !LF_ISSET(WT_READ_TRUNCATE))
empty_internal = false;
if (LF_ISSET(WT_READ_CACHE)) {
@@ -270,6 +271,7 @@ ascend: /*
WT_ERR(__wt_delete_page(session, ref, &skip));
if (skip)
break;
+ empty_internal = false;
} else if (LF_ISSET(WT_READ_COMPACT)) {
/*
* Skip deleted pages, rewriting them doesn't
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index 9de66922931..046d8bb3eba 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -222,19 +222,14 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
{
WT_ADDR *addr;
WT_DECL_RET;
- WT_PAGE *parent;
WT_PAGE_MODIFY *mod;
- parent = ref->home;
mod = ref->page->modify;
+ WT_ASSERT(session, ref->addr == NULL);
+
switch (mod->rec_result) {
case WT_PM_REC_EMPTY: /* Page is empty */
- if (ref->addr != NULL && __wt_off_page(parent, ref->addr)) {
- __wt_free(session, ((WT_ADDR *)ref->addr)->addr);
- __wt_free(session, ref->addr);
- }
-
/*
* Update the parent to reference a deleted page. The fact that
* reconciliation left the page "empty" means there's no older
@@ -261,11 +256,6 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
WT_RET(__wt_split_multi(session, ref, closing));
break;
case WT_PM_REC_REPLACE: /* 1-for-1 page swap */
- if (ref->addr != NULL && __wt_off_page(parent, ref->addr)) {
- __wt_free(session, ((WT_ADDR *)ref->addr)->addr);
- __wt_free(session, ref->addr);
- }
-
/*
* Update the parent to reference the replacement page.
*
diff --git a/src/include/btree.i b/src/include/btree.i
index 1c416c99e13..4029b29d207 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -1008,6 +1008,61 @@ __wt_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
}
/*
+ * __wt_ref_addr_free --
+ * Free the address in a reference, if necessary.
+ */
+static inline void
+__wt_ref_addr_free(WT_SESSION_IMPL *session, WT_REF *ref)
+ {
+ if (ref->addr == NULL)
+ return;
+
+ if (ref->home == NULL || __wt_off_page(ref->home, ref->addr)) {
+ __wt_free(session, ((WT_ADDR *)ref->addr)->addr);
+ __wt_free(session, ref->addr);
+ }
+ ref->addr = NULL;
+}
+
+/*
+ * __wt_btree_block_free --
+ * Helper function to free a block from the current tree.
+ */
+static inline int
+__wt_btree_block_free(
+ WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size)
+{
+ WT_BM *bm;
+ WT_BTREE *btree;
+
+ btree = S2BT(session);
+ bm = btree->bm;
+
+ return (bm->free(bm, session, addr, addr_size));
+}
+
+/*
+ * __wt_ref_block_free --
+ * Free the on-disk block for a reference and clear the address.
+ */
+static inline int
+__wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref)
+{
+ const uint8_t *addr;
+ size_t addr_size;
+
+ if (ref->addr == NULL)
+ return (0);
+
+ WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL));
+ WT_RET(__wt_btree_block_free(session, addr, addr_size));
+
+ /* Clear the address (so we don't free it twice). */
+ __wt_ref_addr_free(session, ref);
+ return (0);
+}
+
+/*
* __wt_page_can_evict --
* Check whether a page can be evicted.
*/
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index 73b7f4968e9..67b43057c8a 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -1066,10 +1066,7 @@ static int
__rec_child_deleted(WT_SESSION_IMPL *session,
WT_RECONCILE *r, WT_REF *ref, WT_CHILD_STATE *statep)
{
- WT_BM *bm;
WT_PAGE_DELETED *page_del;
- size_t addr_size;
- const uint8_t *addr;
page_del = ref->page_del;
@@ -1117,17 +1114,8 @@ __rec_child_deleted(WT_SESSION_IMPL *session,
*/
if (ref->addr != NULL &&
(page_del == NULL ||
- __wt_txn_visible_all(session, page_del->txnid))) {
- WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL));
- bm = S2BT(session)->bm;
- WT_RET(bm->free(bm, session, addr, addr_size));
-
- if (__wt_off_page(ref->home, ref->addr)) {
- __wt_free(session, ((WT_ADDR *)ref->addr)->addr);
- __wt_free(session, ref->addr);
- }
- ref->addr = NULL;
- }
+ __wt_txn_visible_all(session, page_del->txnid)))
+ WT_RET(__wt_ref_block_free(session, ref));
/*
* If the original page is gone, we can skip the slot on the internal
@@ -4790,13 +4778,11 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
static int
__rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page)
{
- WT_BM *bm;
WT_DECL_RET;
WT_PAGE_MODIFY *mod;
WT_MULTI *multi;
uint32_t i;
- bm = S2BT(session)->bm;
mod = page->modify;
/*
@@ -4816,7 +4802,7 @@ __rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page)
if (multi->addr.reuse)
multi->addr.addr = NULL;
else {
- WT_RET(bm->free(bm, session,
+ WT_RET(__wt_btree_block_free(session,
multi->addr.addr, multi->addr.size));
__wt_free(session, multi->addr.addr);
}
@@ -4862,8 +4848,6 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_MULTI *multi;
WT_PAGE_MODIFY *mod;
WT_REF *ref;
- size_t addr_size;
- const uint8_t *addr;
btree = S2BT(session);
bm = btree->bm;
@@ -4888,21 +4872,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
*/
if (__wt_ref_is_root(ref))
break;
- if (ref->addr != NULL) {
- /*
- * Free the page and clear the address (so we don't free
- * it twice).
- */
- WT_RET(__wt_ref_info(
- session, ref, &addr, &addr_size, NULL));
- WT_RET(bm->free(bm, session, addr, addr_size));
- if (__wt_off_page(ref->home, ref->addr)) {
- __wt_free(
- session, ((WT_ADDR *)ref->addr)->addr);
- __wt_free(session, ref->addr);
- }
- ref->addr = NULL;
- }
+ WT_RET(__wt_ref_block_free(session, ref));
break;
case WT_PM_REC_EMPTY: /* Page deleted */
break;
@@ -4921,7 +4891,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
* are checkpoints, and must be explicitly dropped.
*/
if (!__wt_ref_is_root(ref))
- WT_RET(bm->free(bm, session,
+ WT_RET(__wt_btree_block_free(session,
mod->mod_replace.addr, mod->mod_replace.size));
/* Discard the replacement page's address. */
@@ -5126,14 +5096,12 @@ err: __wt_scr_free(session, &tkey);
static int
__rec_write_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
{
- WT_BM *bm;
WT_BOUNDARY *bnd;
WT_DECL_RET;
WT_MULTI *multi;
WT_PAGE_MODIFY *mod;
uint32_t i;
- bm = S2BT(session)->bm;
mod = page->modify;
/*
@@ -5164,7 +5132,7 @@ __rec_write_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
if (bnd->addr.reuse)
bnd->addr.addr = NULL;
else {
- WT_TRET(bm->free(bm, session,
+ WT_TRET(__wt_btree_block_free(session,
bnd->addr.addr, bnd->addr.size));
__wt_free(session, bnd->addr.addr);
}