diff options
author | Michael Cahill <michael.cahill@mongodb.com> | 2015-12-02 22:00:52 +1100 |
---|---|---|
committer | Michael Cahill <michael.cahill@mongodb.com> | 2015-12-02 22:00:52 +1100 |
commit | 27d0cbdf8046565dba6902f4e6ee93b2642f0d19 (patch) | |
tree | 11db010e007a2025cafd08590fea43eff6efa199 | |
parent | 0bc4f8f2dc7b51a86d6e8c120f48264af1e500af (diff) | |
parent | 38369aebcf7da942929a3c27f72495e8dbe3e2d3 (diff) | |
download | mongo-27d0cbdf8046565dba6902f4e6ee93b2642f0d19.tar.gz |
Merge pull request #2354 from wiredtiger/server_21553_30backport (tag: mongodb-3.0.8)
SERVER-21553 3.0 backport
-rw-r--r-- | src/btree/bt_delete.c | 12 | ||||
-rw-r--r-- | src/btree/bt_discard.c | 5 | ||||
-rw-r--r-- | src/btree/bt_slvg.c | 8 | ||||
-rw-r--r-- | src/btree/bt_split.c | 60 | ||||
-rw-r--r-- | src/btree/bt_walk.c | 4 | ||||
-rw-r--r-- | src/evict/evict_page.c | 14 | ||||
-rw-r--r-- | src/include/btree.i | 55 | ||||
-rw-r--r-- | src/reconcile/rec_write.c | 44 |
8 files changed, 121 insertions, 81 deletions
diff --git a/src/btree/bt_delete.c b/src/btree/bt_delete.c index 7313e31267f..e19085830bd 100644 --- a/src/btree/bt_delete.c +++ b/src/btree/bt_delete.c @@ -250,6 +250,18 @@ __wt_delete_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool visible_all) __wt_txn_visible_all(session, ref->page_del->txnid) : __wt_txn_visible(session, ref->page_del->txnid)); + /* + * The page_del structure can be freed as soon as the delete is stable: + * it is only read when the ref state is WT_REF_DELETED. It is worth + * checking every time we come through because once this is freed, we + * no longer need synchronization to check the ref. + */ + if (skip && ref->page_del != NULL && (visible_all || + __wt_txn_visible_all(session, ref->page_del->txnid))) { + __wt_free(session, ref->page_del->update_list); + __wt_free(session, ref->page_del); + } + WT_PUBLISH(ref->state, WT_REF_DELETED); return (skip); } diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c index c876da6309c..30e19147e12 100644 --- a/src/btree/bt_discard.c +++ b/src/btree/bt_discard.c @@ -252,10 +252,7 @@ __wt_free_ref( } /* Free any address allocation. */ - if (ref->addr != NULL && __wt_off_page(page, ref->addr)) { - __wt_free(session, ((WT_ADDR *)ref->addr)->addr); - __wt_free(session, ref->addr); - } + __wt_ref_addr_free(session, ref); /* Free any page-deleted information. */ if (ref->page_del != NULL) { diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c index 89355baeb5c..e4e611f947a 100644 --- a/src/btree/bt_slvg.c +++ b/src/btree/bt_slvg.c @@ -1299,9 +1299,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref) * would have been lost.) Clear the reference addr so eviction doesn't * free the underlying blocks. */ - __wt_free(session, ((WT_ADDR *)ref->addr)->addr); - __wt_free(session, ref->addr); - ref->addr = NULL; + __wt_ref_addr_free(session, ref); /* Write the new version of the leaf page to disk. 
*/ WT_ERR(__slvg_modify_init(session, page)); @@ -2008,9 +2006,7 @@ __slvg_row_build_leaf( * would have been lost.) Clear the reference addr so eviction doesn't * free the underlying blocks. */ - __wt_free(session, ((WT_ADDR *)ref->addr)->addr); - __wt_free(session, ref->addr); - ref->addr = NULL; + __wt_ref_addr_free(session, ref); /* Write the new version of the leaf page to disk. */ WT_ERR(__slvg_modify_init(session, page)); diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 9d8e463feb0..6e0436bb01f 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -1030,6 +1030,16 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, #endif /* + * Page-delete information is only read when the WT_REF state is + * WT_REF_DELETED. The page-delete memory wasn't added to the + * parent's footprint, ignore it here. + */ + if (ref->page_del != NULL) { + __wt_free(session, ref->page_del->update_list); + __wt_free(session, ref->page_del); + } + + /* * Reset the page's original WT_REF field to split. Threads cursoring * through the tree were blocked because that WT_REF state was set to * locked. This update changes the locked state to split, unblocking @@ -1090,19 +1100,15 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, session, split_gen, 0, ikey, size)); parent_decr += size; } - /* - * The page_del structure can be freed immediately: it - * is only read when the ref state is WT_REF_DELETED. - * The size of the structure wasn't added to the parent, - * don't decrement. - */ - if (next_ref->page_del != NULL) { - __wt_free(session, - next_ref->page_del->update_list); - __wt_free(session, next_ref->page_del); - } } + /* + * If this page was fast-truncated, any attached structure + * should have been freed before now. 
+ */ + WT_ASSERT(session, next_ref->page_del == NULL); + + WT_TRET(__wt_ref_block_free(session, next_ref)); WT_TRET(__split_safe_free( session, split_gen, 0, next_ref, sizeof(WT_REF))); parent_decr += sizeof(WT_REF); @@ -1213,21 +1219,30 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref) * The first page in the split is the current page, but we still have * to create a replacement WT_REF, the original WT_REF will be set to * split status and eventually freed. + * + * The new WT_REF is not quite identical: we have to instantiate a key, + * and the new reference is visible to readers once the split completes. + * + * The key-instantiation code checks for races, leave the key fields + * zeroed we don't trigger them. + * + * Don't copy any deleted page state: we may be splitting a page that + * was instantiated after a truncate and that history should not be + * carried onto these new child pages. */ WT_ERR(__wt_calloc_one(session, &split_ref[0])); child = split_ref[0]; - *child = *ref; + child->page = ref->page; + child->home = ref->home; + child->pindex_hint = ref->pindex_hint; + child->state = WT_REF_MEM; + child->addr = ref->addr; /* - * The new WT_REF is not quite identical: we have to instantiate a key, - * and the new reference is visible to readers once the split completes. - * - * The key-instantiation code checks for races, clear the key fields so - * we don't trigger them. + * The address has moved to the replacement WT_REF. Make sure it isn't + * freed when the original ref is discarded. */ - child->key.recno = 0; - child->key.ikey = NULL; - child->state = WT_REF_MEM; + ref->addr = NULL; /* * Copy the first key from the original page into first ref in the new @@ -1429,6 +1444,11 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref) return (0); err: if (split_ref[0] != NULL) { + /* + * The address was moved to the replacement WT_REF, restore it. 
+ */ + ref->addr = split_ref[0]->addr; + __wt_free(session, split_ref[0]->key.ikey); __wt_free(session, split_ref[0]); } diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c index 8e0f4036b79..c7d83d8dfff 100644 --- a/src/btree/bt_walk.c +++ b/src/btree/bt_walk.c @@ -244,7 +244,8 @@ ascend: /* * If we see any child states other than deleted, the * page isn't empty. */ - if (ref->state != WT_REF_DELETED) + if (ref->state != WT_REF_DELETED && + !LF_ISSET(WT_READ_TRUNCATE)) empty_internal = false; if (LF_ISSET(WT_READ_CACHE)) { @@ -270,6 +271,7 @@ ascend: /* WT_ERR(__wt_delete_page(session, ref, &skip)); if (skip) break; + empty_internal = false; } else if (LF_ISSET(WT_READ_COMPACT)) { /* * Skip deleted pages, rewriting them doesn't diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index 9de66922931..046d8bb3eba 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -222,19 +222,14 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) { WT_ADDR *addr; WT_DECL_RET; - WT_PAGE *parent; WT_PAGE_MODIFY *mod; - parent = ref->home; mod = ref->page->modify; + WT_ASSERT(session, ref->addr == NULL); + switch (mod->rec_result) { case WT_PM_REC_EMPTY: /* Page is empty */ - if (ref->addr != NULL && __wt_off_page(parent, ref->addr)) { - __wt_free(session, ((WT_ADDR *)ref->addr)->addr); - __wt_free(session, ref->addr); - } - /* * Update the parent to reference a deleted page. The fact that * reconciliation left the page "empty" means there's no older @@ -261,11 +256,6 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) WT_RET(__wt_split_multi(session, ref, closing)); break; case WT_PM_REC_REPLACE: /* 1-for-1 page swap */ - if (ref->addr != NULL && __wt_off_page(parent, ref->addr)) { - __wt_free(session, ((WT_ADDR *)ref->addr)->addr); - __wt_free(session, ref->addr); - } - /* * Update the parent to reference the replacement page. 
* diff --git a/src/include/btree.i b/src/include/btree.i index 1c416c99e13..4029b29d207 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -1008,6 +1008,61 @@ __wt_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page) } /* + * __wt_ref_addr_free -- + * Free the address in a reference, if necessary. + */ +static inline void +__wt_ref_addr_free(WT_SESSION_IMPL *session, WT_REF *ref) + { + if (ref->addr == NULL) + return; + + if (ref->home == NULL || __wt_off_page(ref->home, ref->addr)) { + __wt_free(session, ((WT_ADDR *)ref->addr)->addr); + __wt_free(session, ref->addr); + } + ref->addr = NULL; +} + +/* + * __wt_btree_block_free -- + * Helper function to free a block from the current tree. + */ +static inline int +__wt_btree_block_free( + WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) +{ + WT_BM *bm; + WT_BTREE *btree; + + btree = S2BT(session); + bm = btree->bm; + + return (bm->free(bm, session, addr, addr_size)); +} + +/* + * __wt_ref_block_free -- + * Free the on-disk block for a reference and clear the address. + */ +static inline int +__wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref) +{ + const uint8_t *addr; + size_t addr_size; + + if (ref->addr == NULL) + return (0); + + WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL)); + WT_RET(__wt_btree_block_free(session, addr, addr_size)); + + /* Clear the address (so we don't free it twice). */ + __wt_ref_addr_free(session, ref); + return (0); +} + +/* * __wt_page_can_evict -- * Check whether a page can be evicted. 
*/ diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 73b7f4968e9..67b43057c8a 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -1066,10 +1066,7 @@ static int __rec_child_deleted(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *ref, WT_CHILD_STATE *statep) { - WT_BM *bm; WT_PAGE_DELETED *page_del; - size_t addr_size; - const uint8_t *addr; page_del = ref->page_del; @@ -1117,17 +1114,8 @@ __rec_child_deleted(WT_SESSION_IMPL *session, */ if (ref->addr != NULL && (page_del == NULL || - __wt_txn_visible_all(session, page_del->txnid))) { - WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL)); - bm = S2BT(session)->bm; - WT_RET(bm->free(bm, session, addr, addr_size)); - - if (__wt_off_page(ref->home, ref->addr)) { - __wt_free(session, ((WT_ADDR *)ref->addr)->addr); - __wt_free(session, ref->addr); - } - ref->addr = NULL; - } + __wt_txn_visible_all(session, page_del->txnid))) + WT_RET(__wt_ref_block_free(session, ref)); /* * If the original page is gone, we can skip the slot on the internal @@ -4790,13 +4778,11 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins) static int __rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page) { - WT_BM *bm; WT_DECL_RET; WT_PAGE_MODIFY *mod; WT_MULTI *multi; uint32_t i; - bm = S2BT(session)->bm; mod = page->modify; /* @@ -4816,7 +4802,7 @@ __rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page) if (multi->addr.reuse) multi->addr.addr = NULL; else { - WT_RET(bm->free(bm, session, + WT_RET(__wt_btree_block_free(session, multi->addr.addr, multi->addr.size)); __wt_free(session, multi->addr.addr); } @@ -4862,8 +4848,6 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_MULTI *multi; WT_PAGE_MODIFY *mod; WT_REF *ref; - size_t addr_size; - const uint8_t *addr; btree = S2BT(session); bm = btree->bm; @@ -4888,21 +4872,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) */ if 
(__wt_ref_is_root(ref)) break; - if (ref->addr != NULL) { - /* - * Free the page and clear the address (so we don't free - * it twice). - */ - WT_RET(__wt_ref_info( - session, ref, &addr, &addr_size, NULL)); - WT_RET(bm->free(bm, session, addr, addr_size)); - if (__wt_off_page(ref->home, ref->addr)) { - __wt_free( - session, ((WT_ADDR *)ref->addr)->addr); - __wt_free(session, ref->addr); - } - ref->addr = NULL; - } + WT_RET(__wt_ref_block_free(session, ref)); break; case WT_PM_REC_EMPTY: /* Page deleted */ break; @@ -4921,7 +4891,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) * are checkpoints, and must be explicitly dropped. */ if (!__wt_ref_is_root(ref)) - WT_RET(bm->free(bm, session, + WT_RET(__wt_btree_block_free(session, mod->mod_replace.addr, mod->mod_replace.size)); /* Discard the replacement page's address. */ @@ -5126,14 +5096,12 @@ err: __wt_scr_free(session, &tkey); static int __rec_write_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) { - WT_BM *bm; WT_BOUNDARY *bnd; WT_DECL_RET; WT_MULTI *multi; WT_PAGE_MODIFY *mod; uint32_t i; - bm = S2BT(session)->bm; mod = page->modify; /* @@ -5164,7 +5132,7 @@ __rec_write_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) if (bnd->addr.reuse) bnd->addr.addr = NULL; else { - WT_TRET(bm->free(bm, session, + WT_TRET(__wt_btree_block_free(session, bnd->addr.addr, bnd->addr.size)); __wt_free(session, bnd->addr.addr); } |