From 479818af02f4983b5abed2c5ba43fb86d9c3df0f Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Tue, 1 Dec 2015 18:29:20 -0500 Subject: WT-2553: If a leaf or internal page's read generation is set to WT_READGEN_OLDEST, prioritize it for eviction. --- src/evict/evict_lru.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index f2784890ab7..3d5a79de82d 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -36,6 +36,10 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry) page = entry->ref->page; + /* Any page set to the oldest generation should be discarded. */ + if (page->read_gen == WT_READGEN_OLDEST) + return (WT_READGEN_OLDEST); + /* Any empty page (leaf or internal), is a good choice. */ if (__wt_page_is_empty(page)) return (WT_READGEN_OLDEST); -- cgit v1.2.1 From f6a2db06a9d86a64e486a4b6151c6d72dec9fb46 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 2 Dec 2015 11:17:54 +1100 Subject: SERVER-21553 Free blocks during reverse splits. --- src/btree/bt_slvg.c | 4 ++-- src/btree/bt_split.c | 2 +- src/include/btree.i | 55 ++++++++++++++++++++++++++++++++++++++++------- src/reconcile/rec_write.c | 44 ++++++------------------------------- 4 files changed, 57 insertions(+), 48 deletions(-) diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c index e4a860bb421..756ffd98f3a 100644 --- a/src/btree/bt_slvg.c +++ b/src/btree/bt_slvg.c @@ -1290,7 +1290,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref) * would have been lost.) Clear the reference addr so eviction doesn't * free the underlying blocks. */ - __wt_ref_free_addr(session, ref); + __wt_ref_addr_free(session, ref); /* Write the new version of the leaf page to disk. */ WT_ERR(__slvg_modify_init(session, page)); @@ -2011,7 +2011,7 @@ __slvg_row_build_leaf( * would have been lost.) Clear the reference addr so eviction doesn't * free the underlying blocks. */ - __wt_ref_free_addr(session, ref); + __wt_ref_addr_free(session, ref); /* Write the new version of the leaf page to disk. */ WT_ERR(__slvg_modify_init(session, page)); diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index e2031553aed..631aca0d5c0 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -895,7 +895,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, */ WT_ASSERT(session, next_ref->page_del == NULL); - __wt_ref_free_addr(session, next_ref); + WT_TRET(__wt_ref_block_free(session, next_ref)); WT_TRET(__split_safe_free( session, split_gen, exclusive, next_ref, sizeof(WT_REF))); parent_decr += sizeof(WT_REF); diff --git a/src/include/btree.i b/src/include/btree.i index 02633c3206a..3e2e7158e04 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -37,6 +37,23 @@ __wt_page_is_modified(WT_PAGE *page) return (page->modify != NULL && page->modify->write_gen != 0); } +/* + * __wt_btree_block_free -- + * Helper function to free a block from the current tree. + */ +static inline int +__wt_btree_block_free( + WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) +{ + WT_BM *bm; + WT_BTREE *btree; + + btree = S2BT(session); + bm = btree->bm; + + return (bm->free(bm, session, addr, addr_size)); +} + /* * __wt_cache_page_inmem_incr -- * Increment a page's memory footprint in the cache. @@ -466,19 +483,20 @@ __wt_off_page(WT_PAGE *page, const void *p) } /* - * __wt_ref_free_addr -- + * __wt_ref_addr_free -- * Free the address in a reference, if necessary. */ static inline void -__wt_ref_free_addr(WT_SESSION_IMPL *session, WT_REF *ref) +__wt_ref_addr_free(WT_SESSION_IMPL *session, WT_REF *ref) { - if (ref->addr != NULL) { - if (ref->home == NULL || __wt_off_page(ref->home, ref->addr)) { - __wt_free(session, ((WT_ADDR *)ref->addr)->addr); - __wt_free(session, ref->addr); - } else - ref->addr = NULL; + if (ref->addr == NULL) + return; + + if (ref->home == NULL || __wt_off_page(ref->home, ref->addr)) { + __wt_free(session, ((WT_ADDR *)ref->addr)->addr); + __wt_free(session, ref->addr); } + ref->addr = NULL; } /* @@ -978,6 +996,27 @@ __wt_ref_info(WT_SESSION_IMPL *session, return (0); } +/* + * __wt_ref_block_free -- + * Free the on-disk block for a reference and clear the address. + */ +static inline int +__wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref) +{ + const uint8_t *addr; + size_t addr_size; + + if (ref->addr == NULL) + return (0); + + WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL)); + WT_RET(__wt_btree_block_free(session, addr, addr_size)); + + /* Clear the address (so we don't free it twice). */ + __wt_ref_addr_free(session, ref); + return (0); +} + /* * __wt_leaf_page_can_split -- * Check whether a page can be split in memory. diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index 0e1e7498568..e9559d2477d 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -984,23 +984,6 @@ __rec_bnd_cleanup(WT_SESSION_IMPL *session, WT_RECONCILE *r, bool destroy) } } -/* - * __rec_block_free -- - * Helper function to free a block. - */ -static int -__rec_block_free( - WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) -{ - WT_BM *bm; - WT_BTREE *btree; - - btree = S2BT(session); - bm = btree->bm; - - return (bm->free(bm, session, addr, addr_size)); -} - /* * __rec_update_save -- * Save a WT_UPDATE list for later restoration. @@ -1390,13 +1373,9 @@ __rec_child_deleted(WT_SESSION_IMPL *session, * read into this part of the name space again, the cache read function * instantiates an entirely new page.) */ - if (ref->addr != NULL && - (page_del == NULL || - __wt_txn_visible_all(session, page_del->txnid))) { - WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL)); - WT_RET(__rec_block_free(session, addr, addr_size)); - __wt_ref_free_addr(session, ref); - } + if (page_del == NULL || + __wt_txn_visible_all(session, page_del->txnid)) + WT_RET(__wt_ref_block_free(session, ref)); /* * If the original page is gone, we can skip the slot on the internal @@ -5312,7 +5291,7 @@ __rec_split_discard(WT_SESSION_IMPL *session, WT_PAGE *page) if (multi->addr.reuse) multi->addr.addr = NULL; else { - WT_RET(__rec_block_free(session, + WT_RET(__wt_btree_block_free(session, multi->addr.addr, multi->addr.size)); __wt_free(session, multi->addr.addr); } @@ -5421,16 +5400,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) */ if (__wt_ref_is_root(ref)) break; - if (ref->addr != NULL) { - /* - * Free the page and clear the address (so we don't free - * it twice). - */ - WT_RET(__wt_ref_info( - session, ref, &addr, &addr_size, NULL)); - WT_RET(__rec_block_free(session, addr, addr_size)); - __wt_ref_free_addr(session, ref); - } + WT_RET(__wt_ref_block_free(session, ref)); break; case WT_PM_REC_EMPTY: /* Page deleted */ break; @@ -5448,7 +5418,7 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) * are checkpoints, and must be explicitly dropped. */ if (!__wt_ref_is_root(ref)) - WT_RET(__rec_block_free(session, + WT_RET(__wt_btree_block_free(session, mod->mod_replace.addr, mod->mod_replace.size)); /* Discard the replacement page's address. */ @@ -5612,7 +5582,7 @@ __rec_write_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) if (bnd->addr.reuse) bnd->addr.addr = NULL; else { - WT_TRET(__rec_block_free(session, + WT_TRET(__wt_btree_block_free(session, bnd->addr.addr, bnd->addr.size)); __wt_free(session, bnd->addr.addr); } -- cgit v1.2.1 From 7a1050dbddf01bd73dcc98ca7cfa92287a11423b Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 2 Dec 2015 11:22:35 +1100 Subject: WT-2553 Include pages resulting from in-memory splits when in the "would block" phase of eviction. --- src/evict/evict_lru.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index f2784890ab7..4ec59853b7e 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -1247,6 +1247,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp) * eviction, skip anything that isn't marked. */ if (FLD_ISSET(cache->state, WT_EVICT_PASS_WOULD_BLOCK) && + page->memory_footprint < btree->splitmempage && page->read_gen != WT_READGEN_OLDEST) continue; -- cgit v1.2.1 From 8cf3e9bbaf06634bac1a191f225bc37817da3b25 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 2 Dec 2015 11:29:46 +1100 Subject: SERVER-21553 Finish rename to __wt_ref_addr_free. --- src/btree/bt_discard.c | 2 +- src/evict/evict_page.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c index 54d9761c487..54b07513089 100644 --- a/src/btree/bt_discard.c +++ b/src/btree/bt_discard.c @@ -269,7 +269,7 @@ __wt_free_ref( * Free any address allocation; if there's no linked WT_REF page, it * must be allocated. */ - __wt_ref_free_addr(session, ref); + __wt_ref_addr_free(session, ref); /* Free any page-deleted information. */ if (ref->page_del != NULL) { diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index 9281e7cdb2d..e5ec2ad7a8c 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -248,7 +248,7 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) switch (mod->rec_result) { case WT_PM_REC_EMPTY: /* Page is empty */ /* Discard the parent's address. */ - __wt_ref_free_addr(session, ref); + __wt_ref_addr_free(session, ref); /* * Update the parent to reference a deleted page. The fact that @@ -303,7 +303,7 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) return (EBUSY); /* Discard the parent's address. */ - __wt_ref_free_addr(session, ref); + __wt_ref_addr_free(session, ref); /* * Update the parent to reference the replacement page. -- cgit v1.2.1 From fa28552449d81f24a2c666934d508c525e752bd5 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 2 Dec 2015 11:36:47 +1100 Subject: SERVER-21553 Remove no-longer-used variables. --- src/reconcile/rec_write.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index e9559d2477d..dca6de964d6 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -1326,8 +1326,6 @@ __rec_child_deleted(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *ref, WT_CHILD_STATE *statep) { WT_PAGE_DELETED *page_del; - size_t addr_size; - const uint8_t *addr; page_del = ref->page_del; @@ -5374,8 +5372,6 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_BTREE *btree; WT_PAGE_MODIFY *mod; WT_REF *ref; - size_t addr_size; - const uint8_t *addr; btree = S2BT(session); bm = btree->bm; -- cgit v1.2.1 From 5ebfd9211956d794795c0be293c76e8dd22d3f4d Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 2 Dec 2015 12:30:39 +1100 Subject: SERVER-21553 Review feedback. --- src/reconcile/rec_write.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index dca6de964d6..21cc68ed119 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -1371,8 +1371,9 @@ __rec_child_deleted(WT_SESSION_IMPL *session, * read into this part of the name space again, the cache read function * instantiates an entirely new page.) */ - if (page_del == NULL || - __wt_txn_visible_all(session, page_del->txnid)) + if (ref->addr != NULL && + (page_del == NULL || + __wt_txn_visible_all(session, page_del->txnid))) WT_RET(__wt_ref_block_free(session, ref)); /* -- cgit v1.2.1 From 745eb56977bb7b636572d7eb8e7b1619f561bbb3 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 2 Dec 2015 13:10:01 +1100 Subject: SERVER-21553 If truncate leaves an internal page empty, evict it asap. (Note: works in conjunction with the change to LRU policy for internal pages in WT-2553). --- src/btree/bt_walk.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c index 8e0f4036b79..c7d83d8dfff 100644 --- a/src/btree/bt_walk.c +++ b/src/btree/bt_walk.c @@ -244,7 +244,8 @@ ascend: /* * If we see any child states other than deleted, the * page isn't empty. */ - if (ref->state != WT_REF_DELETED) + if (ref->state != WT_REF_DELETED && + !LF_ISSET(WT_READ_TRUNCATE)) empty_internal = false; if (LF_ISSET(WT_READ_CACHE)) { @@ -270,6 +271,7 @@ ascend: /* WT_ERR(__wt_delete_page(session, ref, &skip)); if (skip) break; + empty_internal = false; } else if (LF_ISSET(WT_READ_COMPACT)) { /* * Skip deleted pages, rewriting them doesn't -- cgit v1.2.1 From 5fe8c70e33731e0a6d5d25d2883faa52916f8dcf Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Wed, 2 Dec 2015 13:19:51 +1100 Subject: SERVER-21553 Check that ref->addr is NULL, don't try to free it. During review of this change, noticed that we are trying to free an address in a path where it should never be set (or we would leak blocks). Assert that the address is NULL instead of checking whether it needs to be freed. --- src/evict/evict_page.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c index e5ec2ad7a8c..1cdf07a9a55 100644 --- a/src/evict/evict_page.c +++ b/src/evict/evict_page.c @@ -245,11 +245,10 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) mod = ref->page->modify; + WT_ASSERT(session, ref->addr == NULL); + switch (mod->rec_result) { case WT_PM_REC_EMPTY: /* Page is empty */ - /* Discard the parent's address. */ - __wt_ref_addr_free(session, ref); - /* * Update the parent to reference a deleted page. The fact that * reconciliation left the page "empty" means there's no older @@ -302,9 +301,6 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing) if (!closing && __wt_eviction_dirty_target(session)) return (EBUSY); - /* Discard the parent's address. */ - __wt_ref_addr_free(session, ref); - /* * Update the parent to reference the replacement page. * -- cgit v1.2.1