From 78ff40bd6410cfca6ca117d69a83852fd21a5bd9 Mon Sep 17 00:00:00 2001 From: Michael Cahill Date: Thu, 7 Mar 2013 20:56:38 +1100 Subject: Look more carefully for merge candidates in LRU walks. --- src/btree/bt_evict.c | 54 +++++++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 24 deletions(-) (limited to 'src') diff --git a/src/btree/bt_evict.c b/src/btree/bt_evict.c index e7c3064f4ff..4208e145d96 100644 --- a/src/btree/bt_evict.c +++ b/src/btree/bt_evict.c @@ -967,9 +967,9 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, int clean) WT_CACHE *cache; WT_DECL_RET; WT_EVICT_ENTRY *end, *evict, *start; - WT_PAGE *page, *parent; + WT_PAGE *page; wt_txnid_t oldest_txn; - int modified, restarts, splits; + int modified, restarts, levels; btree = session->btree; cache = S2C(session)->cache; @@ -982,7 +982,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, int clean) /* * Get some more eviction candidate pages. */ - for (evict = start, restarts = splits = 0; + for (evict = start, restarts = 0; evict < end && ret == 0; ret = __wt_tree_walk(session, &btree->evict_page, WT_TREE_EVICT)) { if ((page = btree->evict_page) == NULL) { @@ -1001,6 +1001,33 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, int clean) WT_CSTAT_INCR(session, cache_eviction_walk); +#define WT_IS_SPLIT_MERGE(p) \ + ((p)->modify != NULL && F_ISSET((p)->modify, WT_PM_REC_SPLIT_MERGE)) + + /* Look for a split-merge (grand)parent page to merge. */ + for (levels = 0; + levels < WT_MERGE_STACK_MIN && page != NULL && + WT_IS_SPLIT_MERGE(page); + page = page->parent, levels++) + ; + + /* + * Only look for a parent at exactly the right height above: if + * the stack is deep enough, we'll find it eventually, and we + * don't want to do too much work on every level. + * + * !!! + * We don't restrict ourselves to only the top-most page. If + * there are split-merge pages under the root page in a big, + * busy tree, the merge will only happen if we can lock the + * whole tree exclusively. Consider subtrees if locking the + * whole tree fails. + */ + if (page == NULL || + (levels != 0 && levels != WT_MERGE_STACK_MIN) || + (levels == WT_MERGE_STACK_MIN && !WT_IS_SPLIT_MERGE(page))) + continue; + /* Ignore root pages entirely. */ if (WT_PAGE_IS_ROOT(page)) continue; @@ -1022,26 +1049,6 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, int clean) if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU)) continue; - /* - * Skip split-merge pages that have split-merge pages as their - * parents (we're only interested in the top-most split-merge - * page of deep trees). - * - * Don't skip empty or split pages: updates after their last - * reconciliation may have changed their state and only the - * reconciliation/eviction code can confirm if they should be - * skipped. - */ - if (page->modify != NULL && - F_ISSET(page->modify, WT_PM_REC_SPLIT_MERGE)) { - parent = page->parent; - if (++splits < WT_MERGE_STACK_MIN || - (parent->modify != NULL && - F_ISSET(parent->modify, WT_PM_REC_SPLIT_MERGE))) - continue; - } else - splits = 0; - /* * If the file is being checkpointed, there's a period of time * where we can't discard any page with a modification @@ -1068,7 +1075,6 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, int clean) * this page was written, there's no chance to make progress... */ if (modified && - !F_ISSET(page->modify, WT_PM_REC_SPLIT_MERGE) && TXNID_LE(oldest_txn, page->modify->disk_txn)) continue; -- cgit v1.2.1 From 010c459aa29291cba7a409d9bf954061dc59eab2 Mon Sep 17 00:00:00 2001 From: Keith Bostic Date: Thu, 7 Mar 2013 10:13:52 -0500 Subject: Don't pass in both a session and a btree, the standard pattern is to get the btree handle from the session. --- src/btree/bt_handle.c | 6 +++--- src/include/extern.h | 3 +-- src/lsm/lsm_cursor.c | 2 +- 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 609f4cab692..55c17f4cd9b 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -484,12 +484,12 @@ __wt_btree_leaf_create( * Access the size of an in-memory tree with a single leaf page. */ int -__wt_btree_get_memsize( - WT_SESSION_IMPL *session, WT_BTREE *btree, uint32_t **memsizep) +__wt_btree_get_memsize(WT_SESSION_IMPL *session, uint32_t **memsizep) { + WT_BTREE *btree; WT_PAGE *root, *child; - WT_UNUSED(session); + btree = session->btree; root = btree->root_page; child = root->u.intl.t->page; diff --git a/src/include/extern.h b/src/include/extern.h index 2e26cd7c4dd..6d250c4a2f4 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -268,8 +268,7 @@ extern int __wt_btree_leaf_create( WT_SESSION_IMPL *session, WT_PAGE *parent, WT_REF *ref, WT_PAGE **pagep); -extern int __wt_btree_get_memsize( WT_SESSION_IMPL *session, - WT_BTREE *btree, +extern int __wt_btree_get_memsize(WT_SESSION_IMPL *session, uint32_t **memsizep); extern int __wt_btree_release_memsize(WT_SESSION_IMPL *session, WT_BTREE *btree); diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index ace76f3ac35..cd9c5754b8b 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -221,7 +221,7 @@ __clsm_open_cursors( */ if (lsm_tree->memsizep == NULL) (void)__wt_btree_get_memsize( - session, session->btree, &lsm_tree->memsizep); + session, &lsm_tree->memsizep); } clsm->dsk_gen = lsm_tree->dsk_gen; -- cgit v1.2.1