diff options
-rw-r--r-- | src/btree/bt_debug.c | 21 | ||||
-rw-r--r-- | src/btree/bt_split.c | 84 |
2 files changed, 52 insertions, 53 deletions
diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index 4de94277364..af9f6a669f2 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -408,11 +408,13 @@ __debug_tree_shape_info(WT_PAGE *page) v = page->memory_footprint; if (v >= WT_GIGABYTE) - snprintf(buf, sizeof(buf), "(%" PRIu64 "G)", v / WT_GIGABYTE); + snprintf(buf, sizeof(buf), + "(%p %" PRIu64 "G)", page, v / WT_GIGABYTE); else if (v >= WT_MEGABYTE) - snprintf(buf, sizeof(buf), "(%" PRIu64 "M)", v / WT_MEGABYTE); + snprintf(buf, sizeof(buf), + "(%p %" PRIu64 "M)", page, v / WT_MEGABYTE); else - snprintf(buf, sizeof(buf), "(%" PRIu64 ")", v); + snprintf(buf, sizeof(buf), "(%p %" PRIu64 ")", page, v); return (buf); } @@ -429,16 +431,16 @@ __debug_tree_shape_worker(WT_DBG *ds, WT_PAGE *page, int level) session = ds->session; if (page->type == WT_PAGE_ROW_INT || page->type == WT_PAGE_COL_INT) { - __dmsg(ds, "%*s" "I" "%s\n", - level, " ", __debug_tree_shape_info(page)); + __dmsg(ds, "%*s" "I" "%d %s\n", + level * 3, " ", level, __debug_tree_shape_info(page)); WT_INTL_FOREACH_BEGIN(session, page, ref) { if (ref->state == WT_REF_MEM) __debug_tree_shape_worker( - ds, ref->page, level + 3); + ds, ref->page, level + 1); } WT_INTL_FOREACH_END; } else - __dmsg(ds, "%*s" "L" "%s\n", - level, " ", __debug_tree_shape_info(page)); + __dmsg(ds, "%*s" "L" " %s\n", + level * 3, " ", __debug_tree_shape_info(page)); } /* @@ -458,8 +460,7 @@ __wt_debug_tree_shape( if (page == NULL) page = S2BT(session)->root.page; - WT_WITH_PAGE_INDEX(session, - __debug_tree_shape_worker(ds, page, 0)); + WT_WITH_PAGE_INDEX(session, __debug_tree_shape_worker(ds, page, 1)); __dmsg_wrapup(ds); return (0); diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 766efcd5d2d..9a3186a0015 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -9,15 +9,6 @@ #include "wt_internal.h" /* - * Tuning; global variables to allow the binary to be patched, we don't yet have - * any real understanding of what might be useful to 
surface to applications. - */ -static u_int __split_deepen_max_internal_image = 100; -static u_int __split_deepen_min_child = 10; -static u_int __split_deepen_per_child = 100; -static u_int __split_deepen_split_child = 100; - -/* * Track allocation increments, matching the cache calculations, which add an * estimate of allocation overhead to every object. */ @@ -177,45 +168,57 @@ __split_safe_free(WT_SESSION_IMPL *session, int exclusive, void *p, size_t s) } /* + * Tuning; global variables to allow the binary to be patched, we don't yet have + * any real understanding of what might be useful to surface to applications. + */ +static u_int __split_deepen_min_child = 10000; +static u_int __split_deepen_per_child = 100; + +/* * __split_should_deepen -- * Return if we should deepen the tree. */ static int -__split_should_deepen(WT_SESSION_IMPL *session, WT_PAGE *page) +__split_should_deepen( + WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *childrenp) { WT_PAGE_INDEX *pindex; + WT_PAGE *page; - /* - * Splits are based on either the number of child pages that will be - * created by the split (splitting an internal page that will be slow - * to search), or by the memory footprint of the parent page (avoiding - * an internal page that will eat up all of the cache and put eviction - * pressure on the system). - */ + *childrenp = 0; + + page = ref->page; pindex = WT_INTL_INDEX_COPY(page); /* * Deepen the tree if the page's memory footprint is larger than the - * maximum size for a page in memory. We need an absolute minimum - * number of entries in order to split the page: if there is a single - * huge key, splitting won't help. + * maximum size for a page in memory (presumably putting eviction + * pressure on the cache). 
*/ - if (page->memory_footprint > S2BT(session)->maxmempage && - pindex->entries >= __split_deepen_min_child) - return (1); + if (page->memory_footprint < S2BT(session)->maxmempage) + return (0); /* - * Deepen the tree if the page's memory footprint is at least N - * times the maximum internal page size chunk in the backing file and - * the split will result in at least N children in the newly created - * intermediate layer. + * Ensure the page has enough entries to make it worth splitting and + * we get a significant payback (in the case of a set of large keys, + * splitting won't help). */ - if (page->memory_footprint > - __split_deepen_max_internal_image * S2BT(session)->maxintlpage && - pindex->entries >= - (__split_deepen_per_child * __split_deepen_split_child)) + if (pindex->entries > __split_deepen_min_child) { + *childrenp = pindex->entries / __split_deepen_per_child; return (1); + } + /* + * The root is a special-case: if it's putting cache pressure on the + * system, split it even if there are only a few entries, we can't + * push it out of memory. Sanity check: if the root page is too big + * with less than 100 keys, there are huge keys and/or a too-small + * cache, there's not much to do. + */ + if (__wt_ref_is_root(ref) && pindex->entries > 100) { + *childrenp = pindex->entries / 10; + return (1); + } return (0); } @@ -383,7 +386,7 @@ __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page) * Split an internal page in-memory, deepening the tree. 
*/ static int -__split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent) +__split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children) { WT_DECL_RET; WT_PAGE *child; @@ -391,7 +394,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent) WT_REF **alloc_refp; WT_REF *child_ref, **child_refp, *parent_ref, **parent_refp, *ref; size_t child_incr, parent_decr, parent_incr, size; - uint32_t children, chunk, i, j, remain, slots; + uint32_t chunk, i, j, remain, slots; int panic; void *p; @@ -401,13 +404,6 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent) pindex = WT_INTL_INDEX_COPY(parent); - /* - * Create N children, unless we are dealing with a large page without - * many entries, in which case split into the minimum number of pages. - */ - children = WT_MAX(pindex->entries / __split_deepen_per_child, - __split_deepen_min_child); - WT_STAT_FAST_CONN_INCR(session, cache_eviction_deepen); WT_ERR(__wt_verbose(session, WT_VERB_SPLIT, "%p: %" PRIu32 " elements, splitting into %" PRIu32 " children", @@ -818,7 +814,8 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, WT_PAGE_INDEX *alloc_index, *pindex; WT_REF **alloc_refp, *next_ref, *parent_ref; size_t size; - uint32_t i, j, deleted_entries, parent_entries, result_entries; + uint32_t children, i, j; + uint32_t deleted_entries, parent_entries, result_entries; int complete, hazard, locked; parent = NULL; /* -Wconditional-uninitialized */ @@ -995,9 +992,10 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, * Do the check here because we've just grown the parent page and * are holding it locked. */ - if (ret == 0 && !exclusive && __split_should_deepen(session, parent)) + if (ret == 0 && !exclusive && + __split_should_deepen(session, parent_ref, &children)) WT_WITH_PAGE_INDEX(session, - ret = __split_deepen(session, parent)); + ret = __split_deepen(session, parent, children)); err: if (locked) F_CLR_ATOMIC(parent, WT_PAGE_SPLITTING); |