-rw-r--r--  src/btree/bt_debug.c  21
-rw-r--r--  src/btree/bt_split.c  84
2 files changed, 52 insertions, 53 deletions
diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c
index 4de94277364..af9f6a669f2 100644
--- a/src/btree/bt_debug.c
+++ b/src/btree/bt_debug.c
@@ -408,11 +408,13 @@ __debug_tree_shape_info(WT_PAGE *page)
v = page->memory_footprint;
if (v >= WT_GIGABYTE)
- snprintf(buf, sizeof(buf), "(%" PRIu64 "G)", v / WT_GIGABYTE);
+ snprintf(buf, sizeof(buf),
+ "(%p %" PRIu64 "G)", page, v / WT_GIGABYTE);
else if (v >= WT_MEGABYTE)
- snprintf(buf, sizeof(buf), "(%" PRIu64 "M)", v / WT_MEGABYTE);
+ snprintf(buf, sizeof(buf),
+ "(%p %" PRIu64 "M)", page, v / WT_MEGABYTE);
else
- snprintf(buf, sizeof(buf), "(%" PRIu64 ")", v);
+ snprintf(buf, sizeof(buf), "(%p %" PRIu64 ")", page, v);
return (buf);
}
@@ -429,16 +431,16 @@ __debug_tree_shape_worker(WT_DBG *ds, WT_PAGE *page, int level)
session = ds->session;
if (page->type == WT_PAGE_ROW_INT || page->type == WT_PAGE_COL_INT) {
- __dmsg(ds, "%*s" "I" "%s\n",
- level, " ", __debug_tree_shape_info(page));
+ __dmsg(ds, "%*s" "I" "%d %s\n",
+ level * 3, " ", level, __debug_tree_shape_info(page));
WT_INTL_FOREACH_BEGIN(session, page, ref) {
if (ref->state == WT_REF_MEM)
__debug_tree_shape_worker(
- ds, ref->page, level + 3);
+ ds, ref->page, level + 1);
} WT_INTL_FOREACH_END;
} else
- __dmsg(ds, "%*s" "L" "%s\n",
- level, " ", __debug_tree_shape_info(page));
+ __dmsg(ds, "%*s" "L" " %s\n",
+ level * 3, " ", __debug_tree_shape_info(page));
}
/*
@@ -458,8 +460,7 @@ __wt_debug_tree_shape(
if (page == NULL)
page = S2BT(session)->root.page;
- WT_WITH_PAGE_INDEX(session,
- __debug_tree_shape_worker(ds, page, 0));
+ WT_WITH_PAGE_INDEX(session, __debug_tree_shape_worker(ds, page, 1));
__dmsg_wrapup(ds);
return (0);
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index 766efcd5d2d..9a3186a0015 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -9,15 +9,6 @@
#include "wt_internal.h"
/*
- * Tuning; global variables to allow the binary to be patched, we don't yet have
- * any real understanding of what might be useful to surface to applications.
- */
-static u_int __split_deepen_max_internal_image = 100;
-static u_int __split_deepen_min_child = 10;
-static u_int __split_deepen_per_child = 100;
-static u_int __split_deepen_split_child = 100;
-
-/*
* Track allocation increments, matching the cache calculations, which add an
* estimate of allocation overhead to every object.
*/
@@ -177,45 +168,57 @@ __split_safe_free(WT_SESSION_IMPL *session, int exclusive, void *p, size_t s)
}
/*
+ * Tuning; global variables to allow the binary to be patched, we don't yet have
+ * any real understanding of what might be useful to surface to applications.
+ */
+static u_int __split_deepen_min_child = 10000;
+static u_int __split_deepen_per_child = 100;
+
+/*
* __split_should_deepen --
* Return if we should deepen the tree.
*/
static int
-__split_should_deepen(WT_SESSION_IMPL *session, WT_PAGE *page)
+__split_should_deepen(
+ WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *childrenp)
{
WT_PAGE_INDEX *pindex;
+ WT_PAGE *page;
- /*
- * Splits are based on either the number of child pages that will be
- * created by the split (splitting an internal page that will be slow
- * to search), or by the memory footprint of the parent page (avoiding
- * an internal page that will eat up all of the cache and put eviction
- * pressure on the system).
- */
+ *childrenp = 0;
+
+ page = ref->page;
pindex = WT_INTL_INDEX_COPY(page);
/*
* Deepen the tree if the page's memory footprint is larger than the
- * maximum size for a page in memory. We need an absolute minimum
- * number of entries in order to split the page: if there is a single
- * huge key, splitting won't help.
+ * maximum size for a page in memory (presumably putting eviction
+ * pressure on the cache).
*/
- if (page->memory_footprint > S2BT(session)->maxmempage &&
- pindex->entries >= __split_deepen_min_child)
- return (1);
+ if (page->memory_footprint < S2BT(session)->maxmempage)
+ return (0);
/*
- * Deepen the tree if the page's memory footprint is at least N
- * times the maximum internal page size chunk in the backing file and
- * the split will result in at least N children in the newly created
- * intermediate layer.
+ * Ensure the page has enough entries to make it worth splitting and
+ * we get a significant payback (in the case of a set of large keys,
+ * splitting won't help).
*/
- if (page->memory_footprint >
- __split_deepen_max_internal_image * S2BT(session)->maxintlpage &&
- pindex->entries >=
- (__split_deepen_per_child * __split_deepen_split_child))
+ if (pindex->entries > __split_deepen_min_child) {
+ *childrenp = pindex->entries / __split_deepen_per_child;
return (1);
+ }
+ /*
+ * The root is a special-case: if it's putting cache pressure on the
+ * system, split it even if there are only a few entries, we can't
+ * push it out of memory. Sanity check: if the root page is too big
+ * with fewer than 100 keys, there are huge keys and/or a too-small
+ * cache; there's not much to do.
+ */
+ if (__wt_ref_is_root(ref) && pindex->entries > 100) {
+ *childrenp = pindex->entries / 10;
+ return (1);
+ }
return (0);
}
@@ -383,7 +386,7 @@ __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page)
* Split an internal page in-memory, deepening the tree.
*/
static int
-__split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent)
+__split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
{
WT_DECL_RET;
WT_PAGE *child;
@@ -391,7 +394,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent)
WT_REF **alloc_refp;
WT_REF *child_ref, **child_refp, *parent_ref, **parent_refp, *ref;
size_t child_incr, parent_decr, parent_incr, size;
- uint32_t children, chunk, i, j, remain, slots;
+ uint32_t chunk, i, j, remain, slots;
int panic;
void *p;
@@ -401,13 +404,6 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent)
pindex = WT_INTL_INDEX_COPY(parent);
- /*
- * Create N children, unless we are dealing with a large page without
- * many entries, in which case split into the minimum number of pages.
- */
- children = WT_MAX(pindex->entries / __split_deepen_per_child,
- __split_deepen_min_child);
-
WT_STAT_FAST_CONN_INCR(session, cache_eviction_deepen);
WT_ERR(__wt_verbose(session, WT_VERB_SPLIT,
"%p: %" PRIu32 " elements, splitting into %" PRIu32 " children",
@@ -818,7 +814,8 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
WT_PAGE_INDEX *alloc_index, *pindex;
WT_REF **alloc_refp, *next_ref, *parent_ref;
size_t size;
- uint32_t i, j, deleted_entries, parent_entries, result_entries;
+ uint32_t children, i, j;
+ uint32_t deleted_entries, parent_entries, result_entries;
int complete, hazard, locked;
parent = NULL; /* -Wconditional-uninitialized */
@@ -995,9 +992,10 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
* Do the check here because we've just grown the parent page and
* are holding it locked.
*/
- if (ret == 0 && !exclusive && __split_should_deepen(session, parent))
+ if (ret == 0 && !exclusive &&
+ __split_should_deepen(session, parent_ref, &children))
WT_WITH_PAGE_INDEX(session,
- ret = __split_deepen(session, parent));
+ ret = __split_deepen(session, parent, children));
err: if (locked)
F_CLR_ATOMIC(parent, WT_PAGE_SPLITTING);